xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_import.c (revision 734:d24968311d53)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <ctype.h>
31 #include <libdevinfo.h>
32 #include <mdiox.h>
33 #include <meta.h>
34 #include "meta_repartition.h"
35 #include "meta_set_prv.h"
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <sys/lvm/md_mddb.h>
40 #include <sys/lvm/md_names.h>
41 #include <sys/lvm/md_crc.h>
42 #include <sys/lvm/md_convert.h>
43 
/*
 * Singly linked list of device ids.  Nodes are chained through 'next' and
 * are looked up by 'did_index' (see map_replica_disk()).
 */
typedef struct did_list {
	void		*rdid;	/* real did if replicated set */
	void		*did;	/* did stored in lb */
	char		*devname;	/* device name handed to metadiskname() */
	dev_t		dev;
	uint_t		did_index;	/* locator block device id array index */
	char		*minor_name;
	struct did_list	*next;	/* next node, NULL at the end of the list */
} did_list_t;
53 
/*
 * One entry of the global replicated disk list: pairs the device id a
 * replicated disk is looked up by (old_devid) with the device id that
 * replicated_list_lookup() hands back for it (new_devid).
 */
typedef struct replicated_disk {
	void			*old_devid;	/* private copy, lookup key */
	void 			*new_devid;	/* stored as passed to insert */
	struct replicated_disk	*next;		/* next entry in same bucket */
} replicated_disk_t;
59 
/*
 * The current implementation limits the max device id length to 256 bytes.
 * Should the max device id length be increased, this definition would have to
 * be bumped up accordingly.
 */
#define	MAX_DEVID_LEN		256

/*
 * We store a global list of all the replicated disks in the system.  To
 * avoid a linear search over every disk, the entries are bucketed by the
 * length of their device id: slot 'n' of this array heads a singly linked
 * chain of the entries whose (old) device id is 'n' bytes long.  Slots for
 * lengths that have not been seen stay NULL.
 */
static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL};

/*
 * The list of replicated disks is built just once and this flag is set
 * once it's done.
 */
static int replicated_disk_list_built = 0;
80 
81 /*
82  * Map logical blk to physical
83  *
84  * This is based on the routine of the same name in the md kernel module (see
85  * file md_mddb.c), with the following caveats:
86  *
87  * - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this
88  * routine works instead on the mddb_mb_t read directly from the disk
89  */
90 daddr_t
91 getphysblk(
92 	mddb_block_t	blk,
93 	mddb_mb_t	*mbp
94 )
95 {
96 	/*
97 	 * Sanity check: is the block within range?  If so, we then assume
98 	 * that the block range map in the master block is valid and
99 	 * consistent with the block count.  Unfortunately, there is no
100 	 * reliable way to validate this assumption.
101 	 */
102 	if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive)
103 		return ((daddr_t)-1);
104 
105 	return (mbp->mb_blkmap.m_firstblk + blk);
106 }
107 
108 
109 
110 /*
111  * drive_append()
112  *
113  * Append to tail of linked list of md_im_drive_info_t.
114  *
115  * Will allocate space for new node and copy args into new space.
116  *
117  * Returns pointer to new node.
118  */
119 static md_im_drive_info_t *
120 drive_append(
121 	md_im_drive_info_t	**midpp,
122 	mddrivename_t		*dnp,
123 	void			*devid,
124 	void			*rdevid,
125 	void			*devname,
126 	int			devid_sz,
127 	char			*minor_name,
128 	md_timeval32_t		timestamp,
129 	md_im_replica_info_t	*mirp
130 )
131 {
132 	md_im_drive_info_t	*midp;
133 	int			o_devid_sz;
134 
135 	for (; (*midpp != NULL); midpp = &((*midpp)->mid_next))
136 		;
137 
138 	midp = *midpp = Zalloc(sizeof (md_im_drive_info_t));
139 
140 	midp->mid_dnp = dnp;
141 
142 	/*
143 	 * If rdevid is not NULL then we know we are dealing with
144 	 * replicated diskset case. 'devid_sz' will always be the
145 	 * size of a valid devid which can be 'devid' or 'rdevid'
146 	 */
147 	midp->mid_devid = (void *)Malloc(devid_sz);
148 
149 	if (rdevid) {
150 		(void) memcpy(midp->mid_devid, rdevid, devid_sz);
151 		/*
152 		 * Also need to store the 'other' devid
153 		 */
154 		o_devid_sz = devid_sizeof((ddi_devid_t)devid);
155 		midp->mid_o_devid = (void *)Malloc(o_devid_sz);
156 		(void) memcpy(midp->mid_o_devid, devid, o_devid_sz);
157 		midp->mid_o_devid_sz = o_devid_sz;
158 	} else {
159 		/*
160 		 * In the case of regular diskset, midp->mid_o_devid
161 		 * will be a NULL pointer
162 		 */
163 		(void) memcpy(midp->mid_devid, devid, devid_sz);
164 	}
165 
166 	if (devname)
167 		midp->mid_devname = Strdup(devname);
168 
169 	midp->mid_devid_sz = devid_sz;
170 	midp->mid_setcreatetimestamp = timestamp;
171 	(void) strlcpy(midp->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX);
172 	midp->mid_replicas = mirp;
173 
174 	return (midp);
175 }
176 
177 
178 
179 /*
180  * drive_append_wrapper()
181  *
182  * Constant time append wrapper; the append function will always walk the list,
183  * this will take a tail argument and use the append function on just the tail
184  * node, doing the appropriate old-tail-next-pointer bookkeeping.
185  */
186 static md_im_drive_info_t **
187 drive_append_wrapper(
188 	md_im_drive_info_t	**tailpp,
189 	mddrivename_t		*dnp,
190 	void 			*devid,
191 	void			*rdevid,
192 	void			*devname,
193 	int			devid_sz,
194 	char			*minor_name,
195 	md_timeval32_t		timestamp,
196 	md_im_replica_info_t	*mirp
197 )
198 {
199 	(void) drive_append(tailpp, dnp, devid, rdevid, devname, devid_sz,
200 	    minor_name, timestamp, mirp);
201 
202 	if ((*tailpp)->mid_next == NULL)
203 		return (tailpp);
204 
205 	return (&((*tailpp)->mid_next));
206 }
207 
208 
209 
210 /*
211  * replica_append()
212  *
213  * Append to tail of linked list of md_im_replica_info_t.
214  *
215  * Will allocate space for new node and copy args into new space.
216  *
217  * Returns pointer to new node.
218  */
219 static md_im_replica_info_t *
220 replica_append(
221 	md_im_replica_info_t	**mirpp,
222 	int			flags,
223 	daddr32_t		offset,
224 	daddr32_t		length,
225 	md_timeval32_t		timestamp
226 )
227 {
228 	md_im_replica_info_t	*mirp;
229 
230 	for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next))
231 		;
232 
233 	mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t));
234 
235 	mirp->mir_flags = flags;
236 	mirp->mir_offset = offset;
237 	mirp->mir_length = length;
238 	mirp->mir_timestamp = timestamp;
239 
240 	return (mirp);
241 
242 }
243 
244 
245 
246 /*
247  * replica_append_wrapper()
248  *
249  * Constant time append wrapper; the append function will always walk the list,
250  * this will take a tail argument and use the append function on just the tail
251  * node, doing the appropriate old-tail-next-pointer bookkeeping.
252  */
253 static md_im_replica_info_t **
254 replica_append_wrapper(
255 	md_im_replica_info_t	**tailpp,
256 	int			flags,
257 	daddr32_t		offset,
258 	daddr32_t		length,
259 	md_timeval32_t		timestamp
260 )
261 {
262 	(void) replica_append(tailpp, flags, offset, length, timestamp);
263 
264 	if ((*tailpp)->mir_next == NULL)
265 		return (tailpp);
266 
267 	return (&(*tailpp)->mir_next);
268 }
269 
270 /*
271  * map_replica_disk()
272  *
273  * Searches the device id list for a specific
274  * disk based on the locator block device id array index.
275  *
276  * Returns a pointer to the did_list node if a match was
277  * found or NULL otherwise.
278  */
279 static did_list_t *
280 map_replica_disk(
281 	did_list_t	*did_listp,
282 	int		did_index
283 )
284 {
285 	did_list_t	*tailp = did_listp;
286 
287 	while (tailp != NULL) {
288 		if (tailp->did_index == did_index)
289 			return (tailp);
290 		tailp = tailp->next;
291 	}
292 
293 	/* not found, return failure */
294 	return (NULL);
295 }
296 
297 /*
298  * replicated_list_lookup()
299  *
300  * looks up a replicated disk entry in the global replicated disk list
301  * based upon the length of that disk's device id. returns the new device id
302  * for the disk.
303  * If you store the returned devid you must create a local copy.
304  */
305 static void *
306 replicated_list_lookup(
307 	uint_t	devid_len,
308 	void	*old_devid
309 )
310 {
311 	replicated_disk_t *head = NULL;
312 
313 	assert(devid_len <= MAX_DEVID_LEN);
314 	head = replicated_disk_list[devid_len];
315 
316 	if (head == NULL)
317 		return (NULL);
318 
319 	do {
320 		if (devid_compare((ddi_devid_t)old_devid,
321 			(ddi_devid_t)head->old_devid) == 0)
322 			return (head->new_devid);
323 		head = head->next;
324 	} while (head != NULL);
325 
326 	return (NULL);
327 }
328 
329 /*
330  * replicated_list_insert()
331  *
332  * inserts a replicated disk entry into the global replicated disk list
333  */
334 static void
335 replicated_list_insert(
336 	size_t	old_devid_len,
337 	void	*old_devid,
338 	void	*new_devid
339 )
340 {
341 	replicated_disk_t	*repl_disk, **first_entry;
342 	void			*repl_old_devid = NULL;
343 
344 	assert(old_devid_len <= MAX_DEVID_LEN);
345 
346 	repl_disk = Zalloc(sizeof (replicated_disk_t));
347 	repl_old_devid = Zalloc(old_devid_len);
348 	(void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len);
349 
350 	repl_disk->old_devid = repl_old_devid;
351 	repl_disk->new_devid = new_devid;
352 
353 	first_entry = &replicated_disk_list[old_devid_len];
354 
355 	if (*first_entry == NULL) {
356 		*first_entry = repl_disk;
357 		return;
358 	}
359 
360 	repl_disk->next = *first_entry;
361 	replicated_disk_list[old_devid_len] = repl_disk;
362 }
363 
364 /*
365  * get_replica_disks()
366  *
367  * Will step through the locator records in the supplied locator block, and add
368  * each one with an active replica to a supplied list of md_im_drive_info_t, and
369  * add the appropriate replicas to the md_im_replica_info_t contained therein.
370  */
371 static void
372 get_replica_disks(
373 	md_im_set_desc_t	*misp,
374 	did_list_t		*did_listp,
375 	mddb_mb_t		*mb,
376 	mddb_lb_t		*lbp,
377 	md_error_t		*ep,
378 	int			replicated
379 )
380 {
381 	mddrivename_t		*dnp;
382 	int			indx, on_list;
383 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
384 	int			flags;
385 	int			devid_sz;
386 	char			*minor_name;
387 	did_list_t		*replica_disk;
388 	daddr32_t		offset;
389 	daddr32_t		length;
390 	md_timeval32_t		timestamp;
391 	md_im_replica_info_t	**mirpp = NULL;
392 	md_im_drive_info_t	**midpp = &misp->mis_drives;
393 	md_im_drive_info_t	*midp;
394 	void			*did;
395 
396 	for (indx = 0; indx < lbp->lb_loccnt; indx++) {
397 
398 		on_list = 0;
399 		if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) {
400 
401 			/*
402 			 * search the device id list for a
403 			 * specific ctds based on the locator
404 			 * block device id array index.
405 			 */
406 			replica_disk = map_replica_disk(did_listp, indx);
407 
408 			assert(replica_disk != NULL);
409 
410 
411 			/*
412 			 * metadrivename() can fail for a slice name
413 			 * if there is not an existing mddrivename_t.
414 			 * So we use metadiskname() to strip the slice
415 			 * number.
416 			 */
417 			dnp = metadrivename(&sp,
418 			    metadiskname(replica_disk->devname), ep);
419 
420 			for (midp = misp->mis_drives; midp != NULL;
421 				midp = midp->mid_next) {
422 				if (dnp == midp->mid_dnp) {
423 					on_list = 1;
424 					mirpp = &midp->mid_replicas;
425 					break;
426 				}
427 			}
428 
429 			/*
430 			 * Get the correct devid_sz
431 			 */
432 			if (replicated)
433 				did = replica_disk->rdid;
434 			else
435 				did = replica_disk->did;
436 
437 			devid_sz = devid_sizeof((ddi_devid_t)did);
438 			minor_name = replica_disk->minor_name;
439 
440 			/*
441 			 * New on the list so add it
442 			 */
443 			if (!on_list) {
444 				mddb_mb_t	*mbp;
445 				uint_t		sliceno;
446 				mdname_t	*rsp;
447 				int		fd = -1;
448 
449 				mbp = Malloc(DEV_BSIZE);
450 
451 				/* determine the replica slice */
452 				if (meta_replicaslice(dnp, &sliceno,
453 				    ep) != 0) {
454 					Free(mbp);
455 					continue;
456 				}
457 
458 				/*
459 				 * if the replica slice size is zero,
460 				 * don't bother opening
461 				 */
462 				if (dnp->vtoc.parts[sliceno].size == 0) {
463 					Free(mbp);
464 					continue;
465 				}
466 
467 				if ((rsp = metaslicename(dnp, sliceno,
468 				    ep)) == NULL) {
469 					Free(mbp);
470 					continue;
471 				}
472 
473 				if ((fd = open(rsp->rname,
474 				    O_RDONLY| O_NDELAY)) < 0) {
475 					Free(mbp);
476 					continue;
477 				}
478 
479 				/*
480 				 * a drive may not have a master block
481 				 */
482 				if (read_master_block(ep, fd, mbp,
483 				    DEV_BSIZE) <= 0) {
484 					mdclrerror(ep);
485 					Free(mbp);
486 					(void) close(fd);
487 					continue;
488 				}
489 
490 				(void) close(fd);
491 				midpp = drive_append_wrapper(midpp, dnp,
492 				    replica_disk->did, replica_disk->rdid,
493 				    replica_disk->devname,
494 				    devid_sz, minor_name, mbp->mb_setcreatetime,
495 				    NULL);
496 				mirpp = &((*midpp)->mid_replicas);
497 				Free(mbp);
498 			}
499 
500 			/*
501 			 * For either of these assertions to fail, it implies
502 			 * a NULL return from metadrivename() above.  Since
503 			 * the args came from a presumed valid locator block,
504 			 * that's Bad.
505 			 */
506 			assert(midpp != NULL);
507 			assert(mirpp != NULL);
508 
509 			/*
510 			 * Extract the parameters describing this replica.
511 			 *
512 			 * The magic "1" in the length calculation accounts
513 			 * for the length of the master block, in addition to
514 			 * the block count it describes.  (The master block
515 			 * will always take up one block on the disk, and
516 			 * there will always only be one master block per
517 			 * replica, even though much of the code is structured
518 			 * to handle noncontiguous replicas.)
519 			 */
520 			flags = lbp->lb_locators[indx].l_flags;
521 			offset = lbp->lb_locators[indx].l_blkno;
522 			length = mb->mb_blkcnt + 1;
523 			timestamp = mb->mb_setcreatetime;
524 
525 			mirpp = replica_append_wrapper(mirpp, flags,
526 				offset, length, timestamp);
527 
528 			/*
529 			 * If we're here it means -
530 			 *
531 			 * a) we had an active copy of the replica, and
532 			 * b) we've added the disk to the list of
533 			 *    disks as well.
534 			 *
535 			 * We need to bump up the number of active
536 			 * replica count for each such replica so that it
537 			 * can be used later for replica quorum check.
538 			 */
539 			misp->mis_active_replicas++;
540 		}
541 	}
542 }
543 
544 
545 /*
546  * append_pnm_rec()
547  *
548  * Append pnm_rec_t entry to list of physical devices in the diskset.  Entry
549  * contains a mapping of n_key in NM namespace(or min_key in DID_NM namespace)
550  * to name of the physical device.  This list will be used to ensure that the
551  * correct names of the physical devices are printed in the metastat output--the
552  * NM namespace might have stale information about where the physical devices
553  * were previously located when the diskset was last active.
554  */
555 static void
556 append_pnm_rec(
557 	pnm_rec_t	**pnm,
558 	mdkey_t		min_key,
559 	char		*n_name
560 )
561 {
562 	pnm_rec_t 	*tmp_pnm;
563 	char 		*p;
564 	int 		len;
565 
566 	if ((p = strrchr(n_name, '/')) != NULL)
567 		p++;
568 
569 	/*
570 	 * Allocates pnm_rec_t record for the physical
571 	 * device.
572 	 */
573 	len = strlen(p) + 1; /* Length of name plus Null term */
574 	tmp_pnm  = Malloc(sizeof (pnm_rec_t) + len);
575 	(void) strncpy(tmp_pnm->n_name, p, len);
576 	tmp_pnm->n_key = min_key;
577 
578 	/*
579 	 * Adds new element to head of pnm_rec_t list.
580 	 */
581 	if (*pnm == NULL) {
582 		tmp_pnm->next = NULL;
583 		*pnm = tmp_pnm;
584 	} else {
585 		tmp_pnm->next = *pnm;
586 		*pnm = tmp_pnm;
587 	}
588 }
589 
590 /*
591  * free_pnm_rec_list()
592  *
593  * Freeing all pnm_rec_t entries on the list of physical devices in the
594  * diskset.
595  */
596 void
597 free_pnm_rec_list(pnm_rec_t **pnm)
598 {
599 	pnm_rec_t	*tmp_pnm, *rm_pnm;
600 
601 	for (tmp_pnm = *pnm; tmp_pnm != NULL; ) {
602 		rm_pnm = tmp_pnm;
603 		tmp_pnm = tmp_pnm->next;
604 		Free(rm_pnm);
605 	}
606 
607 	*pnm = NULL;
608 }
609 
610 
611 /*
612  * get_disks_from_didnamespace()
613  * This function was origionally called: get_nonreplica_disks()
614  *
615  * Extracts the disks without replicas from the locator name space and adds them
616  * to the supplied list of md_im_drive_info_t.
617  * If the print verbose option was given then this function will also
618  * correct the nm namespace so that the n_name is the right ctd name
619  */
620 static void
621 get_disks_from_didnamespace(
622 	md_im_set_desc_t	*misp,
623 	pnm_rec_t		**pnm,
624 	mddb_rb_t		*did_nm,
625 	mddb_rb_t		*did_shrnm,
626 	uint_t 			imp_flags,
627 	int			replicated,
628 	md_error_t		*ep
629 )
630 {
631 	char			*search_path = "/dev";
632 	devid_nmlist_t		*nmlist;
633 	md_im_drive_info_t	*midp, **midpp = &misp->mis_drives;
634 	mddrivename_t		*dnp;
635 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
636 	mddb_rb_t		*rbp_did = did_nm;
637 	mddb_rb_t		*rbp_did_shr = did_shrnm;
638 	int			on_list = 0;
639 	int			devid_sz;
640 	struct devid_min_rec	*did_rec;
641 	struct devid_shr_rec	*did_shr_rec;
642 	struct did_shr_name	*did;
643 	struct did_min_name	*min;
644 	void			*r_did;	/* NULL if not a replicated diskset */
645 	void			*valid_did;
646 
647 	/*
648 	 * We got a pointer to an mddb record, which we expect to contain a
649 	 * name record; extract the pointer thereto.
650 	 */
651 	/* LINTED */
652 	did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data));
653 	/* LINTED */
654 	did_shr_rec = (struct devid_shr_rec *)
655 	    ((caddr_t)(&rbp_did_shr->rb_data));
656 
657 	/*
658 	 * Skip the nm_rec_hdr and iterate on the array of struct minor_name
659 	 * at the end of the devid_min_rec
660 	 */
661 	for (min = &did_rec->minor_name[0]; min->min_devid_key != 0;
662 	    /* LINTED */
663 	    min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) {
664 
665 		on_list = 0;
666 		r_did = NULL;
667 
668 		/*
669 		 * For a give DID_NM key, locate the corresponding device
670 		 * id from DID_NM_SHR
671 		 */
672 		for (did = &did_shr_rec->device_id[0]; did->did_key != 0;
673 		    /* LINTED */
674 		    did = (struct did_shr_name *)
675 		    ((char *)did + DID_SHR_NAMSIZ(did))) {
676 			/*
677 			 * We got a match, this is the device id we're
678 			 * looking for
679 			 */
680 			if (min->min_devid_key == did->did_key)
681 				break;
682 		}
683 
684 		if (did->did_key == 0) {
685 			/* we didn't find a match */
686 			assert(did->did_key != 0);
687 			md_exit(NULL, 1);
688 		}
689 
690 		/*
691 		 * If replicated diskset
692 		 */
693 		if (replicated) {
694 			size_t		new_devid_len;
695 			char		*temp;
696 			/*
697 			 * In this case, did->did_devid will
698 			 * be invalid so lookup the real one
699 			 */
700 			temp = replicated_list_lookup(did->did_size,
701 			    did->did_devid);
702 			new_devid_len = devid_sizeof((ddi_devid_t)temp);
703 			r_did = Zalloc(new_devid_len);
704 			(void) memcpy(r_did, temp, new_devid_len);
705 			valid_did = r_did;
706 		} else {
707 			valid_did = did->did_devid;
708 		}
709 
710 		/* Get the ctds mapping for that device id */
711 		if (meta_deviceid_to_nmlist(search_path,
712 		    (ddi_devid_t)valid_did,
713 		    &min->min_name[0], &nmlist) == 0) {
714 
715 			assert(nmlist->devname != NULL);
716 			dnp = metadrivename(&sp,
717 			    metadiskname(nmlist->devname), ep);
718 			/*
719 			 * Add drive to pnm_rec_t list of physical devices for
720 			 * metastat output.
721 			 */
722 			if (imp_flags & META_IMP_VERBOSE) {
723 				append_pnm_rec(pnm, min->min_key,
724 				    nmlist->devname);
725 			}
726 
727 			assert(dnp != NULL);
728 			/* Is it already on the list? */
729 			for (midp = misp->mis_drives; midp != NULL;
730 			    midp = midp->mid_next) {
731 				if (midp->mid_dnp == dnp) {
732 					on_list = 1;
733 					break;
734 				}
735 			}
736 
737 			devid_sz = devid_sizeof(
738 			    (ddi_devid_t)valid_did);
739 
740 			if (!on_list) {
741 				mddb_mb_t	*mbp;
742 				uint_t		sliceno;
743 				mdname_t	*rsp;
744 				int		fd = -1;
745 
746 				mbp = Malloc(DEV_BSIZE);
747 
748 				/* determine the replica slice */
749 				if (meta_replicaslice(dnp, &sliceno,
750 				    ep) != 0) {
751 					Free(mbp);
752 					continue;
753 				}
754 
755 				/*
756 				 * if the replica slice size is zero,
757 				 * don't bother opening
758 				 */
759 				if (dnp->vtoc.parts[sliceno].size
760 				    == 0) {
761 					Free(mbp);
762 					continue;
763 				}
764 
765 				if ((rsp = metaslicename(dnp, sliceno,
766 				    ep)) == NULL) {
767 					Free(mbp);
768 					continue;
769 				}
770 
771 				if ((fd = open(rsp->rname,
772 				    O_RDONLY| O_NDELAY)) < 0) {
773 					Free(mbp);
774 					continue;
775 				}
776 
777 				/*
778 				 * a drive may not have a master block
779 				 */
780 				if (read_master_block(ep, fd, mbp,
781 				    DEV_BSIZE) <= 0) {
782 					mdclrerror(ep);
783 					Free(mbp);
784 						(void) close(fd);
785 						continue;
786 				}
787 
788 				(void) close(fd);
789 				/*
790 				 * If it is replicated diskset,
791 				 * r_did will be non-NULL and
792 				 * devid_sz will be its size.
793 				 * Passing the devname as NULL because field
794 				 * is not currently used for a non-replica disk.
795 				 */
796 				midpp = drive_append_wrapper(midpp,
797 				    dnp, &did->did_devid, r_did, NULL,
798 				    devid_sz, &min->min_name[0],
799 				    mbp->mb_setcreatetime, NULL);
800 				Free(mbp);
801 			}
802 		devid_free_nmlist(nmlist);
803 		}
804 	}
805 }
806 
807 /*
808  * set_append()
809  *
810  * Append to tail of linked list of md_im_set_desc_t.
811  *
812  * Will allocate space for new node AND populate it by extracting disks with
813  * and without replicas from the locator blocks and locator namespace.
814  *
815  * Returns pointer to new node.
816  */
817 static md_im_set_desc_t *
818 set_append(
819 	md_im_set_desc_t	**mispp,
820 	did_list_t		*did_listp,
821 	mddb_mb_t		*mb,
822 	mddb_lb_t		*lbp,
823 	mddb_rb_t		*nm,
824 	pnm_rec_t		**pnm,
825 	mddb_rb_t		*did_nm,
826 	mddb_rb_t		*did_shrnm,
827 	uint_t 			imp_flags,
828 	int			replicated,
829 	md_error_t		*ep
830 )
831 {
832 
833 	md_im_set_desc_t	*misp;
834 	set_t			setno = mb->mb_setno;
835 
836 	/* run to end of list */
837 	for (; (*mispp != NULL); mispp = &((*mispp)->mis_next))
838 		;
839 
840 	/* allocate new list element */
841 	misp = *mispp = Zalloc(sizeof (md_im_set_desc_t));
842 
843 	if (replicated)
844 		misp->mis_flags = MD_IM_SET_REPLICATED;
845 
846 	misp->mis_oldsetno = setno;
847 
848 	/* Get the disks with and without replicas */
849 	get_replica_disks(misp, did_listp, mb, lbp, ep, replicated);
850 
851 	if (nm != NULL && did_nm != NULL && did_shrnm != NULL) {
852 		get_disks_from_didnamespace(misp, pnm, did_nm,
853 		    did_shrnm, imp_flags, replicated, ep);
854 	}
855 
856 	/*
857 	 * An error in this struct could come from either of
858 	 * the above routines;
859 	 * in both cases, we want to pass it back on up.
860 	 */
861 
862 	return (misp);
863 }
864 
865 
866 /*
867  * add_disk_names()
868  *
869  * Iterator to walk the minor node tree of the device snapshot, adding only the
870  * first non-block instance of each non-cdrom minor node to a list of disks.
871  */
872 static int
873 add_disk_names(di_node_t node, di_minor_t minor, void *args)
874 {
875 	char			*search_path = "/dev";
876 	ddi_devid_t		devid = di_devid(node);
877 	devid_nmlist_t		*nm;
878 	char			*min = di_minor_name(minor);
879 	md_im_names_t		*cnames = (md_im_names_t *)args;
880 	static di_node_t	save_node = NULL;
881 
882 	/*
883 	 * skip CD devices
884 	 * If a device does not have a device id, we can't
885 	 * do anything with it so just exclude it from our
886 	 * list.
887 	 *
888 	 * This would also encompass CD devices and floppy
889 	 * devices that don't have a device id.
890 	 */
891 	if (devid == NULL) {
892 		return (DI_WALK_CONTINUE);
893 	}
894 
895 	/* char disk devices (as opposed to block) */
896 	if (di_minor_spectype(minor) == S_IFCHR) {
897 
898 		/* only first occurrence (slice 0) of each instance */
899 		if (save_node == NULL || node != save_node) {
900 			save_node = node;
901 			if (meta_deviceid_to_nmlist(search_path, devid,
902 			    min, &nm) == 0) {
903 				int	index = cnames->min_count++;
904 
905 				assert(nm->devname != NULL);
906 				cnames->min_names =
907 					Realloc(cnames->min_names,
908 						cnames->min_count *
909 						sizeof (char *));
910 
911 				assert(cnames->min_names != NULL);
912 				cnames->min_names[index] =
913 					metadiskname(nm->devname);
914 				devid_free_nmlist(nm);
915 			}
916 		}
917 	}
918 	return (DI_WALK_CONTINUE);
919 }
920 
921 
922 
923 /*
924  * meta_list_disks()
925  *
926  * Snapshots the device tree and extracts disk devices from the snapshot.
927  */
928 int
929 meta_list_disks(md_error_t *ep, md_im_names_t *cnames)
930 {
931 	di_node_t root_node;
932 
933 	assert(cnames != NULL);
934 	cnames->min_count = 0;
935 	cnames->min_names = NULL;
936 
937 	if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE))
938 	    == DI_NODE_NIL) {
939 		return (mdsyserror(ep, errno, NULL));
940 	}
941 
942 	(void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames,
943 	    add_disk_names);
944 
945 	di_fini(root_node);
946 	return (0);
947 }
948 
949 /*
950  * meta_imp_drvused
951  *
952  * Checks if given drive is mounted, swapped, part of disk configuration
953  * or in use by SVM.  ep also has error code set up if drive is in use.
954  *
955  * Returns 1 if drive is in use.
956  * Returns 0 if drive is not in use.
957  */
958 int
959 meta_imp_drvused(
960 	mdsetname_t		*sp,
961 	mddrivename_t		*dnp,
962 	md_error_t		*ep
963 )
964 {
965 	md_error_t		status = mdnullerror;
966 	md_error_t		*db_ep = &status;
967 
968 	/*
969 	 * We pass in db_ep to meta_setup_db_locations
970 	 * and never ever use the error contained therein
971 	 * because all we're interested in is a check to
972 	 * see whether any local metadbs are present.
973 	 */
974 	if ((meta_check_drivemounted(sp, dnp, ep) != 0) ||
975 	    (meta_check_driveswapped(sp, dnp, ep) != 0) ||
976 	    (((meta_setup_db_locations(db_ep) == 0) &&
977 	    ((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) ||
978 	    (meta_check_driveinset(sp, dnp, ep) != 0))))) {
979 		return (1);
980 	} else {
981 		return (0);
982 	}
983 }
984 
985 /*
986  * meta_prune_cnames()
987  *
988  * Removes in-use disks from the list prior to further processing.
989  *
990  * Return value depends on err_on_prune flag: if set, and one or more disks
991  * are pruned, the return list will be the pruned disks.  If not set, or if no
992  * disks are pruned, the return list will be the unpruned disks.
993  */
994 mddrivenamelist_t *
995 meta_prune_cnames(
996 	md_error_t *ep,
997 	md_im_names_t *cnames,
998 	int err_on_prune
999 )
1000 {
1001 	int			d;
1002 	int			fcount = 0;
1003 	mddrivenamelist_t	*dnlp = NULL;
1004 	mddrivenamelist_t	**dnlpp = &dnlp;
1005 	mddrivenamelist_t	*fdnlp = NULL;
1006 	mddrivenamelist_t	**fdnlpp = &fdnlp;
1007 	mdsetname_t		*sp = metasetname(MD_LOCAL_NAME, ep);
1008 
1009 	for (d = 0; d < cnames->min_count; ++d) {
1010 		mddrivename_t	*dnp;
1011 
1012 		dnp = metadrivename(&sp, cnames->min_names[d], ep);
1013 		if (dnp == NULL) {
1014 			/*
1015 			 * Assuming we're interested in knowing about
1016 			 * whatever error occurred, but not in stopping.
1017 			 */
1018 			mde_perror(ep, cnames->min_names[d]);
1019 			mdclrerror(ep);
1020 
1021 			continue;
1022 		}
1023 
1024 		/*
1025 		 * Check if the drive is inuse.
1026 		 */
1027 		if (meta_imp_drvused(sp, dnp, ep)) {
1028 			fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp);
1029 			fcount++;
1030 			mdclrerror(ep);
1031 		} else {
1032 			dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
1033 		}
1034 	}
1035 
1036 	if (fcount) {
1037 		if (err_on_prune) {
1038 			(void) mddserror(ep, MDE_DS_DRIVEINUSE, 0,
1039 			    NULL, fdnlp->drivenamep->cname, NULL);
1040 			metafreedrivenamelist(dnlp);
1041 			return (fdnlp);
1042 		}
1043 		metafreedrivenamelist(fdnlp);
1044 	}
1045 
1046 	return (dnlp);
1047 }
1048 
1049 /*
1050  * read_master_block()
1051  *
1052  * Returns:
1053  *	< 0 for failure
1054  *	  0 for no valid master block
1055  *	  1 for valid master block
1056  *
1057  * The supplied buffer will be filled in for EITHER 0 or 1.
1058  */
1059 int
1060 read_master_block(
1061 	md_error_t	*ep,
1062 	int		fd,
1063 	void		*bp,
1064 	int		bsize
1065 )
1066 {
1067 	mddb_mb_t	*mbp = bp;
1068 	int		rval = 1;
1069 
1070 	assert(bp != NULL);
1071 
1072 	if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0)
1073 		return (mdsyserror(ep, errno, NULL));
1074 
1075 	if (read(fd, bp, bsize) != bsize)
1076 		return (mdsyserror(ep, errno, NULL));
1077 
1078 	/*
1079 	 * The master block magic number can either be MDDB_MAGIC_MB in
1080 	 * the case of a real master block, or, it can be MDDB_MAGIC_DU
1081 	 * in the case of a dummy master block
1082 	 */
1083 	if ((mbp->mb_magic != MDDB_MAGIC_MB) &&
1084 	    (mbp->mb_magic != MDDB_MAGIC_DU)) {
1085 		rval = 0;
1086 		(void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL);
1087 	}
1088 
1089 	if (mbp->mb_revision != MDDB_REV_MB) {
1090 		rval = 0;
1091 	}
1092 
1093 	return (rval);
1094 }
1095 
1096 /*
1097  * read_locator_block()
1098  *
1099  * Returns:
1100  *	< 0 for failure
1101  *	  0 for no valid locator block
1102  *	  1 for valid locator block
1103  */
1104 int
1105 read_locator_block(
1106 	md_error_t	*ep,
1107 	int		fd,
1108 	mddb_mb_t	*mbp,
1109 	void		*bp,
1110 	int		bsize
1111 )
1112 {
1113 	mddb_lb_t	*lbp = bp;
1114 
1115 	assert(bp != NULL);
1116 
1117 	if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0)
1118 		return (mdsyserror(ep, errno, NULL));
1119 
1120 	if (read(fd, bp, bsize) != bsize)
1121 		return (mdsyserror(ep, errno, NULL));
1122 
1123 	return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0);
1124 }
1125 
1126 int
1127 phys_read(
1128 	md_error_t	*ep,
1129 	int		fd,
1130 	mddb_mb_t	*mbp,
1131 	daddr_t		blk,
1132 	void		*bp,
1133 	int		bcount
1134 )
1135 {
1136 	daddr_t		pblk;
1137 
1138 	if ((pblk = getphysblk(blk, mbp)) < 0)
1139 		return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32,
1140 			MD_LOCAL_SET, blk, NULL));
1141 
1142 	if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0)
1143 		return (mdsyserror(ep, errno, NULL));
1144 
1145 	if (read(fd, bp, bcount) != bcount)
1146 		return (mdsyserror(ep, errno, NULL));
1147 
1148 	return (bcount);
1149 }
1150 
1151 /*
1152  * read_locator_block_did()
1153  *
1154  * Returns:
1155  * 	< 0 for failure
1156  *	  0 for no valid locator name struct
1157  *	  1 for valid locator name struct
1158  */
1159 int
1160 read_locator_block_did(
1161 	md_error_t	*ep,
1162 	int		fd,
1163 	mddb_mb_t	*mbp,
1164 	mddb_lb_t	*lbp,
1165 	void		*bp,
1166 	int		bsize
1167 )
1168 {
1169 	int		lb_didfirstblk = lbp->lb_didfirstblk;
1170 	mddb_did_blk_t	*lbdidp = bp;
1171 	int		rval;
1172 
1173 	assert(bp != NULL);
1174 
1175 	if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0)
1176 		return (rval);
1177 
1178 	return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1179 }
1180 
1181 /*
1182  * read_locator_names()
1183  *
1184  * Returns:
1185  *	< 0 for failure
1186  *	  0 for no valid locator name struct
1187  *	  1 for valid locator name struct
1188  */
1189 int
1190 read_locator_names(
1191 	md_error_t	*ep,
1192 	int		fd,
1193 	mddb_mb_t	*mbp,
1194 	mddb_lb_t	*lbp,
1195 	void		*bp,
1196 	int		bsize
1197 )
1198 {
1199 	int		lnfirstblk = lbp->lb_lnfirstblk;
1200 	mddb_ln_t	*lnp = bp;
1201 	int		rval;
1202 
1203 	assert(bp != NULL);
1204 
1205 	if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0)
1206 		return (rval);
1207 
1208 	return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0);
1209 }
1210 
1211 
1212 int
1213 read_database_block(
1214 	md_error_t	*ep,
1215 	int		fd,
1216 	mddb_mb_t	*mbp,
1217 	int		dbblk,
1218 	void		*bp,
1219 	int		bsize
1220 )
1221 {
1222 	mddb_db_t	*dbp = bp;
1223 	int		rval;
1224 
1225 	assert(bp != NULL);
1226 
1227 	if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0)
1228 		return (rval);
1229 
1230 	return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0);
1231 }
1232 
1233 int
1234 read_loc_didblks(
1235 	md_error_t	*ep,
1236 	int		fd,
1237 	mddb_mb_t	*mbp,
1238 	int		didblk,
1239 	void		*bp,
1240 	int		bsize
1241 )
1242 {
1243 	mddb_did_blk_t	*didbp = bp;
1244 	int		rval;
1245 
1246 	assert(bp != NULL);
1247 
1248 	if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0)
1249 		return (rval);
1250 
1251 	return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0);
1252 }
1253 
1254 
1255 int
1256 read_loc_didinfo(
1257 	md_error_t	*ep,
1258 	int		fd,
1259 	mddb_mb_t	*mbp,
1260 	int		infoblk,
1261 	void		*bp,
1262 	int		bsize
1263 )
1264 {
1265 	int		rval = 1;
1266 	mddb_did_info_t	*infop = bp;
1267 
1268 	assert(bp != NULL);
1269 
1270 	if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0)
1271 		return (rval);
1272 
1273 	return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0);
1274 }
1275 
1276 /*
1277  * meta_nm_rec()
1278  *
1279  * Return the DE corresponding to the requested namespace record type.
1280  * Modifies dbp to have a firstentry if one isn't there.
1281  */
1282 static mddb_de_t *
1283 meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype)
1284 {
1285 	mddb_de_t *dep;
1286 	int	desize;
1287 
1288 	if (dbp->db_firstentry != NULL) {
1289 		/* LINTED */
1290 		dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry)
1291 				    + sizeof (dbp->db_firstentry));
1292 		dbp->db_firstentry = dep;
1293 		while (dep && dep->de_next) {
1294 			desize = sizeof (*dep) - sizeof (dep->de_blks) +
1295 				sizeof (daddr_t) * dep->de_blkcount;
1296 			/* LINTED */
1297 			dep->de_next = (mddb_de_t *)
1298 				((caddr_t)dep + desize);
1299 			dep = dep->de_next;
1300 		}
1301 	}
1302 
1303 	for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) {
1304 		if (dep->de_type1 == rectype)
1305 			break;
1306 	}
1307 	return (dep);
1308 }
1309 
1310 /*
1311  * read_nm_rec()
1312  *
1313  * Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the
1314  * configuration data in the buffer 'nm'
1315  *
1316  * Returns:
1317  *	< 0 for failure
1318  *	  0 for no valid NM/DID_NM/DID_NM_SHR record
1319  *	  1 for valid NM/DID_NM/DID_NM_SHR record
1320  *
1321  */
1322 static int
1323 read_nm_rec(
1324 	md_error_t 	*ep,
1325 	int 		fd,
1326 	mddb_mb_t	*mbp,
1327 	mddb_lb_t	*lbp,
1328 	char		**nm,
1329 	mddb_type_t	rectype,
1330 	char		*diskname
1331 )
1332 {
1333 	int		cnt, dbblk, rval = 0;
1334 	char		db[DEV_BSIZE];
1335 	mddb_de_t	*dep;
1336 	/*LINTED*/
1337 	mddb_db_t	*dbp = (mddb_db_t *)&db;
1338 	char 		*tmpnm = NULL;
1339 	daddr_t		pblk;
1340 
1341 	for (dbblk = lbp->lb_dbfirstblk;
1342 	    dbblk != 0;
1343 	    dbblk = dbp->db_nextblk) {
1344 
1345 		if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp,
1346 		    sizeof (db))) <= 0)
1347 			return (rval);
1348 
1349 		/*
1350 		 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is
1351 		 * only one record per mddb. There is a rare case when we
1352 		 * can't expand the record. If this is the case then we
1353 		 * will have multiple NM/DID_NM/DID_NM_SHR records linked
1354 		 * with r_next_recid.
1355 		 *
1356 		 * For now assume the normal case and handle the extended
1357 		 * namespace in Phase 2.
1358 		 */
1359 		if ((dep = meta_nm_rec(dbp, rectype)) != NULL)
1360 			break;
1361 	}
1362 
1363 	/* If meta_nm_rec() never succeeded, bail out */
1364 	if (dep == NULL)
1365 		return (0);
1366 
1367 	/* Read in the appropriate record and return configurations */
1368 	tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount));
1369 	*nm = tmpnm;
1370 
1371 	for (cnt = 0; cnt < dep->de_blkcount; cnt++) {
1372 		if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) {
1373 			rval = mdmddberror(ep, MDE_DB_BLKRANGE,
1374 			    NODEV32, MD_LOCAL_SET,
1375 			    dep->de_blks[cnt], diskname);
1376 			return (rval);
1377 		}
1378 
1379 		if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) {
1380 			rval = mdsyserror(ep, errno, diskname);
1381 			return (rval);
1382 		}
1383 
1384 		if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) {
1385 			rval = mdsyserror(ep, errno, diskname);
1386 			return (rval);
1387 		}
1388 
1389 		tmpnm += DEV_BSIZE;
1390 	}
1391 	return (1);
1392 }
1393 
1394 /*
1395  * is_replicated
1396  *
1397  * Determines whether a disk has been replicated or not. It checks to see
1398  * if the device id stored in the master block is the same as the device id
1399  * registered for that disk on the current system. If the two device ids are
1400  * different, then we know that the disk has been replicated.
1401  *
1402  * If need_devid is set and the disk is replicated, fill in the new_devid.
1403  * Also, if need_devid is set, this routine allocates memory for the device
1404  * ids; the caller of this routine is responsible for free'ing up the memory.
1405  *
1406  * Returns:
1407  * 	1	if it's a replicated disk
1408  * 	0 	if it's not a replicated disk
1409  */
1410 static int
1411 is_replicated(
1412 	int fd,
1413 	mddb_mb_t *mbp,
1414 	int need_devid,
1415 	void **new_devid
1416 )
1417 {
1418 	ddi_devid_t	current_devid;
1419 	int		retval = 0;
1420 	size_t		new_devid_len;
1421 
1422 	if (mbp->mb_devid_magic != MDDB_MAGIC_DE)
1423 		return (retval);
1424 
1425 	if (devid_get(fd, &current_devid) != 0)
1426 		return (retval);
1427 
1428 	if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0)
1429 		retval = 1;
1430 
1431 	if (retval && need_devid) {
1432 		new_devid_len = devid_sizeof(current_devid);
1433 		*new_devid = Zalloc(new_devid_len);
1434 		(void) memcpy(*new_devid, (void *)current_devid, new_devid_len);
1435 	}
1436 
1437 	devid_free(current_devid);
1438 	return (retval);
1439 }
1440 
1441 /*
1442  * free_replicated_disks_list()
1443  *
1444  * this frees up all the memory allocated by build_replicated_disks_list
1445  */
1446 static void
1447 free_replicated_disks_list()
1448 {
1449 	replicated_disk_t 	**repl_disk, *temp;
1450 	int 			index;
1451 
1452 	for (index = 0; index <= MAX_DEVID_LEN; index++) {
1453 		repl_disk = &replicated_disk_list[index];
1454 
1455 		while (*repl_disk != NULL) {
1456 			temp = *repl_disk;
1457 			*repl_disk = (*repl_disk)->next;
1458 
1459 			Free(temp->old_devid);
1460 			Free(temp->new_devid);
1461 			Free(temp);
1462 		}
1463 	}
1464 }
1465 
1466 /*
1467  * build_replicated_disks_list()
1468  *
1469  * Builds a list of disks that have been replicated using either a
1470  * remote replication or a point-in-time replication software. The
1471  * list is stored as a two dimensional sparse array.
1472  *
1473  * Returns
1474  * 	1	on success
1475  * 	0 	on failure
1476  */
1477 static int
1478 build_replicated_disks_list(
1479 	md_error_t *ep,
1480 	mddrivenamelist_t *dnlp
1481 )
1482 {
1483 	uint_t			sliceno;
1484 	int			fd = -1;
1485 	mddrivenamelist_t	*dp;
1486 	mdname_t		*rsp;
1487 	mddb_mb_t		*mbp;
1488 
1489 	mbp = Malloc(DEV_BSIZE);
1490 
1491 	for (dp = dnlp; dp != NULL; dp = dp->next) {
1492 		mddrivename_t *dnp;
1493 		void *new_devid;
1494 
1495 		dnp = dp->drivenamep;
1496 		/* determine the replica slice */
1497 		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
1498 			continue;
1499 
1500 		/*
1501 		 * if the replica slice size is zero, don't bother opening
1502 		 */
1503 		if (dnp->vtoc.parts[sliceno].size == 0)
1504 			continue;
1505 
1506 		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
1507 			continue;
1508 
1509 		if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0)
1510 			return (mdsyserror(ep, errno, rsp->rname));
1511 
1512 		/* a drive may not have a master block so we just continue */
1513 		if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) {
1514 			(void) close(fd);
1515 			mdclrerror(ep);
1516 			continue;
1517 		}
1518 
1519 		if (is_replicated(fd, mbp, 1, &new_devid)) {
1520 			replicated_list_insert(mbp->mb_devid_len,
1521 			    mbp->mb_devid, new_devid);
1522 		}
1523 		(void) close(fd);
1524 	}
1525 	replicated_disk_list_built = 1;
1526 
1527 	Free(mbp);
1528 	return (1);
1529 }
1530 
1531 /*
1532  * free_did_list()
1533  *
1534  * Frees the did_list allocated as part of build_did_list
1535  */
1536 static void
1537 free_did_list(
1538 	did_list_t	*did_listp
1539 )
1540 {
1541 	did_list_t	*temp, *head;
1542 
1543 	head = did_listp;
1544 
1545 	while (head != NULL) {
1546 		temp = head;
1547 		head = head->next;
1548 		if (temp->rdid)
1549 			Free(temp->rdid);
1550 		if (temp->did)
1551 			Free(temp->did);
1552 		if (temp->devname)
1553 			Free(temp->devname);
1554 		if (temp->minor_name)
1555 			Free(temp->minor_name);
1556 		Free(temp);
1557 	}
1558 }
1559 
1560 /*
1561  * build_did_list()
1562  *
1563  * Build a list of device ids corresponding to disks in the locator block.
1564  * Memory is allocated here for the nodes in the did_list. The callers of
1565  * this routine must also call free_did_list to free up the memory after
1566  * they're done.
1567  *
1568  * Returns:
1569  *	< 0 		for failure
1570  *	  0 		for no valid locator block device id array
1571  *	  1 		for valid locator block device id array
1572  *	  ENOTSUP	partial diskset, not all disks in a diskset on the
1573  *			system where import is being executed
1574  */
1575 static int
1576 build_did_list(
1577 	md_error_t	*ep,
1578 	int		fd,
1579 	mddb_mb_t	*mb,
1580 	mddb_did_blk_t	*lbdidp,
1581 	did_list_t	**did_listp,
1582 	int		replicated
1583 )
1584 {
1585 	char 		*search_path = "/dev";
1586 	char		*minor_name;
1587 	int		rval, cnt;
1588 	devid_nmlist_t	*nm;
1589 	uint_t		did_info_length = 0;
1590 	uint_t		did_info_firstblk = 0;
1591 	did_list_t	*new, *head = NULL;
1592 	char		*bp = NULL, *temp;
1593 	mddb_did_info_t	*did_info = NULL;
1594 	void		*did = NULL;
1595 	size_t		new_devid_len;
1596 
1597 	for (cnt = 0; cnt < MDDB_NLB; cnt++) {
1598 		did_info = &lbdidp->blk_info[cnt];
1599 
1600 		if (!(did_info->info_flags & MDDB_DID_EXISTS))
1601 			continue;
1602 
1603 		new = Zalloc(sizeof (did_list_t));
1604 		new->did = Zalloc(did_info->info_length);
1605 
1606 		/*
1607 		 * If we can re-use the buffer already has been
1608 		 * read in then just use it.  Otherwise free
1609 		 * the previous one and alloc a new one
1610 		 */
1611 		if (dbtob(did_info->info_blkcnt) != did_info_length &&
1612 		    did_info->info_firstblk != did_info_firstblk) {
1613 
1614 			did_info_length = dbtob(did_info->info_blkcnt);
1615 			did_info_firstblk = did_info->info_firstblk;
1616 
1617 			if (bp)
1618 				Free(bp);
1619 			bp = temp = Zalloc(did_info_length);
1620 
1621 			if ((rval = phys_read(ep, fd, mb, did_info_firstblk,
1622 			    (void *)bp, did_info_length)) < 0)
1623 				return (rval);
1624 		} else {
1625 			temp = bp;
1626 		}
1627 
1628 		temp += did_info->info_offset;
1629 		(void) memcpy(new->did, temp, did_info->info_length);
1630 		new->did_index = cnt;
1631 		minor_name = did_info->info_minor_name;
1632 
1633 		/*
1634 		 * If we are not able to find the ctd mapping corresponding
1635 		 * to a given device id, it probably means the device id in
1636 		 * question is not registered with the system.
1637 		 *
1638 		 * Highly likely that the only time this happens, we've hit
1639 		 * a case where not all the disks that are a part of the
1640 		 * diskset were moved before importing the diskset.
1641 		 *
1642 		 * If set is a replicated diskset, then the device id we get
1643 		 * from 'lb' will be the 'other' did and we need to lookup
1644 		 * the real one before we call this routine.
1645 		 */
1646 		if (replicated) {
1647 		    temp = replicated_list_lookup(did_info->info_length,
1648 			new->did);
1649 		    new_devid_len = devid_sizeof((ddi_devid_t)temp);
1650 		    new->rdid = Zalloc(new_devid_len);
1651 		    (void) memcpy(new->rdid, temp, new_devid_len);
1652 		    did = new->rdid;
1653 		} else {
1654 		    did = new->did;
1655 		}
1656 
1657 		if (devid_valid((ddi_devid_t)(did)) == 0) {
1658 			return (-1);
1659 		}
1660 
1661 		if ((rval = meta_deviceid_to_nmlist(search_path,
1662 		    (ddi_devid_t)did, minor_name, &nm)) != 0) {
1663 			*did_listp = head;
1664 			free_did_list(*did_listp);
1665 			*did_listp = NULL;
1666 			(void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
1667 			    mynode(), NULL, NULL);
1668 			return (ENOTSUP);
1669 		}
1670 
1671 		assert(nm->devname != NULL);
1672 		new->devname = Strdup(nm->devname);
1673 		new->dev = nm->dev;
1674 		new->minor_name = Strdup(minor_name);
1675 
1676 		devid_free_nmlist(nm);
1677 
1678 		new->next = head;
1679 		head = new;
1680 	}
1681 
1682 	/* Free the last bp */
1683 	if (bp)
1684 		Free(bp);
1685 	*did_listp = head;
1686 	return (1);
1687 }
1688 /*
1689  * check_nm_disks
1690  *	Checks the disks listed in the shared did namespace to see if they
1691  *	are accessable on the system. If not, return ENOTSUP error to
1692  *	indicate we have a partial diskset.
1693  * Returns:
1694  *	< 0 		for failure
1695  *	  0		success
1696  *	  ENOTSUP	partial diskset, not all disks in a diskset on the
1697  *			system where import is being executed
1698  */
1699 static int
1700 check_nm_disks(
1701 	md_error_t		*ep,
1702 	struct devid_min_rec	*did_nmp,
1703 	struct devid_shr_rec	*did_shrnmp
1704 )
1705 {
1706 	char 		*search_path = "/dev";
1707 	char		*minor_name = NULL;
1708 	uint_t		used_size, min_used_size;
1709 	ddi_devid_t	did;
1710 	devid_nmlist_t	*nm;
1711 	void		*did_min_namep;
1712 	void		*did_shr_namep;
1713 	size_t		did_nsize, did_shr_nsize;
1714 
1715 	used_size = did_shrnmp->did_rec_hdr.r_used_size -
1716 	    sizeof (struct nm_rec_hdr);
1717 	min_used_size = did_nmp->min_rec_hdr.r_used_size -
1718 	    sizeof (struct nm_rec_hdr);
1719 	did_shr_namep = (void *)(&did_shrnmp->device_id[0]);
1720 	while (used_size > (int)sizeof (struct did_shr_name)) {
1721 		did_min_namep = (void *)(&did_nmp->minor_name[0]);
1722 		/* grab device id and minor name from the shared spaces */
1723 		did = (ddi_devid_t)(((struct did_shr_name *)
1724 		    did_shr_namep)->did_devid);
1725 		if (devid_valid(did) == 0) {
1726 			return (-1);
1727 		}
1728 
1729 		/*
1730 		 * We need to check that the DID_NM and DID_SHR_NM are in
1731 		 * sync. It is possible that we took a panic between writing
1732 		 * the two areas to disk. This would be cleaned up on the
1733 		 * next snarf but we don't know for sure that snarf has even
1734 		 * happened since we're reading from disk.
1735 		 */
1736 		while (((struct did_shr_name *)did_shr_namep)->did_key !=
1737 		    ((struct did_min_name *)did_min_namep)->min_devid_key) {
1738 			did_nsize = DID_NAMSIZ((struct did_min_name *)
1739 			    did_min_namep);
1740 			did_min_namep = ((void *)((char *)did_min_namep +
1741 			    did_nsize));
1742 			min_used_size -= did_nsize;
1743 			if (min_used_size < (int)sizeof (struct did_min_name))
1744 				continue;
1745 		}
1746 		minor_name = ((struct did_min_name *)did_min_namep)->min_name;
1747 
1748 		/*
1749 		 * Try to find disk in the system. If we can't find the
1750 		 * disk, we have a partial diskset.
1751 		 */
1752 		if ((meta_deviceid_to_nmlist(search_path,
1753 		    did, minor_name, &nm)) != 0) {
1754 			(void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD,
1755 			    mynode(), NULL, NULL);
1756 			return (ENOTSUP);
1757 		}
1758 		devid_free_nmlist(nm);
1759 		used_size -= DID_SHR_NAMSIZ((struct did_shr_name *)
1760 		    did_shr_namep);
1761 		/* increment to next item in the shared spaces */
1762 		did_shr_nsize = DID_SHR_NAMSIZ((struct did_shr_name *)
1763 		    did_shr_namep);
1764 		did_shr_namep = ((void *)((char *)did_shr_namep +
1765 		    did_shr_nsize));
1766 	}
1767 	return (0);
1768 }
1769 
1770 
1771 /*
1772  * report_metadb_info()
1773  *
1774  * Generates metadb output for the diskset.
1775  *
1776  */
1777 static void
1778 report_metadb_info(
1779 	md_im_set_desc_t	*misp,
1780 	char			*indent
1781 )
1782 {
1783 	md_im_drive_info_t	*d;
1784 	md_im_replica_info_t	*r;
1785 	char			*unk_str = "";
1786 	int			i;
1787 
1788 	(void) printf("%s\t%5.5s\t\t%9.9s\t%11.11s\n", indent, gettext("flags"),
1789 	    gettext("first blk"), gettext("block count"));
1790 
1791 	unk_str = gettext("unknown");
1792 
1793 	/*
1794 	 * Looping through all drives in the diskset to print
1795 	 * out information about the drive and if the verbose
1796 	 * option is set print out replica data.
1797 	 */
1798 	for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
1799 
1800 		if (d->mid_replicas != NULL) {
1801 			for (r = d->mid_replicas; r != NULL;
1802 			    r = r->mir_next) {
1803 				(void) printf("%s", indent);
1804 				for (i = 0; i < MDDB_FLAGS_LEN; i++) {
1805 					if (r->mir_flags & (1 << i)) {
1806 						(void) putchar(
1807 						    MDDB_FLAGS_STRING[i]);
1808 					} else {
1809 						(void) putchar(' ');
1810 					}
1811 				}
1812 				if ((r->mir_offset == -1) && (r->mir_length
1813 				    == -1)) {
1814 					(void) printf("%7.7s\t\t%7.7s\t",
1815 					    unk_str, unk_str);
1816 				} else if (r->mir_length == -1) {
1817 					(void) printf("%i\t\t%7.7s\t",
1818 					    r->mir_offset, unk_str);
1819 				} else {
1820 					(void) printf("%i\t\t%i\t",
1821 					    r->mir_offset, r->mir_length);
1822 				}
1823 				(void) printf("\t%s\n",
1824 				    d->mid_devname);
1825 			}
1826 		}
1827 	}
1828 	(void) printf("\n");
1829 }
1830 
1831 
1832 /*
1833  * report_set_info()
1834  *
1835  * Returns:
1836  *	< 0 for failure
1837  *	  0 for success
1838  *
1839  */
1840 static int
1841 report_set_info(
1842 	md_im_set_desc_t	*misp,
1843 	mddb_mb_t		*mb,
1844 	mddb_lb_t		*lbp,
1845 	mddb_rb_t		*nm,
1846 	pnm_rec_t		**pnm,
1847 	mdname_t		*rsp,
1848 	int			fd,
1849 	uint_t			imp_flags,
1850 	int			set_count,
1851 	md_error_t		*ep
1852 )
1853 {
1854 	int 			rval = 0;
1855 	md_im_drive_info_t	*d;
1856 	md_im_replica_info_t	*r;
1857 	md_im_drive_info_t	*good_disk = NULL;
1858 	int			i;
1859 	int			in = META_INDENT;
1860 	char			indent[MAXPATHLEN];
1861 	int			dlen = 0;
1862 	md_timeval32_t		firstdisktime;
1863 	md_timeval32_t		lastaccess; /* stores last modified timestamp */
1864 	int			set_contains_time_conflict = 0;
1865 	int			disk_time_conflict = 0;
1866 
1867 
1868 	/* Calculates the correct indentation. */
1869 	indent[0] = 0;
1870 	for (i = 0; i < in; i++)
1871 		(void) strlcat(indent, " ", sizeof (indent));
1872 
1873 	/*
1874 	 * This will print before the information for the first diskset
1875 	 * if the verbose option was set.
1876 	 */
1877 	if (set_count == 1) {
1878 		if (imp_flags & META_IMP_REPORT) {
1879 			(void) printf("\n%s:\n\n",
1880 			    gettext("Disksets eligible for import"));
1881 		}
1882 	}
1883 
1884 	/*
1885 	 * Make the distinction between a regular diskset and
1886 	 * a replicated diskset.
1887 	 */
1888 	if (misp->mis_flags & MD_IM_SET_REPLICATED) {
1889 		if (imp_flags & META_IMP_REPORT) {
1890 			(void) printf("%i)  %s:\n", set_count, gettext(
1891 			    "Found replicated diskset containing disks"));
1892 		} else {
1893 			(void) printf("\n%s:\n", gettext(
1894 			    "Importing replicated diskset containing disks"));
1895 		}
1896 	} else {
1897 		if (imp_flags & META_IMP_REPORT) {
1898 			(void) printf("%i)  %s:\n", set_count, gettext(
1899 			    "Found regular diskset containing disks"));
1900 		} else {
1901 			(void) printf("\n%s:\n", gettext(
1902 			    "Importing regular diskset containing disks"));
1903 		}
1904 	}
1905 
1906 
1907 	/*
1908 	 * Save the set creation time for the first disk in the
1909 	 * diskset.
1910 	 */
1911 	for (d = misp->mis_drives; d != NULL; d = d->mid_next) {
1912 		dlen = max(dlen, strlen(d->mid_dnp->cname));
1913 		if (good_disk == NULL) {
1914 			for (r = d->mid_replicas; r != NULL; r = r->mir_next) {
1915 				if (r->mir_flags & MDDB_F_ACTIVE) {
1916 					good_disk = d;
1917 					firstdisktime =
1918 					    d->mid_setcreatetimestamp;
1919 					break;
1920 				}
1921 			}
1922 		} else {
1923 			break;
1924 		}
1925 	}
1926 
1927 
1928 	/*
1929 	 * Compares the set creation time from the first disk in the
1930 	 * diskset to the diskset creation time on all other
1931 	 * disks in the diskset.
1932 	 * If they are different then the disk probably belongs to a
1933 	 * different diskset so we will print out a warning.
1934 	 *
1935 	 * Looping through all drives in the diskset to print
1936 	 * out information about the drive.
1937 	 */
1938 	for (d = misp->mis_drives; d != NULL; disk_time_conflict = 0,
1939 	    d = d->mid_next) {
1940 		/*
1941 		 * Verify that the disk's seconds and micro-seconds fields
1942 		 * match the fields for the good_disk.
1943 		 */
1944 		if ((firstdisktime.tv_sec !=
1945 		    d->mid_setcreatetimestamp.tv_sec) ||
1946 		    (firstdisktime.tv_usec !=
1947 		    d->mid_setcreatetimestamp.tv_usec)) {
1948 			disk_time_conflict = 1;
1949 			set_contains_time_conflict = 1;
1950 		}
1951 
1952 		/* Printing disk names. */
1953 		if (disk_time_conflict == 1) {
1954 			/* print '*' next to conflicting disk */
1955 			(void) printf("%s%-*.*s *\n", indent,
1956 			    dlen, dlen, d->mid_dnp->cname);
1957 		} else {
1958 			(void) printf("%s%-*.*s\n", indent,
1959 			    dlen, dlen, d->mid_dnp->cname);
1960 		}
1961 	}
1962 	(void) printf("\n");
1963 
1964 	/*
1965 	 * This note explains the "*" that appears next to the
1966 	 * disks with metadbs' whose lb_inittime timestamp does not
1967 	 * match the rest of the diskset.
1968 	 */
1969 	if (set_contains_time_conflict) {
1970 		(void) printf("%s%s\n%s%s\n\n", indent,
1971 		    gettext("* WARNING: This disk has been reused in "
1972 		    "another diskset."), indent, gettext("Import may corrupt "
1973 		    "data in the diskset."));
1974 	}
1975 
1976 
1977 	/*
1978 	 * If the verbose flag was given on the command line,
1979 	 * we will print out the metastat -c information , the
1980 	 * creation time, and last modified time for the diskset.
1981 	 */
1982 	if (imp_flags & META_IMP_VERBOSE) {
1983 		(void) printf("%s%s\n", indent,
1984 		    gettext("Metadatabase information:"));
1985 		report_metadb_info(misp, indent);
1986 
1987 		/*
1988 		 * Printing creation time and last modified time.
1989 		 * Last modified: uses the global variable "lastaccess",
1990 		 * which is set to the last updated timestamp from all of
1991 		 * the database blocks(db_timestamp) or record blocks
1992 		 * (rb_timestamp).
1993 		 * Creation time is the locator block init time
1994 		 * (lb_inittime).
1995 		 */
1996 		lastaccess = good_disk->mid_replicas->mir_timestamp;
1997 
1998 		(void) printf("%s%s\n", indent,
1999 		    gettext("Metadevice information:"));
2000 		rval = report_metastat_info(mb, lbp, nm, pnm, rsp, fd,
2001 		    &lastaccess, ep);
2002 		if (rval < 0) {
2003 			return (rval);
2004 		}
2005 
2006 		(void) printf("%s%s:\t%s\n", indent,
2007 		    gettext("Creation time"),
2008 		    meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2009 		(void) printf("%s%s:\t%s\n", indent,
2010 		    gettext("Last modified time"),
2011 		    meta_print_time(&lastaccess));
2012 	} else {
2013 		/*
2014 		 * Even if the verbose option is not set, we will print the
2015 		 * creation time for the diskset.
2016 		 */
2017 		(void) printf("%s%s:\t%s\n", indent, gettext("Creation time"),
2018 		    meta_print_time(&good_disk->mid_replicas->mir_timestamp));
2019 	}
2020 
2021 
2022 	/*
2023 	 * If the diskset is not actually being imported, then we
2024 	 * print out extra information about how to import it.
2025 	 * If the verbose flag was not set, then we will also
2026 	 * print out information about how to obtain verbose output.
2027 	 */
2028 	if (imp_flags & META_IMP_REPORT) {
2029 		/*
2030 		 * TRANSLATION_NOTE
2031 		 *
2032 		 * The translation of the phrase "For more information
2033 		 * about this set" will be followed by a ":" and a
2034 		 * suggested command (untranslatable) that the user
2035 		 * may use to request additional information.
2036 		 */
2037 		if (!(imp_flags & META_IMP_VERBOSE)) {
2038 		(void) printf("%s%s:\n%s  %s -r -v %s\n", indent,
2039 		    gettext("For more information about this diskset"),
2040 		    indent, myname, good_disk->mid_dnp->cname);
2041 		}
2042 		/*
2043 		 * TRANSLATION_NOTE
2044 		 *
2045 		 * The translation of the phrase "To import this set"
2046 		 * will be followed by a ":" and a suggested command
2047 		 * (untranslatable) that the user may use to import
2048 		 * the specified diskset.
2049 		 */
2050 		(void) printf("%s%s:\n%s  %s -s <newsetname> %s\n", indent,
2051 		    gettext("To import this diskset"), indent, myname,
2052 		    good_disk->mid_dnp->cname);
2053 	}
2054 	(void) printf("\n\n");
2055 
2056 	return (rval);
2057 }
2058 
2059 
2060 /*
2061  * meta_get_and_report_set_info
2062  *
2063  * Scans a given drive for set specific information. If the given drive
2064  * has a shared metadb, scans the shared metadb for information pertaining
2065  * to the set.
2066  *
2067  * Returns:
2068  * 	<0 	for failure
2069  *	0	success but no replicas were found
2070  *	1	success and a replica was found
2071  *	ENOTSUP for partial disksets detected
2072  */
2073 int
2074 meta_get_and_report_set_info(
2075 	mddrivenamelist_t	*dp,
2076 	md_im_set_desc_t	**mispp,
2077 	int			local_mb_ok,
2078 	uint_t			imp_flags,
2079 	int			*set_count,
2080 	md_error_t 		*ep
2081 )
2082 {
2083 	uint_t			s;
2084 	mdname_t		*rsp;
2085 	int			fd;
2086 	char			mb[DEV_BSIZE];
2087 				/*LINTED*/
2088 	mddb_mb_t		*mbp = (mddb_mb_t *)mb;
2089 	char			lb[dbtob(MDDB_LBCNT)];
2090 				/*LINTED*/
2091 	mddb_lb_t		*lbp = (mddb_lb_t *)lb;
2092 	mddb_did_blk_t		*lbdidp = NULL;
2093 	mddb_ln_t		*lnp = NULL;
2094 	int			lnsize, lbdid_size;
2095 	int			rval = 0;
2096 	char			db[DEV_BSIZE];
2097 				/*LINTED*/
2098 	mddb_db_t		*dbp = (mddb_db_t *)db;
2099 	did_list_t		*did_listp = NULL;
2100 	mddrivenamelist_t	*dnlp;
2101 	mddrivename_t 		*dnp;
2102 	md_im_names_t		cnames = { 0, NULL};
2103 	char			*nm = NULL;
2104 	char			*did_nm = NULL, *did_shrnm = NULL;
2105 	struct nm_rec		*nmp;
2106 	struct devid_shr_rec	*did_shrnmp;
2107 	struct devid_min_rec	*did_nmp;
2108 	int			extended_namespace = 0;
2109 	int			replicated = 0;
2110 	pnm_rec_t		*pnm = NULL; /* list of physical devs in set */
2111 	md_im_set_desc_t	*misp;
2112 
2113 	dnp = dp->drivenamep;
2114 
2115 	/*
2116 	 * Determine and open the replica slice
2117 	 */
2118 	if (meta_replicaslice(dnp, &s, ep) != 0) {
2119 		return (-1);
2120 	}
2121 
2122 	/*
2123 	 * Test for the size of replica slice in question. If
2124 	 * the size is zero, we know that this is not a disk that was
2125 	 * part of a set and it should be silently ignored for import.
2126 	 */
2127 	if (dnp->vtoc.parts[s].size == 0)
2128 		return (0);
2129 
2130 	if ((rsp = metaslicename(dnp, s, ep)) == NULL) {
2131 		return (-1);
2132 	}
2133 
2134 	if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0)
2135 		return (mdsyserror(ep, errno, rsp->cname));
2136 
2137 	/*
2138 	 * After the open() succeeds, we should return via the "out"
2139 	 * label to clean up after ourselves.  (Up 'til now, we can
2140 	 * just return directly, because there are no resources to
2141 	 * give back.)
2142 	 */
2143 
2144 	if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0)
2145 		goto out;
2146 
2147 	replicated = is_replicated(fd, mbp, 0, NULL);
2148 
2149 	if (!local_mb_ok && mbp->mb_setno == 0) {
2150 		rval = 0;
2151 		goto out;
2152 	}
2153 
2154 	if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0)
2155 		goto out;
2156 
2157 	/*
2158 	 * Once the locator block has been read, we need to
2159 	 * check if the locator block commit count is zero.
2160 	 * If it is zero, we know that the replica we're dealing
2161 	 * with is on a disk that was deleted from the disk set;
2162 	 * and, it potentially has stale data. We need to quit
2163 	 * in that case
2164 	 */
2165 	if (lbp->lb_commitcnt == 0) {
2166 		rval = 0;
2167 		goto out;
2168 	}
2169 
2170 	/*
2171 	 * Make sure that the disk being imported has device id
2172 	 * namespace present for disksets. If a disk doesn't have
2173 	 * device id namespace, we skip reading the replica on that disk
2174 	 */
2175 	if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) {
2176 		rval = 0;
2177 		goto out;
2178 	}
2179 
2180 	/*
2181 	 * Grab the locator block device id array. Allocate memory for the
2182 	 * array first.
2183 	 */
2184 	lbdid_size = dbtob(lbp->lb_didblkcnt);
2185 	lbdidp = Zalloc(lbdid_size);
2186 
2187 	if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp,
2188 	    lbdid_size)) <= 0)
2189 		goto out;
2190 
2191 	/*
2192 	 * For a disk that has not been replicated, extract the device ids
2193 	 * stored in the locator block device id array and store them in
2194 	 * a list.
2195 	 *
2196 	 * If the disk has been replicated using replication software such
2197 	 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in
2198 	 * the locator block are invalid and we need to build a list of
2199 	 * replicated disks.
2200 	 */
2201 	if (replicated && !replicated_disk_list_built) {
2202 		/*
2203 		 * if there's a replicated diskset involved, we need to
2204 		 * scan the system one more time and build a list of all
2205 		 * candidate disks that might be part of that replicated set
2206 		 */
2207 		if (meta_list_disks(ep, &cnames) != 0) {
2208 			rval = 0;
2209 			goto out;
2210 		}
2211 		dnlp = meta_prune_cnames(ep, &cnames, 0);
2212 		rval = build_replicated_disks_list(ep, dnlp);
2213 		if (rval == 0)
2214 			goto out;
2215 	}
2216 
2217 	rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated);
2218 
2219 	if ((rval <= 0) || (rval == ENOTSUP))
2220 		goto out;
2221 
2222 	/*
2223 	 * Until here, we've gotten away with fixed sizes for the
2224 	 * master block and locator block.  The locator names,
2225 	 * however, are sized (and therefore allocated) dynamically
2226 	 * according to information in the locator block.
2227 	 */
2228 	lnsize = dbtob(lbp->lb_lnblkcnt);
2229 	lnp = Zalloc(lnsize);
2230 
2231 	if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0)
2232 		goto out;
2233 
2234 	/*
2235 	 * Read in the NM record
2236 	 * If no NM record was found, it still is a valid configuration
2237 	 * but it also means that we won't find any corresponding DID_NM
2238 	 * or DID_SHR_NM.
2239 	 */
2240 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname))
2241 	    < 0)
2242 		goto out;
2243 	else if (rval == 0)
2244 		goto append;
2245 
2246 	/*
2247 	 * At this point, we have read in all of the blocks that form
2248 	 * the nm_rec.  We should at least detect the corner case
2249 	 * mentioned above, in which r_next_recid links to another
2250 	 * nm_rec. Extended namespace handling is left for Phase 2.
2251 	 *
2252 	 * What this should really be is a loop, each iteration of
2253 	 * which reads in a nm_rec and calls the set_append().
2254 	 */
2255 	/*LINTED*/
2256 	nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t));
2257 	if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2258 		extended_namespace = 1;
2259 		rval = 0;
2260 		goto out;
2261 	}
2262 
2263 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm,
2264 	    MDDB_DID_NM, rsp->cname)) < 0)
2265 		goto out;
2266 	else if (rval == 0)
2267 		goto append;
2268 
2269 	/*LINTED*/
2270 	did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t) -
2271 	    sizeof (int));
2272 	if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2273 		extended_namespace = 1;
2274 		rval = 0;
2275 		goto out;
2276 	}
2277 
2278 	if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm,
2279 	    MDDB_DID_SHR_NM, rsp->cname)) < 0)
2280 		goto out;
2281 	else if (rval == 0)
2282 		goto append;
2283 
2284 	/*LINTED*/
2285 	did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t) -
2286 	    sizeof (int));
2287 	if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) {
2288 		extended_namespace = 1;
2289 		rval = 0;
2290 		goto out;
2291 	}
2292 
2293 	/*
2294 	 * We need to check if all of the disks listed in the namespace
2295 	 * are actually available. If they aren't we'll return with
2296 	 * an ENOTSUP error which indicates a partial diskset.
2297 	 */
2298 	rval = check_nm_disks(ep, did_nmp, did_shrnmp);
2299 	if ((rval < 0) || (rval == ENOTSUP))
2300 		goto out;
2301 
2302 append:
2303 	/* Finally, we've got what we need to process this replica. */
2304 	misp = set_append(mispp, did_listp, mbp, lbp,
2305 	    /*LINTED*/
2306 	    (mddb_rb_t *)nm, &pnm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm,
2307 	    imp_flags, replicated, ep);
2308 
2309 	*set_count += 1;
2310 	rval = report_set_info(misp, mbp, lbp,
2311 		/*LINTED*/
2312 		(mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count, ep);
2313 	if (rval < 0)
2314 		goto out;
2315 
2316 	/* Return the fact that we found at least one set */
2317 	rval = 1;
2318 
2319 out:
2320 	if (fd >= 0)
2321 		(void) close(fd);
2322 	if (did_listp != NULL)
2323 		free_did_list(did_listp);
2324 	if (lnp != NULL)
2325 		Free(lnp);
2326 	if (nm != NULL)
2327 		Free(nm);
2328 	if (did_nm != NULL)
2329 		Free(did_nm);
2330 	if (did_shrnm != NULL)
2331 		Free(did_shrnm);
2332 	if (pnm != NULL)
2333 		free_pnm_rec_list(&pnm);
2334 
2335 	/*
2336 	 * If we are at the end of the list, we must free up
2337 	 * the replicated list too
2338 	 */
2339 	if (dp->next == NULL)
2340 		free_replicated_disks_list();
2341 
2342 	if (extended_namespace)
2343 		return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD,
2344 		    mynode(), NULL, NULL));
2345 
2346 	return (rval);
2347 }
2348 
2349 /*
2350  * Return the minor name associated with a given disk slice
2351  */
2352 static char *
2353 meta_getminor_name(
2354 	char *devname,
2355 	md_error_t *ep
2356 )
2357 {
2358 	int 	fd = -1;
2359 	char 	*minor_name = NULL;
2360 	char	*ret_minor_name = NULL;
2361 
2362 	if (devname == NULL)
2363 		return (NULL);
2364 
2365 	if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) {
2366 		(void) mdsyserror(ep, errno, devname);
2367 		return (NULL);
2368 	}
2369 
2370 	if (devid_get_minor_name(fd, &minor_name) == 0) {
2371 		ret_minor_name = Strdup(minor_name);
2372 		devid_str_free(minor_name);
2373 	}
2374 
2375 	(void) close(fd);
2376 	return (ret_minor_name);
2377 }
2378 
2379 static int
2380 meta_replica_quorum(
2381 	md_im_set_desc_t *misp,
2382 	md_error_t *ep
2383 )
2384 {
2385 	md_im_drive_info_t	*midp;
2386 	mddrivename_t		*dnp;
2387 	md_im_replica_info_t    *midr;
2388 	mdname_t		*np;
2389 	struct stat		st_buf;
2390 	uint_t			rep_slice;
2391 	int			replica_count = 0;
2392 
2393 	for (midp = misp->mis_drives; midp != NULL;
2394 		midp = midp->mid_next) {
2395 
2396 		dnp = midp->mid_dnp;
2397 
2398 		if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
2399 			((np = metaslicename(dnp, rep_slice, ep))
2400 			== NULL)) {
2401 			mdclrerror(ep);
2402 			continue;
2403 		}
2404 
2405 		if (stat(np->bname, &st_buf) != 0)
2406 			continue;
2407 
2408 		/*
2409 		 * The drive is okay now count its replicas
2410 		 */
2411 		for (midr = midp->mid_replicas; midr != NULL;
2412 			midr = midr->mir_next) {
2413 			replica_count++;
2414 		}
2415 	}
2416 
2417 	if (replica_count < (misp->mis_active_replicas + 1)/2)
2418 		return (-1);
2419 
2420 	return (0);
2421 }
2422 
2423 static set_t
2424 meta_imp_setno(
2425 	md_error_t *ep
2426 )
2427 {
2428 	set_t	max_sets, setno;
2429 	int	bool;
2430 
2431 	if ((max_sets = get_max_sets(ep)) == 0) {
2432 		return (MD_SET_BAD);
2433 	}
2434 
2435 	/*
2436 	 * This code needs to be expanded when we run in SunCluster
2437 	 * environment SunCluster obtains setno internally
2438 	 */
2439 	for (setno = 1; setno < max_sets; setno++) {
2440 		if (clnt_setnumbusy(mynode(), setno,
2441 			&bool, ep) == -1) {
2442 			setno = MD_SET_BAD;
2443 			break;
2444 		}
2445 		/*
2446 		 * found one available
2447 		 */
2448 		if (bool == FALSE)
2449 			break;
2450 	}
2451 
2452 	if (setno == max_sets) {
2453 		setno = MD_SET_BAD;
2454 	}
2455 
2456 	return (setno);
2457 }
2458 
2459 int
2460 meta_imp_set(
2461 	md_im_set_desc_t *misp,
2462 	char		*setname,
2463 	int		force,
2464 	bool_t		dry_run,
2465 	md_error_t	*ep
2466 )
2467 {
2468 	md_timeval32_t		tp;
2469 	md_im_drive_info_t	*midp;
2470 	uint_t			rep_slice;
2471 	mddrivename_t		*dnp;
2472 	struct mddb_config	c;
2473 	mdname_t		*np;
2474 	md_im_replica_info_t	*mirp;
2475 	char			setnum_link[MAXPATHLEN];
2476 	char			setname_link[MAXPATHLEN];
2477 	char			*minor_name = NULL;
2478 
2479 	(void) memset(&c, 0, sizeof (c));
2480 	(void) strlcpy(c.c_setname, setname, sizeof (c.c_setname));
2481 	c.c_sideno = 0;
2482 	c.c_flags = MDDB_C_IMPORT;
2483 
2484 	/*
2485 	 * Check to see if the setname that the set is being imported into,
2486 	 * already exists.
2487 	 */
2488 	if (getsetbyname(c.c_setname, ep) != NULL) {
2489 		return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD,
2490 		    mynode(), NULL, c.c_setname));
2491 	}
2492 
2493 	/*
2494 	 * Find the next available set number
2495 	 */
2496 	if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) {
2497 		return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD,
2498 		    mynode(), NULL, c.c_setname));
2499 	}
2500 
2501 	if (meta_gettimeofday(&tp) == -1) {
2502 		return (mdsyserror(ep, errno, NULL));
2503 	}
2504 	c.c_timestamp = tp;
2505 
2506 	/* Check to see if replica quorum requirement is fulfilled */
2507 	if (!force && meta_replica_quorum(misp, ep) == -1)
2508 		return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD,
2509 		    mynode(), NULL, c.c_setname));
2510 
2511 	for (midp = misp->mis_drives; midp != NULL;
2512 		midp = midp->mid_next) {
2513 		mdcinfo_t	*cinfo;
2514 
2515 		/*
2516 		 * We pass down the list of the drives in the
2517 		 * set down to the kernel irrespective of
2518 		 * whether the drives have a replica or not.
2519 		 *
2520 		 * The kernel detects which of the drives don't
2521 		 * have a replica and accordingly does the
2522 		 * right thing.
2523 		 */
2524 		dnp = midp->mid_dnp;
2525 		if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) ||
2526 		    ((np = metaslicename(dnp, rep_slice, ep))
2527 		    == NULL)) {
2528 			mdclrerror(ep);
2529 			continue;
2530 		}
2531 
2532 		(void) strcpy(c.c_locator.l_devname, np->bname);
2533 		c.c_locator.l_dev = meta_cmpldev(np->dev);
2534 		c.c_locator.l_mnum = meta_getminor(np->dev);
2535 		c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz);
2536 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
2537 		    midp->mid_devid, midp->mid_devid_sz);
2538 		c.c_locator.l_devid_sz = midp->mid_devid_sz;
2539 		c.c_locator.l_devid_flags =
2540 		    MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
2541 		if (midp->mid_o_devid) {
2542 			c.c_locator.l_old_devid =
2543 			    (uint64_t)(uintptr_t)Malloc(midp->mid_o_devid_sz);
2544 			(void) memcpy((void *)(uintptr_t)
2545 			    c.c_locator.l_old_devid,
2546 			    midp->mid_o_devid, midp->mid_o_devid_sz);
2547 			c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz;
2548 		}
2549 		minor_name = meta_getminor_name(np->bname, ep);
2550 		(void) strncpy(c.c_locator.l_minor_name, minor_name,
2551 		    sizeof (c.c_locator.l_minor_name));
2552 
2553 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
2554 			mdclrerror(ep);
2555 			continue;
2556 		}
2557 		(void) strncpy(c.c_locator.l_driver, cinfo->dname,
2558 		    sizeof (c.c_locator.l_driver));
2559 
2560 		mirp = midp->mid_replicas;
2561 
2562 		do {
2563 			if (mirp) {
2564 				c.c_locator.l_flags = 0;
2565 				c.c_locator.l_blkno = mirp->mir_offset;
2566 				mirp = mirp->mir_next;
2567 			} else {
2568 				/*
2569 				 * Default offset for dummy is 16
2570 				 */
2571 				c.c_locator.l_blkno = 16;
2572 			}
2573 
2574 			if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
2575 				Free((void *)(uintptr_t)c.c_locator.l_devid);
2576 				if (c.c_locator.l_old_devid)
2577 					Free((void *)(uintptr_t)
2578 					    c.c_locator.l_old_devid);
2579 				return (mdstealerror(ep, &c.c_mde));
2580 			}
2581 		} while (mirp != NULL);
2582 	}
2583 
2584 	/*
2585 	 * If the dry run option was specified, flag success
2586 	 * and exit out
2587 	 */
2588 	if (dry_run == 1) {
2589 		md_eprintf("%s\n", dgettext(TEXT_DOMAIN,
2590 		    "import should be successful"));
2591 		Free((void *)(uintptr_t)c.c_locator.l_devid);
2592 		if (c.c_locator.l_old_devid)
2593 			Free((void *)(uintptr_t)c.c_locator.l_old_devid);
2594 		return (0);
2595 	}
2596 
2597 	/*
2598 	 * Now kernel should have all the information
2599 	 * regarding the import diskset replica.
2600 	 * Tell kernel to load them up and import the set
2601 	 */
2602 	if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0) {
2603 		Free((void *)(uintptr_t)c.c_locator.l_devid);
2604 		if (c.c_locator.l_old_devid)
2605 			Free((void *)(uintptr_t)c.c_locator.l_old_devid);
2606 		return (mdstealerror(ep, &c.c_mde));
2607 	}
2608 
2609 	(void) meta_smf_enable(META_SMF_DISKSET, NULL);
2610 
2611 	/* The set has now been imported, create the appropriate symlink */
2612 	(void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname);
2613 	(void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno);
2614 
2615 	/*
2616 	 * Since we already verified that the setname was OK, make sure to
2617 	 * cleanup before proceeding.
2618 	 */
2619 	if (unlink(setname_link) == -1) {
2620 		if (errno != ENOENT)
2621 			(void) mdsyserror(ep, errno, setname_link);
2622 	}
2623 
2624 	if (symlink(setnum_link, setname_link) == -1)
2625 		(void) mdsyserror(ep, errno, setname_link);
2626 
2627 	/* resnarf the set that has just been imported */
2628 	if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0)
2629 		md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and "
2630 		    "restart rpc.metad"));
2631 
2632 	Free((void *)(uintptr_t)c.c_locator.l_devid);
2633 	if (c.c_locator.l_old_devid)
2634 		Free((void *)(uintptr_t)c.c_locator.l_old_devid);
2635 	return (0);
2636 }
2637