xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_set_tkr.c (revision 12630:e9b26c370c79)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51945Sjeanm  * Common Development and Distribution License (the "License").
61945Sjeanm  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*12630SRay.Hassan@oracle.COM  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * Metadevice diskset interfaces
280Sstevel@tonic-gate  */
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #include "meta_set_prv.h"
310Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
322902Spetede #include <strings.h>
332902Spetede #include <sys/bitmap.h>
340Sstevel@tonic-gate 
351945Sjeanm extern	char	*blkname(char *);
360Sstevel@tonic-gate 
370Sstevel@tonic-gate static int
upd_dr_dbinfo(mdsetname_t * sp,md_set_desc * sd,md_drive_desc * dd,md_replicalist_t * rlp,int forceflg,md_error_t * ep)380Sstevel@tonic-gate upd_dr_dbinfo(
390Sstevel@tonic-gate 	mdsetname_t		*sp,
400Sstevel@tonic-gate 	md_set_desc		*sd,
410Sstevel@tonic-gate 	md_drive_desc		*dd,
420Sstevel@tonic-gate 	md_replicalist_t	*rlp,
430Sstevel@tonic-gate 	int			forceflg,
440Sstevel@tonic-gate 	md_error_t		*ep
450Sstevel@tonic-gate )
460Sstevel@tonic-gate {
470Sstevel@tonic-gate 	md_drive_desc		*p;
480Sstevel@tonic-gate 	md_replica_t		*r;
490Sstevel@tonic-gate 	md_replicalist_t	*rl;
500Sstevel@tonic-gate 	int			i;
510Sstevel@tonic-gate 	int			dbcnt;
520Sstevel@tonic-gate 	int			rval = 0;
530Sstevel@tonic-gate 	daddr_t			nblks = 0;
540Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
550Sstevel@tonic-gate 	md_error_t		xep = mdnullerror;
560Sstevel@tonic-gate 	md_mnnode_desc		*nd;
570Sstevel@tonic-gate 	ddi_devid_t		devid;
580Sstevel@tonic-gate 
590Sstevel@tonic-gate 	/* find the smallest existing replica */
600Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
610Sstevel@tonic-gate 		r = rl->rl_repp;
620Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
630Sstevel@tonic-gate 	}
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 	if (nblks <= 0)
660Sstevel@tonic-gate 		nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE;
670Sstevel@tonic-gate 
680Sstevel@tonic-gate 	for (p = dd; p != NULL; p = p->dd_next) {
690Sstevel@tonic-gate 		dbcnt = 0;
700Sstevel@tonic-gate 		for (rl = rlp; rl != NULL; rl = rl->rl_next) {
710Sstevel@tonic-gate 			r = rl->rl_repp;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate 			/*
740Sstevel@tonic-gate 			 * Before we bump up the dbcnt, if we're
750Sstevel@tonic-gate 			 * running with device ids in disksets, let's
760Sstevel@tonic-gate 			 * compare the device ids otherwise we compare
770Sstevel@tonic-gate 			 * the ctd names.
780Sstevel@tonic-gate 			 *
790Sstevel@tonic-gate 			 * There is a possibility the device ids might
800Sstevel@tonic-gate 			 * have changed. To account for that case, we
810Sstevel@tonic-gate 			 * fallback to comparing the ctd names if the
820Sstevel@tonic-gate 			 * device id comparison fails. If we aren't running
830Sstevel@tonic-gate 			 * in device id mode and a disk has moved, the ctd's
840Sstevel@tonic-gate 			 * won't match.
850Sstevel@tonic-gate 			 */
860Sstevel@tonic-gate 			if ((p->dd_dnp->devid != NULL) &&
870Sstevel@tonic-gate 			    (r->r_devid != NULL) && (!MD_MNSET_DESC(sd))) {
880Sstevel@tonic-gate 				(void) devid_str_decode(p->dd_dnp->devid,
890Sstevel@tonic-gate 				    &devid, NULL);
900Sstevel@tonic-gate 				if ((devid_compare(devid, r->r_devid) == 0) ||
910Sstevel@tonic-gate 				    (strcmp(r->r_namep->drivenamep->cname,
920Sstevel@tonic-gate 				    p->dd_dnp->cname) == 0))
930Sstevel@tonic-gate 					dbcnt++;
940Sstevel@tonic-gate 				devid_free(devid);
950Sstevel@tonic-gate 			} else {
960Sstevel@tonic-gate 				if (strcmp(r->r_namep->drivenamep->cname,
970Sstevel@tonic-gate 				    p->dd_dnp->cname) == 0)
980Sstevel@tonic-gate 					dbcnt++;
990Sstevel@tonic-gate 			}
1000Sstevel@tonic-gate 		}
1010Sstevel@tonic-gate 		p->dd_dbcnt = dbcnt;
1020Sstevel@tonic-gate 		p->dd_dbsize = dbcnt > 0 ? nblks : 0;
1030Sstevel@tonic-gate 	}
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	/* Lock the set on current set members */
1060Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
1070Sstevel@tonic-gate 		nd = sd->sd_nodelist;
1080Sstevel@tonic-gate 		while (nd) {
1090Sstevel@tonic-gate 			/* If this is forced, don't lock other sides */
1100Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), nd->nd_nodename)
1110Sstevel@tonic-gate 			    != 0) {
1120Sstevel@tonic-gate 				nd = nd->nd_next;
1130Sstevel@tonic-gate 				continue;
1140Sstevel@tonic-gate 			}
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 			/* We already locked this side in the caller */
1170Sstevel@tonic-gate 			if (strcmp(mynode(), nd->nd_nodename) == 0) {
1180Sstevel@tonic-gate 				nd = nd->nd_next;
1190Sstevel@tonic-gate 				continue;
1200Sstevel@tonic-gate 			}
1210Sstevel@tonic-gate 
1220Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
1230Sstevel@tonic-gate 				nd = nd->nd_next;
1240Sstevel@tonic-gate 				continue;
1250Sstevel@tonic-gate 			}
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate 			if (clnt_lock_set(nd->nd_nodename, sp, ep)) {
1280Sstevel@tonic-gate 				rval = -1;
1290Sstevel@tonic-gate 				goto out;
1300Sstevel@tonic-gate 			}
1310Sstevel@tonic-gate 			nd = nd->nd_next;
1320Sstevel@tonic-gate 		}
1330Sstevel@tonic-gate 	} else {
1340Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
1350Sstevel@tonic-gate 			/* Skip empty slots */
1360Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
1370Sstevel@tonic-gate 				continue;
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 			/* If this is forced, don't lock other sides */
1400Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
1410Sstevel@tonic-gate 				continue;
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 			/* We already locked this side in the caller */
1440Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
1450Sstevel@tonic-gate 				continue;
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate 			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
1480Sstevel@tonic-gate 				rval = -1;
1490Sstevel@tonic-gate 				goto out;
1500Sstevel@tonic-gate 			}
1510Sstevel@tonic-gate 		}
1520Sstevel@tonic-gate 	}
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
1550Sstevel@tonic-gate 		nd = sd->sd_nodelist;
1560Sstevel@tonic-gate 		while (nd) {
1570Sstevel@tonic-gate 			/* If this is forced, then only care about this node */
1580Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), nd->nd_nodename)
1590Sstevel@tonic-gate 			    != 0) {
1600Sstevel@tonic-gate 				nd = nd->nd_next;
1610Sstevel@tonic-gate 				continue;
1620Sstevel@tonic-gate 			}
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
1650Sstevel@tonic-gate 				nd = nd->nd_next;
1660Sstevel@tonic-gate 				continue;
1670Sstevel@tonic-gate 			}
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd,
1700Sstevel@tonic-gate 			    ep) == -1) {
1710Sstevel@tonic-gate 				if (! mdiserror(ep, MDE_NO_SET) &&
1720Sstevel@tonic-gate 				    ! mdismddberror(ep, MDE_DB_NODB)) {
1730Sstevel@tonic-gate 					rval = -1;
1740Sstevel@tonic-gate 					break;
1750Sstevel@tonic-gate 				}
1760Sstevel@tonic-gate 				mdclrerror(ep);
1770Sstevel@tonic-gate 			}
1780Sstevel@tonic-gate 			nd = nd->nd_next;
1790Sstevel@tonic-gate 		}
1800Sstevel@tonic-gate 	} else {
1810Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
1820Sstevel@tonic-gate 			/* Skip empty slots */
1830Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
1840Sstevel@tonic-gate 				continue;
1850Sstevel@tonic-gate 
1860Sstevel@tonic-gate 			/* If this is forced, then only care about this node */
1870Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
1880Sstevel@tonic-gate 				continue;
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd,
1910Sstevel@tonic-gate 			    ep) == -1) {
1920Sstevel@tonic-gate 				if (! mdiserror(ep, MDE_NO_SET) &&
1930Sstevel@tonic-gate 				    ! mdismddberror(ep, MDE_DB_NODB)) {
1940Sstevel@tonic-gate 					rval = -1;
1950Sstevel@tonic-gate 					break;
1960Sstevel@tonic-gate 				}
1970Sstevel@tonic-gate 				mdclrerror(ep);
1980Sstevel@tonic-gate 			}
1990Sstevel@tonic-gate 		}
2000Sstevel@tonic-gate 	}
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate out:
2030Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
2040Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
2050Sstevel@tonic-gate 		nd = sd->sd_nodelist;
2060Sstevel@tonic-gate 		while (nd) {
2070Sstevel@tonic-gate 			/* If this is forced, don't unlock other sides */
2080Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), nd->nd_nodename)
2090Sstevel@tonic-gate 			    != 0) {
2100Sstevel@tonic-gate 				nd = nd->nd_next;
2110Sstevel@tonic-gate 				continue;
2120Sstevel@tonic-gate 			}
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 			/* We will unlocked this side in the caller */
2150Sstevel@tonic-gate 			if (strcmp(mynode(), nd->nd_nodename) == 0) {
2160Sstevel@tonic-gate 				nd = nd->nd_next;
2170Sstevel@tonic-gate 				continue;
2180Sstevel@tonic-gate 			}
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
2210Sstevel@tonic-gate 				nd = nd->nd_next;
2220Sstevel@tonic-gate 				continue;
2230Sstevel@tonic-gate 			}
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 			if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) {
2260Sstevel@tonic-gate 				if (rval == 0)
2270Sstevel@tonic-gate 					(void) mdstealerror(ep, &xep);
2280Sstevel@tonic-gate 				rval = -1;
2290Sstevel@tonic-gate 			}
2300Sstevel@tonic-gate 			nd = nd->nd_next;
2310Sstevel@tonic-gate 		}
2320Sstevel@tonic-gate 	} else {
2330Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
2340Sstevel@tonic-gate 			/* Skip empty slots */
2350Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
2360Sstevel@tonic-gate 				continue;
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 			/* If this is forced, don't unlock other sides */
2390Sstevel@tonic-gate 			if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0)
2400Sstevel@tonic-gate 				continue;
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 			/* We will unlocked this side in the caller */
2430Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
2440Sstevel@tonic-gate 				continue;
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate 			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
2470Sstevel@tonic-gate 				if (rval == 0)
2480Sstevel@tonic-gate 					(void) mdstealerror(ep, &xep);
2490Sstevel@tonic-gate 				rval = -1;
2500Sstevel@tonic-gate 			}
2510Sstevel@tonic-gate 		}
2520Sstevel@tonic-gate 	}
2530Sstevel@tonic-gate 	/* Do not clear the key, via cl_set_setkey(NULL) this is nested */
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 	return (rval);
2560Sstevel@tonic-gate }
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate static int
usetag_take(set_t setno,int usetag,md_error_t * ep)2590Sstevel@tonic-gate usetag_take(set_t setno, int usetag, md_error_t *ep)
2600Sstevel@tonic-gate {
2610Sstevel@tonic-gate 	mddb_dtag_use_parm_t	dtup;
2620Sstevel@tonic-gate 
2630Sstevel@tonic-gate 	(void) memset(&dtup, '\0', sizeof (mddb_dtag_use_parm_t));
2640Sstevel@tonic-gate 	dtup.dtup_id = usetag;
2650Sstevel@tonic-gate 	dtup.dtup_setno = setno;
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	if (metaioctl(MD_MED_USE_TAG, &dtup, &dtup.dtup_mde, NULL) != 0)
2680Sstevel@tonic-gate 		return (mdstealerror(ep, &dtup.dtup_mde));
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 	return (0);
2710Sstevel@tonic-gate }
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate static int
useit_take(set_t setno,md_error_t * ep)2740Sstevel@tonic-gate useit_take(set_t setno, md_error_t *ep)
2750Sstevel@tonic-gate {
2760Sstevel@tonic-gate 	mddb_accept_parm_t	accp;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	(void) memset(&accp, '\0', sizeof (mddb_accept_parm_t));
2790Sstevel@tonic-gate 	accp.accp_setno = setno;
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	if (metaioctl(MD_MED_ACCEPT, &accp, &accp.accp_mde, NULL) != 0)
2820Sstevel@tonic-gate 		return (mdstealerror(ep, &accp.accp_mde));
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate 	return (0);
2850Sstevel@tonic-gate }
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate /*
2880Sstevel@tonic-gate  * Update the master block with the device id information for the disks
2890Sstevel@tonic-gate  * in the diskset. The device id information will be consumed by the
2900Sstevel@tonic-gate  * diskset import code in case of remotely replicated disksets.
2910Sstevel@tonic-gate  *
2920Sstevel@tonic-gate  * For the drives that have a valid diskset mddb on them, we add the
2930Sstevel@tonic-gate  * device id for the drive to the unused portion of the mddb.
2940Sstevel@tonic-gate  *
2950Sstevel@tonic-gate  * For the drives that don't have a diskset mddb on them, we add a dummy
2960Sstevel@tonic-gate  * master block that contains the device id for the drive. A dummy master
2970Sstevel@tonic-gate  * block is signified by changing the master block magic number, mb_magic,
2980Sstevel@tonic-gate  * to MDDB_MAGIC_DU.
2990Sstevel@tonic-gate  *
3000Sstevel@tonic-gate  * This code is responsible primarily for adding the appropriate device id
3010Sstevel@tonic-gate  * information to diskset disks that didn't have the information. This would
3020Sstevel@tonic-gate  * typically occur when the OS has been upgraded from an OS release prior to
3030Sstevel@tonic-gate  * Solaris 10
3040Sstevel@tonic-gate  *
3050Sstevel@tonic-gate  * The error path in this routine is defined as - if an error occurs while
3060Sstevel@tonic-gate  * updating the mddb for one disk in the diskset, don't bother updating *any*
3070Sstevel@tonic-gate  * of the mddbs because it's game over anyways as far as disaster recovery for
3080Sstevel@tonic-gate  * that diskset is concerned.
3090Sstevel@tonic-gate  *
3100Sstevel@tonic-gate  * This code will need to be revisited if and when support for importing
3110Sstevel@tonic-gate  * partial disksets is added.
3120Sstevel@tonic-gate  *
3130Sstevel@tonic-gate  * NOTE: This code relies heavily on the meta_repartition() working correctly
3140Sstevel@tonic-gate  * and reformatting a drive, so that there's enough room for a dummy master
3150Sstevel@tonic-gate  * block, every time a drive is added to a diskset. Should
3160Sstevel@tonic-gate  * the meta_repartition() code change in future, this code will have to be
3170Sstevel@tonic-gate  * revisited.
3180Sstevel@tonic-gate  *
3190Sstevel@tonic-gate  * Returns 0 on success and -1 on failure
3200Sstevel@tonic-gate  */
3210Sstevel@tonic-gate int
meta_update_mb(mdsetname_t * sp,md_drive_desc * drivedesc,md_error_t * ep)3220Sstevel@tonic-gate meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep)
3230Sstevel@tonic-gate {
3240Sstevel@tonic-gate 	uint_t			sliceno, offset;
3250Sstevel@tonic-gate 	void			*mb;
3260Sstevel@tonic-gate 	mddb_mb_t		*mbp;
3270Sstevel@tonic-gate 	int			fd = -1;
3280Sstevel@tonic-gate 	ddi_devid_t		devid = NULL;
3290Sstevel@tonic-gate 	md_drive_desc		*dd;
3300Sstevel@tonic-gate 	mddrivename_t		*dnp;
3310Sstevel@tonic-gate 	mdname_t		*rsp;
3320Sstevel@tonic-gate 	int			dbcnt;
3330Sstevel@tonic-gate 	int			dbsize;
3340Sstevel@tonic-gate 	size_t 			len;
3350Sstevel@tonic-gate 	md_set_desc		*sd;
3360Sstevel@tonic-gate 
3370Sstevel@tonic-gate 	/*
3380Sstevel@tonic-gate 	 * Don't do anything for MN diskset for now.
3390Sstevel@tonic-gate 	 */
3400Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
3410Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
3420Sstevel@tonic-gate 			return (-1);
3430Sstevel@tonic-gate 
3440Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
3450Sstevel@tonic-gate 			return (0);
3460Sstevel@tonic-gate 	}
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate 	mb = Malloc(DEV_BSIZE);
3490Sstevel@tonic-gate 	mbp = (mddb_mb_t *)mb;
3500Sstevel@tonic-gate 
3510Sstevel@tonic-gate 	/*
3520Sstevel@tonic-gate 	 * For every drive in the drive descriptor, iterate through all
3530Sstevel@tonic-gate 	 * the mddbs present on it and check to see if mb_devid_magic is
3540Sstevel@tonic-gate 	 * set. If it isn't, then update the master block with the correct
3550Sstevel@tonic-gate 	 * device id information
3560Sstevel@tonic-gate 	 */
3570Sstevel@tonic-gate 	for (dd = drivedesc; dd != NULL; dd = dd->dd_next) {
3580Sstevel@tonic-gate 		int i = 0;
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate 		dnp = dd->dd_dnp;
3610Sstevel@tonic-gate 		dbcnt = dd->dd_dbcnt;
3620Sstevel@tonic-gate 		dbsize = dd->dd_dbsize;
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 		/*
3650Sstevel@tonic-gate 		 * When the import support for remotely replicated
3660Sstevel@tonic-gate 		 * disksets gets implemented, we probably want to
3670Sstevel@tonic-gate 		 * inform the user that the disks won't be self
3680Sstevel@tonic-gate 		 * identifying if any of these calls fails
3690Sstevel@tonic-gate 		 */
3700Sstevel@tonic-gate 		if (meta_replicaslice(dnp, &sliceno, ep) != 0)
3710Sstevel@tonic-gate 			return (-1);
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 		if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL)
3740Sstevel@tonic-gate 			return (-1);
3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate 		if ((fd = open(rsp->rname, O_RDWR)) < 0)
3770Sstevel@tonic-gate 			goto cleanup;
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 		/* if devid_str_decode fails, make sure devid is null */
3800Sstevel@tonic-gate 		if (devid_str_decode(dnp->devid, &devid, NULL) != 0) {
3810Sstevel@tonic-gate 			devid = NULL;
3820Sstevel@tonic-gate 		}
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 		do {
3850Sstevel@tonic-gate 			int push = 0;
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate 			offset = (i * dbsize + 16);
3880Sstevel@tonic-gate 			++i;
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate 			if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0)
3910Sstevel@tonic-gate 				goto cleanup;
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate 			if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
3940Sstevel@tonic-gate 				goto cleanup;
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 			if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum,
3970Sstevel@tonic-gate 			    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
3980Sstevel@tonic-gate 				goto cleanup;
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate 			/*
4010Sstevel@tonic-gate 			 * If the disk is one of the ones that doesn't
4020Sstevel@tonic-gate 			 * have a shared mddb on it, we put a dummy
4030Sstevel@tonic-gate 			 * master block on it.
4040Sstevel@tonic-gate 			 */
4050Sstevel@tonic-gate 			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
4060Sstevel@tonic-gate 				if (dbcnt == 0) {
4070Sstevel@tonic-gate 					meta_mkdummymaster(sp, fd, 16);
4080Sstevel@tonic-gate 					break;
4090Sstevel@tonic-gate 				}
4100Sstevel@tonic-gate 			}
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 			/*
4130Sstevel@tonic-gate 			 * if mb_setcreatetime is 0, this field was never
4140Sstevel@tonic-gate 			 * filled in so do it now.
4150Sstevel@tonic-gate 			 */
4160Sstevel@tonic-gate 			if ((mbp->mb_setcreatetime.tv_sec == 0) &&
4170Sstevel@tonic-gate 			    (mbp->mb_setcreatetime.tv_usec == 0)) {
4180Sstevel@tonic-gate 				mbp->mb_setcreatetime =
4190Sstevel@tonic-gate 				    meta_get_lb_inittime(sp, ep);
4200Sstevel@tonic-gate 				push = 1;
4210Sstevel@tonic-gate 			}
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 			/*
4240Sstevel@tonic-gate 			 * If MDDB_MAGIC_DE is set in the
4250Sstevel@tonic-gate 			 * mb_devid_magic field then we know we
4260Sstevel@tonic-gate 			 * have a valid device id and we don't
4270Sstevel@tonic-gate 			 * need to add it to the master block.
4280Sstevel@tonic-gate 			 *
4290Sstevel@tonic-gate 			 * This would have to be revisited if device
4300Sstevel@tonic-gate 			 * ids change as a result of device id
4310Sstevel@tonic-gate 			 * algorithms changing or somesuch.
4320Sstevel@tonic-gate 			 */
4330Sstevel@tonic-gate 			if (mbp->mb_devid_magic != MDDB_MAGIC_DE) {
4340Sstevel@tonic-gate 				if (devid != NULL) {
4350Sstevel@tonic-gate 					len = devid_sizeof(devid);
4360Sstevel@tonic-gate 					if (len <= (DEV_BSIZE -
4370Sstevel@tonic-gate 					    sizeof (mddb_mb_t))) {
4380Sstevel@tonic-gate 						/*
4390Sstevel@tonic-gate 						 * there's enough space to
4400Sstevel@tonic-gate 						 * store the devid
4410Sstevel@tonic-gate 						 */
4420Sstevel@tonic-gate 						mbp->mb_devid_magic =
4430Sstevel@tonic-gate 						    MDDB_MAGIC_DE;
4440Sstevel@tonic-gate 						mbp->mb_devid_len = len;
4450Sstevel@tonic-gate 						(void) memcpy(mbp->mb_devid,
4460Sstevel@tonic-gate 						    (char *)devid, len);
4470Sstevel@tonic-gate 						push = 1;
4480Sstevel@tonic-gate 					}
4490Sstevel@tonic-gate 				}
4500Sstevel@tonic-gate 			}
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 			/*
4530Sstevel@tonic-gate 			 * write out (push) any changes we have to the mb
4540Sstevel@tonic-gate 			 */
4550Sstevel@tonic-gate 			if (push) {
4560Sstevel@tonic-gate 				crcgen((uchar_t *)mbp,
4570Sstevel@tonic-gate 				    (uint_t *)&mbp->mb_checksum,
4580Sstevel@tonic-gate 				    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL);
4590Sstevel@tonic-gate 
4600Sstevel@tonic-gate 				if (lseek(fd, (off_t)dbtob(offset), SEEK_SET)
4610Sstevel@tonic-gate 				    < 0)
4620Sstevel@tonic-gate 					goto cleanup;
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 				if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE)
4650Sstevel@tonic-gate 					goto cleanup;
4660Sstevel@tonic-gate 			}
4670Sstevel@tonic-gate 			if (devid)
4680Sstevel@tonic-gate 				devid_free(devid);
4690Sstevel@tonic-gate 		} while (i < dbcnt);
4700Sstevel@tonic-gate 		(void) close(fd);
4710Sstevel@tonic-gate 	}
4720Sstevel@tonic-gate 	/* success */
4730Sstevel@tonic-gate 	return (0);
4740Sstevel@tonic-gate 
4750Sstevel@tonic-gate cleanup:
4760Sstevel@tonic-gate 	if (fd != -1)
4770Sstevel@tonic-gate 		(void) close(fd);
4780Sstevel@tonic-gate 	if (devid)
4790Sstevel@tonic-gate 		devid_free(devid);
4800Sstevel@tonic-gate 	return (-1);
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate 
4831945Sjeanm extern int *replicated_disk_list_built;
4841945Sjeanm extern int replicated_disk_list_built_pass1;
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate  * Exported Entry Points
4870Sstevel@tonic-gate  */
4880Sstevel@tonic-gate int
meta_set_take(mdsetname_t * sp,mhd_mhiargs_t * mhiargsp,int flags,int usetag,md_error_t * ep)4890Sstevel@tonic-gate meta_set_take(
4900Sstevel@tonic-gate 	mdsetname_t		*sp,
4910Sstevel@tonic-gate 	mhd_mhiargs_t		*mhiargsp,
4920Sstevel@tonic-gate 	int			flags,
4930Sstevel@tonic-gate 	int			usetag,
4940Sstevel@tonic-gate 	md_error_t		*ep
4950Sstevel@tonic-gate )
4960Sstevel@tonic-gate {
4970Sstevel@tonic-gate 	md_set_desc		*sd;
4980Sstevel@tonic-gate 	md_drive_desc		*dd;
4990Sstevel@tonic-gate 	md_drive_desc		*d = NULL;
5000Sstevel@tonic-gate 	char			*owner = NULL;
5010Sstevel@tonic-gate 	int			rval = 0;
5021945Sjeanm 	int			pathname_return = 0;
5030Sstevel@tonic-gate 	int			i;
5040Sstevel@tonic-gate 	int			has_set;
5050Sstevel@tonic-gate 	int			matches = 0;
5060Sstevel@tonic-gate 	int			numsides = 0;
5070Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
5080Sstevel@tonic-gate 	sigset_t		oldsigs;
5090Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
5100Sstevel@tonic-gate 	int			rb_level = 0;
5110Sstevel@tonic-gate 	md_error_t		xep = mdnullerror;
5120Sstevel@tonic-gate 	mdsetname_t		*local_sp = NULL;
513115Srd117015 	side_t			side;
5140Sstevel@tonic-gate 	int			ret = 0;
5150Sstevel@tonic-gate 	char			*newname = NULL;
5160Sstevel@tonic-gate 	mdkey_t			side_names_key;
5171945Sjeanm 	int			unrslv_replicated = 0;
5181945Sjeanm 	mddrivenamelist_t	*dnlp = NULL;
5191945Sjeanm 	int			retake_flag = 0;
5202902Spetede 	unsigned long 		node_active[BT_BITOUL(MD_MAXSIDES)];
521*12630SRay.Hassan@oracle.COM 	mdnamelist_t		*nlp = NULL;
5222902Spetede 
5232902Spetede 	bzero(node_active, sizeof (unsigned long) * BT_BITOUL(MD_MAXSIDES));
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate 	if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) {
5260Sstevel@tonic-gate 		if (flags & TAKE_USETAG) {
5270Sstevel@tonic-gate 			if (usetag_take(sp->setno, usetag, ep))
5280Sstevel@tonic-gate 				return (-1);
5290Sstevel@tonic-gate 		} else {
5300Sstevel@tonic-gate 			if (useit_take(sp->setno, ep))
5310Sstevel@tonic-gate 				return (-1);
5320Sstevel@tonic-gate 		}
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 		if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0)
5350Sstevel@tonic-gate 			mdclrerror(ep);
5360Sstevel@tonic-gate 	}
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 	/* Do we own the set? */
5390Sstevel@tonic-gate 	i = own_set(sp, &owner, (flags & TAKE_FORCE), ep);
5400Sstevel@tonic-gate 	if (! mdisok(ep)) {
5410Sstevel@tonic-gate 		if (owner != NULL)
5420Sstevel@tonic-gate 			Free(owner);
5430Sstevel@tonic-gate 		return (-1);
5440Sstevel@tonic-gate 	}
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	if (i == MD_SETOWNER_NO) {
5470Sstevel@tonic-gate 		(void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL,
5480Sstevel@tonic-gate 		    sp->setname);
5490Sstevel@tonic-gate 		if (owner != NULL)
5500Sstevel@tonic-gate 			Free(owner);
5510Sstevel@tonic-gate 		return (-1);
5520Sstevel@tonic-gate 	}
5530Sstevel@tonic-gate 
5540Sstevel@tonic-gate 	if (owner != NULL) {
5550Sstevel@tonic-gate 		Free(owner);
5560Sstevel@tonic-gate 		owner = NULL;
5570Sstevel@tonic-gate 	}
5580Sstevel@tonic-gate 
5590Sstevel@tonic-gate 	/* We already own it, we are done. */
5600Sstevel@tonic-gate 	if (i == MD_SETOWNER_YES)
5610Sstevel@tonic-gate 		return (0);
5620Sstevel@tonic-gate 
5630Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, &xep)) == NULL)
5640Sstevel@tonic-gate 		return (-1);
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	/* You can not take ownership of a set that has no drives */
5670Sstevel@tonic-gate 	if (sd->sd_flags & MD_SR_MB_DEVID)
5680Sstevel@tonic-gate 		dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep);
5690Sstevel@tonic-gate 	else
5700Sstevel@tonic-gate 		dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep);
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate 	if (dd == NULL) {
5730Sstevel@tonic-gate 		if (! mdisok(ep))
5740Sstevel@tonic-gate 			return (-1);
5750Sstevel@tonic-gate 		return (0);
5760Sstevel@tonic-gate 	}
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 	/* END CHECK CODE */
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate 	md_rb_sig_handling_on();
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate 	/* Lock the set on our side */
5830Sstevel@tonic-gate 	if (clnt_lock_set(mynode(), sp, ep)) {
5840Sstevel@tonic-gate 		rval = -1;
5850Sstevel@tonic-gate 		goto out;
5860Sstevel@tonic-gate 	}
587115Srd117015 
5880Sstevel@tonic-gate 	/*
589115Srd117015 	 * Find the "side" value so that it can be used to deal with
590115Srd117015 	 * the devids.
5910Sstevel@tonic-gate 	 */
592115Srd117015 	side = getnodeside(mynode(), sd);
593115Srd117015 
594115Srd117015 	if (side == MD_SIDEWILD) {
595*12630SRay.Hassan@oracle.COM 		(void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(),
596*12630SRay.Hassan@oracle.COM 		    NULL, mynode());
597*12630SRay.Hassan@oracle.COM 		rval = -1;
598*12630SRay.Hassan@oracle.COM 		goto out;
5990Sstevel@tonic-gate 	}
600115Srd117015 
601115Srd117015 	/*
602115Srd117015 	 * A local sets' side 0 references records associated with
603115Srd117015 	 * that node's local set. As this is a non-local set, "side"
604115Srd117015 	 * must be modified (by adding a SKEW) before we reference
605115Srd117015 	 * records in the local set [setno = 0] for the non-local set
606115Srd117015 	 * [setno = 1..n].
607115Srd117015 	 */
608115Srd117015 	side += SKEW;
6090Sstevel@tonic-gate 
6100Sstevel@tonic-gate 	/*
6111945Sjeanm 	 * If this set had been previously imported as a partial replicated
6121945Sjeanm 	 * diskset, then must attempt to updated any unresolved drive
6131945Sjeanm 	 * records in diskset with new devid information.  Must set
6141945Sjeanm 	 * flags in drivedesc list before loading up set so that the
6151945Sjeanm 	 * md driver will fix up names and devids correctly in the
6161945Sjeanm 	 * locator block.
6171945Sjeanm 	 */
6181945Sjeanm 	if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) {
6191945Sjeanm 		md_im_names_t		cnames = { 0, NULL};
6201945Sjeanm 		ddi_devid_t		old_devid, new_devid;
6211945Sjeanm 		char			*search_path = "/dev";
6221945Sjeanm 		devid_nmlist_t		*nmlist;
6231945Sjeanm 		int			indx;
6241945Sjeanm 		mddrivenamelist_t	**dnlpp = &dnlp;
6251945Sjeanm 
6261945Sjeanm 		if (meta_list_disks(ep, &cnames) != 0) {
6271945Sjeanm 			rval = -1;
6281945Sjeanm 			goto out;
6291945Sjeanm 		}
6301945Sjeanm 
6311945Sjeanm 		for (indx = 0; indx < cnames.min_count; ++indx) {
6321945Sjeanm 			mddrivename_t   *dnp;
6331945Sjeanm 			mdsetname_t	*sp =  metasetname(MD_LOCAL_NAME, ep);
6341945Sjeanm 			int		fd = -1;
6351945Sjeanm 			ddi_devid_t	devid1;
6361945Sjeanm 			char		*cdevidp;
6371945Sjeanm 			int		len;
6381945Sjeanm 			char		*fp;
6391945Sjeanm 
6401945Sjeanm 			/*
6411945Sjeanm 			 * We may have name collision here so we need to get
6421945Sjeanm 			 * the dnp using the devid and not the name.
6431945Sjeanm 			 */
6441945Sjeanm 			len = strlen(cnames.min_names[indx]) + strlen("s0");
6451945Sjeanm 			if ((fp = (char *)Malloc(len+1)) == NULL) {
6461945Sjeanm 				(void) mdsyserror(ep, ENOMEM, NULL);
6471945Sjeanm 				rval = -1;
6481945Sjeanm 				goto out;
6491945Sjeanm 			}
6501945Sjeanm 			(void) snprintf(fp, len + 1, "%ss0",
6511945Sjeanm 			    cnames.min_names[indx]);
6521945Sjeanm 			if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) {
6531945Sjeanm 				(void) mdsyserror(ep, EIO, fp);
6541945Sjeanm 				rval = -1;
6551945Sjeanm 				goto out;
6561945Sjeanm 			}
6571945Sjeanm 			Free(fp);
6581945Sjeanm 			/* if no device id, what error?) */
6591945Sjeanm 			if (devid_get(fd, &devid1) != 0) {
6601945Sjeanm 				(void) mdsyserror(ep, EIO, fp);
6611945Sjeanm 				rval = -1;
6621945Sjeanm 				goto out;
6631945Sjeanm 			}
6641945Sjeanm 			if (close(fd) < 0) {
6651945Sjeanm 				(void) mdsyserror(ep, EIO, fp);
6661945Sjeanm 				rval = -1;
6671945Sjeanm 				goto out;
6681945Sjeanm 			}
6691945Sjeanm 			cdevidp = devid_str_encode(devid1, NULL);
6701945Sjeanm 			if (cdevidp == NULL) {
6711945Sjeanm 				(void) mdsyserror(ep, EIO, fp);
6721945Sjeanm 				rval = -1;
6731945Sjeanm 				goto out;
6741945Sjeanm 			}
6751945Sjeanm 			devid_free(devid1);
6761945Sjeanm 			dnp = metadrivenamebydevid(&sp, cdevidp,
6771945Sjeanm 			    cnames.min_names[indx], ep);
6781945Sjeanm 			devid_str_free(cdevidp);
6791945Sjeanm 			if (dnp == NULL) {
6801945Sjeanm 				/*
6811945Sjeanm 				 * Assuming we're interested in knowing about
6821945Sjeanm 				 * whatever error occurred, but not in stopping.
6831945Sjeanm 				 */
6841945Sjeanm 				mde_perror(ep, cnames.min_names[indx]);
6851945Sjeanm 				mdclrerror(ep);
6861945Sjeanm 				continue;
6871945Sjeanm 			}
6881945Sjeanm 
6891945Sjeanm 			dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp);
6901945Sjeanm 		}
6911945Sjeanm 		/* Reget sd and dd since freed by meta_prune_cnames. */
6921945Sjeanm 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
6931945Sjeanm 			rval = -1;
6941945Sjeanm 			goto out;
6951945Sjeanm 		}
6961945Sjeanm 
6971945Sjeanm 		if (sd->sd_flags & MD_SR_MB_DEVID)
6981945Sjeanm 			dd = metaget_drivedesc(sp,
699*12630SRay.Hassan@oracle.COM 			    MD_BASICNAME_OK | PRINT_FAST, ep);
7001945Sjeanm 		else
7011945Sjeanm 			dd = metaget_drivedesc(sp,
702*12630SRay.Hassan@oracle.COM 			    MD_BASICNAME_OK, ep);
7031945Sjeanm 		/* If ep has error, then there was a failure, set rval */
7041945Sjeanm 		if (!mdisok(ep)) {
7051945Sjeanm 			rval = -1;
7061945Sjeanm 			goto out;
7071945Sjeanm 		}
7081945Sjeanm 
7091945Sjeanm 		/* Builds global replicated disk list */
7101945Sjeanm 		replicated_disk_list_built = &replicated_disk_list_built_pass1;
7111945Sjeanm 
7121945Sjeanm 		/* If success, then clear error structure */
7131945Sjeanm 		if (build_replicated_disks_list(ep, dnlp) == 1)
7141945Sjeanm 			mdclrerror(ep);
7151945Sjeanm 		/* If ep has error, then there was a failure, set rval */
7161945Sjeanm 		if (! mdisok(ep)) {
7171945Sjeanm 			rval = -1;
7181945Sjeanm 			goto out;
7191945Sjeanm 		}
7201945Sjeanm 
7211945Sjeanm 		for (d = dd; d != NULL; d = d->dd_next) {
7221945Sjeanm 			if (d->dd_flags & MD_DR_UNRSLV_REPLICATED) {
7231945Sjeanm 				/* Get old devid from drive record */
7241945Sjeanm 				(void) devid_str_decode(d->dd_dnp->devid,
7251945Sjeanm 				    &old_devid, NULL);
7261945Sjeanm 
7271945Sjeanm 				/*
7281945Sjeanm 				 * If the devid stored in the drive record
7291945Sjeanm 				 * (old_devid) matches a devid known by
7301945Sjeanm 				 * the system, then this disk has already
7311945Sjeanm 				 * been partially resolved.  This situation
7321945Sjeanm 				 * could occur if a panic happened during a
7331945Sjeanm 				 * previous take of this diskset.
7341945Sjeanm 				 * Set flag to later handle fixing the master
7351945Sjeanm 				 * block on disk and turning off the unresolved
7361945Sjeanm 				 * replicated flag.
7371945Sjeanm 				 */
7381945Sjeanm 				if (meta_deviceid_to_nmlist(search_path,
7391945Sjeanm 				    (ddi_devid_t)old_devid,
7401945Sjeanm 				    DEVID_MINOR_NAME_ALL,
7411945Sjeanm 				    &nmlist) == 0) {
7421945Sjeanm 					d->dd_flags |= MD_DR_FIX_MB_DID;
7431945Sjeanm 					retake_flag = 1;
7441945Sjeanm 					continue;
7451945Sjeanm 				}
7461945Sjeanm 
7471945Sjeanm 				/*
7481945Sjeanm 				 * If the devid stored in the drive record
7491945Sjeanm 				 * is on the list of replicated disks found
7501945Sjeanm 				 * during a system scan then set both flags
7511945Sjeanm 				 * so that the locator block, namespaces
7521945Sjeanm 				 * (diskset and local set), master block
7531945Sjeanm 				 * and unresolved replicated flag are updated.
7541945Sjeanm 				 */
7551945Sjeanm 				new_devid = replicated_list_lookup(
7561945Sjeanm 				    devid_sizeof((ddi_devid_t)old_devid),
7571945Sjeanm 				    old_devid);
7581945Sjeanm 				devid_free(old_devid);
7591945Sjeanm 
7601945Sjeanm 				/*
7611945Sjeanm 				 * If devid stored in the drive record is
7621945Sjeanm 				 * not found then set flag to mark
7631945Sjeanm 				 * that set is still unresolved and
7641945Sjeanm 				 * continue to next drive record.
7651945Sjeanm 				 */
7661945Sjeanm 				if (new_devid == NULL) {
7671945Sjeanm 					unrslv_replicated = 1;
7681945Sjeanm 					continue;
7691945Sjeanm 				}
7701945Sjeanm 
7711945Sjeanm 				/*
7721945Sjeanm 				 * Set flags to fix up the master block,
7731945Sjeanm 				 * locator block of the diskset, diskset
7741945Sjeanm 				 * namespace and the local set namespace.
7751945Sjeanm 				 */
7761945Sjeanm 				d->dd_flags |= (MD_DR_FIX_MB_DID |
777*12630SRay.Hassan@oracle.COM 				    MD_DR_FIX_LB_NM_DID);
7781945Sjeanm 				retake_flag = 1;
7791945Sjeanm 			}
7801945Sjeanm 		}
7811945Sjeanm 
7821945Sjeanm 	}
7831945Sjeanm 
7841945Sjeanm 	/*
7850Sstevel@tonic-gate 	 * Check the local devid namespace to see if the disks
7860Sstevel@tonic-gate 	 * have been moved. Use the local set first of all as this contains
7870Sstevel@tonic-gate 	 * entries for the disks in the set.
7880Sstevel@tonic-gate 	 *
7890Sstevel@tonic-gate 	 * This is being done before the tk_own_bydd because the disks
7900Sstevel@tonic-gate 	 * in the dd list could be wrong! But it should be done with the lock
7910Sstevel@tonic-gate 	 * held for the set.
7920Sstevel@tonic-gate 	 */
7930Sstevel@tonic-gate 	local_sp = metasetname(MD_LOCAL_NAME, ep);
7940Sstevel@tonic-gate 	for (d = dd; d != NULL; d = d->dd_next) {
7950Sstevel@tonic-gate 		/*
7960Sstevel@tonic-gate 		 * Actually do the check of the disks.
7970Sstevel@tonic-gate 		 */
7980Sstevel@tonic-gate 		ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp, &newname,
7990Sstevel@tonic-gate 		    ep);
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 		if ((ret == METADEVADM_ERR) ||
8020Sstevel@tonic-gate 		    (ret == METADEVADM_DSKNAME_ERR)) {
8030Sstevel@tonic-gate 			/* check failed in some unknown manner */
8040Sstevel@tonic-gate 			rval = -1;
8050Sstevel@tonic-gate 			goto out;
8060Sstevel@tonic-gate 		} else if (ret == METADEVADM_DISKMOVE) {
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 			/*
8090Sstevel@tonic-gate 			 * Update the dd namelist so that the rpc.metamhd
8100Sstevel@tonic-gate 			 * gets the correct disks to reserve - it is the rname
8110Sstevel@tonic-gate 			 * we are interested in.
8120Sstevel@tonic-gate 			 */
8130Sstevel@tonic-gate 			if (newname != NULL) {
8141945Sjeanm 				char	*save_devid;
8150Sstevel@tonic-gate 				/*
8160Sstevel@tonic-gate 				 * Need to save the side names key as this
8170Sstevel@tonic-gate 				 * points to the namespace entry that will
8180Sstevel@tonic-gate 				 * need to be updated. In addition the call
8190Sstevel@tonic-gate 				 * to meta_make_sidenmlist does not actually
8200Sstevel@tonic-gate 				 * set the namespace key.
8210Sstevel@tonic-gate 				 */
8220Sstevel@tonic-gate 				side_names_key = d->dd_dnp->side_names_key;
8231945Sjeanm 
8241945Sjeanm 				/*
8251945Sjeanm 				 * There is the possibility that there
8261945Sjeanm 				 * will be multiple disks with the same
8271945Sjeanm 				 * name but different devids in the
8281945Sjeanm 				 * drivelist. Because of this, we need
8291945Sjeanm 				 * to look for a new dnp based on devid
8301945Sjeanm 				 * and not name.
8311945Sjeanm 				 */
8321945Sjeanm 				save_devid = Strdup(d->dd_dnp->devid);
8330Sstevel@tonic-gate 				metafreedrivename(d->dd_dnp);
8341945Sjeanm 				d->dd_dnp = metadrivenamebydevid(&sp,
8351945Sjeanm 				    save_devid, newname, ep);
8361945Sjeanm 				Free(save_devid);
8370Sstevel@tonic-gate 				Free(newname);
8380Sstevel@tonic-gate 				/*
8390Sstevel@tonic-gate 				 * null newname so we are reset for next time
8400Sstevel@tonic-gate 				 * through
8410Sstevel@tonic-gate 				 */
8420Sstevel@tonic-gate 				newname = NULL;
8431945Sjeanm 				ret = meta_make_sidenmlist(sp,
844*12630SRay.Hassan@oracle.COM 				    d->dd_dnp, 0, NULL, ep);
8450Sstevel@tonic-gate 				d->dd_dnp->side_names_key = side_names_key;
8460Sstevel@tonic-gate 				if (ret == -1) {
8470Sstevel@tonic-gate 					rval = -1;
8480Sstevel@tonic-gate 					goto out;
8490Sstevel@tonic-gate 				}
8500Sstevel@tonic-gate 			}
8510Sstevel@tonic-gate 		}
8520Sstevel@tonic-gate 	}
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	RB_TEST(1, "take", ep)
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	RB_PREEMPT;
8580Sstevel@tonic-gate 	rb_level = 1;	/* level 1 */
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate 	RB_TEST(2, "take", ep)
8610Sstevel@tonic-gate 
8620Sstevel@tonic-gate 	if (!MD_ATSET_DESC(sd)) {
8631945Sjeanm 		if (tk_own_bydd(sp, dd, mhiargsp,
8641945Sjeanm 		    flags & MD_IM_PARTIAL_DISKSET, ep))
8650Sstevel@tonic-gate 			goto rollback;
8660Sstevel@tonic-gate 	}
8670Sstevel@tonic-gate 
8680Sstevel@tonic-gate 	RB_TEST(3, "take", ep)
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate 	RB_PREEMPT;
8710Sstevel@tonic-gate 	rb_level = 2;	/* level 2 */
8720Sstevel@tonic-gate 
8730Sstevel@tonic-gate 	RB_TEST(4, "take", ep)
8740Sstevel@tonic-gate 
8750Sstevel@tonic-gate 	if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1)
8760Sstevel@tonic-gate 		goto rollback;
8770Sstevel@tonic-gate 
8780Sstevel@tonic-gate 	if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) {
8790Sstevel@tonic-gate 		if (! mdismddberror(ep, MDE_DB_ACCOK) &&
8800Sstevel@tonic-gate 		    ! mdismddberror(ep, MDE_DB_TAGDATA))
8810Sstevel@tonic-gate 			goto rollback;
8820Sstevel@tonic-gate 		mdclrerror(ep);
8830Sstevel@tonic-gate 	}
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate 	RB_TEST(5, "take", ep)
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate 	RB_PREEMPT;
8880Sstevel@tonic-gate 	rb_level = 3;	/* level 3 */
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 	RB_TEST(6, "take", ep)
8910Sstevel@tonic-gate 
8920Sstevel@tonic-gate 	/* Snarf set of traditional diskset doesn't use stale information */
8930Sstevel@tonic-gate 	if (snarf_set(sp, FALSE, ep)) {
8940Sstevel@tonic-gate 		if (mdismddberror(ep, MDE_DB_STALE) ||
8950Sstevel@tonic-gate 		    mdismddberror(ep, MDE_DB_ACCOK) ||
8960Sstevel@tonic-gate 		    mdismddberror(ep, MDE_DB_TAGDATA)) {
8970Sstevel@tonic-gate 			rval = -1;
8980Sstevel@tonic-gate 			goto out;
8990Sstevel@tonic-gate 		}
9000Sstevel@tonic-gate 
9010Sstevel@tonic-gate 		if (! mdismddberror(ep, MDE_DB_NODB) &&
9020Sstevel@tonic-gate 		    ! mdismddberror(ep, MDE_DB_NOTOWNER))
9030Sstevel@tonic-gate 			goto rollback;
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate 		/*
9060Sstevel@tonic-gate 		 * Look at the set on all other hosts, if every other host
9070Sstevel@tonic-gate 		 * has the same set with a larger genid, then we destroy this
9080Sstevel@tonic-gate 		 * copy.
9090Sstevel@tonic-gate 		 */
9100Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
9110Sstevel@tonic-gate 			/* Skip empty slots */
9120Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
9130Sstevel@tonic-gate 				continue;
9140Sstevel@tonic-gate 
9150Sstevel@tonic-gate 			/* Skip this node */
9160Sstevel@tonic-gate 			if (strcmp(sd->sd_nodes[i], mynode()) == 0)
9170Sstevel@tonic-gate 				continue;
9180Sstevel@tonic-gate 
9190Sstevel@tonic-gate 			numsides++;
9200Sstevel@tonic-gate 
9210Sstevel@tonic-gate 			has_set = nodehasset(sp, sd->sd_nodes[i],
9220Sstevel@tonic-gate 			    NHS_NST_EQ_G_GT, &xep);
9230Sstevel@tonic-gate 
9240Sstevel@tonic-gate 			if (has_set < 0) {
9250Sstevel@tonic-gate 				if (! mdiserror(&xep, MDE_NO_SET) &&
9260Sstevel@tonic-gate 				    ! mdismddberror(&xep, MDE_DB_NODB))
9270Sstevel@tonic-gate 					goto rollback;
9280Sstevel@tonic-gate 				matches++;
9290Sstevel@tonic-gate 				mdclrerror(&xep);
9300Sstevel@tonic-gate 				continue;
9310Sstevel@tonic-gate 			}
9320Sstevel@tonic-gate 
9330Sstevel@tonic-gate 			if (has_set)
9340Sstevel@tonic-gate 				matches++;
9350Sstevel@tonic-gate 		}
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate 		/* Destroy the set */
9380Sstevel@tonic-gate 		if (numsides > 0 && (numsides - matches) == 0) {
9390Sstevel@tonic-gate 			if (meta_set_destroy(sp, FALSE, &xep))
9400Sstevel@tonic-gate 				mdclrerror(&xep);
9410Sstevel@tonic-gate 			(void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno,
9420Sstevel@tonic-gate 			    sp->setname, NULL, mynode());
9430Sstevel@tonic-gate 			rval = -1;
9440Sstevel@tonic-gate 		}
9450Sstevel@tonic-gate 		goto rollback;
9460Sstevel@tonic-gate 	}
9470Sstevel@tonic-gate 
9481945Sjeanm 	/*
9491945Sjeanm 	 * If an unresolved replicated diskset, fix up diskset
9501945Sjeanm 	 * and local namespaces, master block and drive record
9511945Sjeanm 	 * with the new devid.  If all drives in diskset are
9521945Sjeanm 	 * now resolved, then clear set unresolved replicated flag.
9531945Sjeanm 	 * If an error is encountered, don't fail the take, but
9541945Sjeanm 	 * don't proceed any further in resolving the replicated disks.
9551945Sjeanm 	 */
9561945Sjeanm 	if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) {
9571945Sjeanm 		/* Fix up diskset and local namespaces with new devids */
9581945Sjeanm 		meta_unrslv_replicated_nm(sp, dd, dnlp, ep);
9591945Sjeanm 		if (mdisok(ep)) {
9601945Sjeanm 			/* Fix up master block with new devids  */
9611945Sjeanm 			meta_unrslv_replicated_mb(sp, dd, dnlp, ep);
9621945Sjeanm 		}
9631945Sjeanm 
9641945Sjeanm 		/* If all drives are resolved, set OK flag in set record. */
9651945Sjeanm 		if (mdisok(ep) && (unrslv_replicated == 0)) {
9661945Sjeanm 			/* Ignore failure since no bad effect. */
9671945Sjeanm 			(void) clnt_upd_sr_flags(mynode(), sp, MD_SR_OK, ep);
9681945Sjeanm 		}
9691945Sjeanm 		mdclrerror(ep);
9701945Sjeanm 
9711945Sjeanm 	}
9721945Sjeanm 
973*12630SRay.Hassan@oracle.COM 	/*
974*12630SRay.Hassan@oracle.COM 	 * meta_getalldevs() will ultimately force devfsadmd to create
975*12630SRay.Hassan@oracle.COM 	 * the /dev links for all the configured metadevices if they
976*12630SRay.Hassan@oracle.COM 	 * do not already exist. This ensures that once the set is
977*12630SRay.Hassan@oracle.COM 	 * owned all the metadevices are accessible as opposed to waiting
978*12630SRay.Hassan@oracle.COM 	 * for devfsadmd to create them.
979*12630SRay.Hassan@oracle.COM 	 */
980*12630SRay.Hassan@oracle.COM 	if (meta_getalldevs(sp, &nlp, FALSE, ep) != 0) {
981*12630SRay.Hassan@oracle.COM 		metafreenamelist(nlp);
982*12630SRay.Hassan@oracle.COM 		goto rollback;
983*12630SRay.Hassan@oracle.COM 	}
984*12630SRay.Hassan@oracle.COM 
985*12630SRay.Hassan@oracle.COM 	metafreenamelist(nlp);
986*12630SRay.Hassan@oracle.COM 
9871945Sjeanm 	pathname_return = pathname_reload(&sp, sp->setno, ep);
9881945Sjeanm 	if ((pathname_return == METADEVADM_ERR) ||
9891945Sjeanm 	    (pathname_return == METADEVADM_DSKNAME_ERR)) {
9900Sstevel@tonic-gate 		goto rollback;
9910Sstevel@tonic-gate 	}
9920Sstevel@tonic-gate 
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
9950Sstevel@tonic-gate 		goto rollback;
9960Sstevel@tonic-gate 
9970Sstevel@tonic-gate 	if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) {
9980Sstevel@tonic-gate 		metafreereplicalist(rlp);
9990Sstevel@tonic-gate 		goto rollback;
10000Sstevel@tonic-gate 	}
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	metafreereplicalist(rlp);
10030Sstevel@tonic-gate 
10040Sstevel@tonic-gate 	/*
10050Sstevel@tonic-gate 	 * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e
10060Sstevel@tonic-gate 	 * the drives in the set don't have the device id information,
10070Sstevel@tonic-gate 	 * then stick it in if possible.
10080Sstevel@tonic-gate 	 *
10090Sstevel@tonic-gate 	 * If updating the master block fails for whatever reason, it's
10100Sstevel@tonic-gate 	 * okay. It just means the disk(s) in the diskset won't be self
10110Sstevel@tonic-gate 	 * identifying.
10120Sstevel@tonic-gate 	 */
10130Sstevel@tonic-gate 	if (!(sd->sd_flags & MD_SR_MB_DEVID)) {
10140Sstevel@tonic-gate 		/* Lock the set on current set members */
10150Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
10160Sstevel@tonic-gate 			/* Skip empty slots */
10170Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
10180Sstevel@tonic-gate 				continue;
10190Sstevel@tonic-gate 
10200Sstevel@tonic-gate 			/* We already locked this side */
10210Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
10220Sstevel@tonic-gate 				continue;
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate 			if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
10252902Spetede 				/*
10262902Spetede 				 * Ignore any RPC errors on a force
10272902Spetede 				 * take. The set will have been taken
10282902Spetede 				 * above and we still need to continue.
10292902Spetede 				 */
10302902Spetede 				if (flags & TAKE_FORCE)
10312902Spetede 					continue;
10320Sstevel@tonic-gate 				rval = -1;
10330Sstevel@tonic-gate 				goto out;
10340Sstevel@tonic-gate 			}
10352902Spetede 			BT_SET(node_active, i);
10360Sstevel@tonic-gate 		}
10370Sstevel@tonic-gate 		rb_level = 4;	/* level 4 */
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 		if (meta_update_mb(sp, dd, ep) == 0)
10400Sstevel@tonic-gate 			/* update the sr_flags on all hosts */
10410Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
10420Sstevel@tonic-gate 				/* Skip empty slots */
10430Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
10440Sstevel@tonic-gate 					continue;
10450Sstevel@tonic-gate 
10462902Spetede 				/*
10472902Spetede 				 * Only update those nodes that
10482902Spetede 				 * are active (ie those that the
10492902Spetede 				 * set is locked on).
10502902Spetede 				 */
10512902Spetede 				if (!BT_TEST(node_active, i))
10522902Spetede 					continue;
10532902Spetede 
10540Sstevel@tonic-gate 				if (clnt_upd_sr_flags(sd->sd_nodes[i],
10550Sstevel@tonic-gate 				    sp, (sd->sd_flags | MD_SR_MB_DEVID), ep))
10560Sstevel@tonic-gate 					goto rollback;
10570Sstevel@tonic-gate 			}
10580Sstevel@tonic-gate 
10590Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
10600Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
10610Sstevel@tonic-gate 			/* Skip empty slots */
10620Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
10630Sstevel@tonic-gate 				continue;
10640Sstevel@tonic-gate 
10650Sstevel@tonic-gate 			/* Unlocked of this side is done later */
10660Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
10670Sstevel@tonic-gate 				continue;
10680Sstevel@tonic-gate 
10692902Spetede 			/* no point calling dead nodes */
10702902Spetede 			if (!BT_TEST(node_active, i))
10712902Spetede 				continue;
10722902Spetede 
10730Sstevel@tonic-gate 			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
10740Sstevel@tonic-gate 				if (rval == 0)
10750Sstevel@tonic-gate 					(void) mdstealerror(ep, &xep);
10760Sstevel@tonic-gate 				rval = -1;
10770Sstevel@tonic-gate 			}
10780Sstevel@tonic-gate 		}
10790Sstevel@tonic-gate 	}
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 	/*
10820Sstevel@tonic-gate 	 * If we get here, we need to unlock the set before the resync
10830Sstevel@tonic-gate 	 * gets called, otherwise the "daemon" will hold the set lock
10840Sstevel@tonic-gate 	 * until the resync is done!
10850Sstevel@tonic-gate 	 */
10860Sstevel@tonic-gate 
10870Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
10880Sstevel@tonic-gate 	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
10890Sstevel@tonic-gate 		if (rval == 0)
10900Sstevel@tonic-gate 			(void) mdstealerror(ep, &xep);
10910Sstevel@tonic-gate 		rval = -1;
10920Sstevel@tonic-gate 	}
10930Sstevel@tonic-gate 	cl_set_setkey(NULL);
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 	/* We try to get things resync'ed, but this can fail */
10980Sstevel@tonic-gate 	mdclrerror(&xep);
10990Sstevel@tonic-gate 	if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) {
11000Sstevel@tonic-gate 		if (rval == 0)
11010Sstevel@tonic-gate 			(void) mdstealerror(ep, &xep);
11020Sstevel@tonic-gate 		rval = -1;
11030Sstevel@tonic-gate 	}
11040Sstevel@tonic-gate 
11050Sstevel@tonic-gate 	RB_TEST(7, "take", ep)
11060Sstevel@tonic-gate 
11071945Sjeanm 	/*
11081945Sjeanm 	 * In order to resolve the namespace major driver names and
11091945Sjeanm 	 * to have the subdrivers attempt to re-associate devts from
11101945Sjeanm 	 * the newly resolved replicated device ids, return a '2'.
11111945Sjeanm 	 * This instructs metaset to release the diskset and re-take.
11121945Sjeanm 	 *
11131945Sjeanm 	 * Return a 2 if
11141945Sjeanm 	 * 	- no error was detected on the take
11151945Sjeanm 	 *	- a replicated unresolved devid was resolved during take
11161945Sjeanm 	 *	- take isn't being called during an import
11171945Sjeanm 	 *	- this isn't already a re-take situation
11181945Sjeanm 	 */
11191945Sjeanm 	if ((rval == 0) && (retake_flag == 1) &&
11201945Sjeanm 	    ((flags & (TAKE_RETAKE | TAKE_IMP)) == 0)) {
11211945Sjeanm 		rval = 2;
11221945Sjeanm 	}
11231945Sjeanm 
11240Sstevel@tonic-gate 	return (rval);
11250Sstevel@tonic-gate 
11260Sstevel@tonic-gate out:
11270Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
11280Sstevel@tonic-gate 	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
11290Sstevel@tonic-gate 		if (rval == 0)
11300Sstevel@tonic-gate 			(void) mdstealerror(ep, &xep);
11310Sstevel@tonic-gate 		rval = -1;
11320Sstevel@tonic-gate 	}
11330Sstevel@tonic-gate 	if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) {
11340Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
11350Sstevel@tonic-gate 			/* Skip empty slots */
11360Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
11370Sstevel@tonic-gate 				continue;
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate 			/* We already unlocked this side */
11400Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
11410Sstevel@tonic-gate 				continue;
11420Sstevel@tonic-gate 
11432902Spetede 			/* no point calling dead nodes */
11442902Spetede 			if (!BT_TEST(node_active, i))
11452902Spetede 				continue;
11462902Spetede 
11470Sstevel@tonic-gate 			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
11480Sstevel@tonic-gate 				if (rval == 0)
11490Sstevel@tonic-gate 					(void) mdstealerror(ep, &xep);
11500Sstevel@tonic-gate 				rval = -1;
11510Sstevel@tonic-gate 			}
11520Sstevel@tonic-gate 		}
11530Sstevel@tonic-gate 	}
11540Sstevel@tonic-gate 	cl_set_setkey(NULL);
11550Sstevel@tonic-gate 
11560Sstevel@tonic-gate 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
11570Sstevel@tonic-gate 
11580Sstevel@tonic-gate 	return (rval);
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate rollback:
11610Sstevel@tonic-gate 	/* Make sure we are blocking all signals */
11620Sstevel@tonic-gate 	if (procsigs(TRUE, &oldsigs, &xep) < 0)
11630Sstevel@tonic-gate 		mdclrerror(&xep);
11640Sstevel@tonic-gate 
11650Sstevel@tonic-gate 	rval = -1;
11660Sstevel@tonic-gate 
11670Sstevel@tonic-gate 	/* level 4 */
11680Sstevel@tonic-gate 	if (rb_level > 3) {
11690Sstevel@tonic-gate 		if (sd->sd_flags & MD_SR_MB_DEVID) {
11700Sstevel@tonic-gate 			/* update the sr_flags on all hosts */
11710Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
11720Sstevel@tonic-gate 				/* Skip empty slots */
11730Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
11740Sstevel@tonic-gate 					continue;
11750Sstevel@tonic-gate 
11762902Spetede 				/* no point calling dead nodes */
11772902Spetede 				if (!BT_TEST(node_active, i))
11782902Spetede 					continue;
11792902Spetede 
11800Sstevel@tonic-gate 				if (clnt_upd_sr_flags(sd->sd_nodes[i], sp,
11810Sstevel@tonic-gate 				    (sd->sd_flags & ~MD_SR_MB_DEVID), &xep))
11820Sstevel@tonic-gate 					mdclrerror(&xep);
11830Sstevel@tonic-gate 			}
11840Sstevel@tonic-gate 		}
11850Sstevel@tonic-gate 
11860Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
11870Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
11880Sstevel@tonic-gate 			/* Skip empty slots */
11890Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
11900Sstevel@tonic-gate 				continue;
11910Sstevel@tonic-gate 
11920Sstevel@tonic-gate 			/* We will unlocked this side below */
11930Sstevel@tonic-gate 			if (strcmp(mynode(), sd->sd_nodes[i]) == 0)
11940Sstevel@tonic-gate 				continue;
11950Sstevel@tonic-gate 
11962902Spetede 			/* no point calling dead nodes */
11972902Spetede 			if (!BT_TEST(node_active, i))
11982902Spetede 				continue;
11992902Spetede 
12000Sstevel@tonic-gate 			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
12010Sstevel@tonic-gate 				mdclrerror(&xep);
12020Sstevel@tonic-gate 		}
12030Sstevel@tonic-gate 	}
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 	/* level 3 */
12060Sstevel@tonic-gate 	if (rb_level > 2) {
12070Sstevel@tonic-gate 		if (halt_set(sp, &xep))
12080Sstevel@tonic-gate 			mdclrerror(&xep);
12090Sstevel@tonic-gate 	}
12100Sstevel@tonic-gate 
12110Sstevel@tonic-gate 	/* level 2 */
12120Sstevel@tonic-gate 	if (rb_level > 1) {
12130Sstevel@tonic-gate 		if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1)
12140Sstevel@tonic-gate 			mdclrerror(&xep);
12150Sstevel@tonic-gate 	}
12160Sstevel@tonic-gate 
12170Sstevel@tonic-gate 	/* level 1 */
12180Sstevel@tonic-gate 	if (rb_level > 0) {
12190Sstevel@tonic-gate 		if (!MD_ATSET_DESC(sd)) {
12200Sstevel@tonic-gate 			if (rel_own_bydd(sp, dd, FALSE, &xep))
12210Sstevel@tonic-gate 				mdclrerror(&xep);
12220Sstevel@tonic-gate 		}
12230Sstevel@tonic-gate 	}
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	/* level 0 */
12260Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
12270Sstevel@tonic-gate 	if (clnt_unlock_set(mynode(), cl_sk, &xep))
12280Sstevel@tonic-gate 		mdclrerror(&xep);
12290Sstevel@tonic-gate 	cl_set_setkey(NULL);
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	/* release signals back to what they were on entry */
12320Sstevel@tonic-gate 	if (procsigs(FALSE, &oldsigs, &xep) < 0)
12330Sstevel@tonic-gate 		mdclrerror(&xep);
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	return (rval);
12380Sstevel@tonic-gate }
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate int
meta_set_release(mdsetname_t * sp,md_error_t * ep)12410Sstevel@tonic-gate meta_set_release(
12420Sstevel@tonic-gate 	mdsetname_t		*sp,
12430Sstevel@tonic-gate 	md_error_t		*ep
12440Sstevel@tonic-gate )
12450Sstevel@tonic-gate {
12460Sstevel@tonic-gate 	int			rval = 0;
12470Sstevel@tonic-gate 	md_drive_desc		*dd;
12480Sstevel@tonic-gate 	mhd_mhiargs_t		mhiargs;
12490Sstevel@tonic-gate 	sigset_t		oldsigs;
12500Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
12510Sstevel@tonic-gate 	int			rb_level = 0;
12520Sstevel@tonic-gate 	md_error_t		xep = mdnullerror;
12530Sstevel@tonic-gate 
12540Sstevel@tonic-gate 	/* Make sure we own the set */
12550Sstevel@tonic-gate 	if (meta_check_ownership(sp, ep) != 0)
12560Sstevel@tonic-gate 		return (-1);
12570Sstevel@tonic-gate 
12580Sstevel@tonic-gate 	/* Get the drive descriptors */
12590Sstevel@tonic-gate 	if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
12600Sstevel@tonic-gate 	    ep)) == NULL)
12610Sstevel@tonic-gate 		if (! mdisok(ep))
12620Sstevel@tonic-gate 			return (-1);
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 	/* Get timeout values in case we need to roll back this release */
12650Sstevel@tonic-gate 	(void) memset(&mhiargs, '\0', sizeof (mhiargs));
12660Sstevel@tonic-gate 	if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0)
12670Sstevel@tonic-gate 		return (-1);
12680Sstevel@tonic-gate 
12690Sstevel@tonic-gate 	/* END CHECK CODE */
12700Sstevel@tonic-gate 
12710Sstevel@tonic-gate 	md_rb_sig_handling_on();
12720Sstevel@tonic-gate 
12730Sstevel@tonic-gate 	/* Lock the set on our side */
12740Sstevel@tonic-gate 	if (clnt_lock_set(mynode(), sp, ep)) {
12750Sstevel@tonic-gate 		rval = -1;
12760Sstevel@tonic-gate 		goto out;
12770Sstevel@tonic-gate 	}
12780Sstevel@tonic-gate 
12790Sstevel@tonic-gate 	RB_TEST(1, "release", ep)
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate 	RB_PREEMPT;
12820Sstevel@tonic-gate 	rb_level = 1;	/* level 1 */
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	RB_TEST(2, "release", ep)
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 	if (halt_set(sp, ep))
12870Sstevel@tonic-gate 		goto rollback;
12880Sstevel@tonic-gate 
12890Sstevel@tonic-gate 	RB_TEST(3, "release", ep)
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	RB_PREEMPT;
12920Sstevel@tonic-gate 	rb_level = 2;	/* level 2 */
12930Sstevel@tonic-gate 
12940Sstevel@tonic-gate 	RB_TEST(4, "release", ep)
12950Sstevel@tonic-gate 
12960Sstevel@tonic-gate 	if (rel_own_bydd(sp, dd, FALSE, ep))
12970Sstevel@tonic-gate 		goto rollback;
12980Sstevel@tonic-gate 
12990Sstevel@tonic-gate 	RB_TEST(5, "release", ep)
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 	RB_PREEMPT;
13020Sstevel@tonic-gate 	rb_level = 3;	/* level 3 */
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 	RB_TEST(6, "release", ep)
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 	if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1)
13070Sstevel@tonic-gate 		goto rollback;
13080Sstevel@tonic-gate 
13090Sstevel@tonic-gate 	RB_TEST(7, "release", ep)
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate out:
13120Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
13130Sstevel@tonic-gate 	if (clnt_unlock_set(mynode(), cl_sk, &xep)) {
13140Sstevel@tonic-gate 		if (rval == 0)
13150Sstevel@tonic-gate 			(void) mdstealerror(ep, &xep);
13160Sstevel@tonic-gate 		rval = -1;
13170Sstevel@tonic-gate 	}
13180Sstevel@tonic-gate 	cl_set_setkey(NULL);
13190Sstevel@tonic-gate 
13200Sstevel@tonic-gate 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate 	return (rval);
13230Sstevel@tonic-gate 
13240Sstevel@tonic-gate rollback:
13250Sstevel@tonic-gate 	/* Make sure we are blocking all signals */
13260Sstevel@tonic-gate 	if (procsigs(TRUE, &oldsigs, &xep) < 0)
13270Sstevel@tonic-gate 		mdclrerror(&xep);
13280Sstevel@tonic-gate 
13290Sstevel@tonic-gate 	rval = -1;
13300Sstevel@tonic-gate 
13310Sstevel@tonic-gate 	/* level 3 */
13320Sstevel@tonic-gate 	if (rb_level > 2) {
13330Sstevel@tonic-gate 		if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1)
13340Sstevel@tonic-gate 			mdclrerror(&xep);
13350Sstevel@tonic-gate 	}
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate 	/* level 2 */
13380Sstevel@tonic-gate 	if (rb_level > 1) {
13390Sstevel@tonic-gate 		if (tk_own_bydd(sp, dd, &mhiargs, FALSE, &xep))
13400Sstevel@tonic-gate 			mdclrerror(&xep);
13410Sstevel@tonic-gate 	}
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate 	/* level 1 */
13440Sstevel@tonic-gate 	if (rb_level > 0) {
13450Sstevel@tonic-gate 		if (setup_db_bydd(sp, dd, TRUE, &xep) == -1)
13460Sstevel@tonic-gate 			mdclrerror(&xep);
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 		/* Snarf set of trad diskset doesn't use stale information */
13490Sstevel@tonic-gate 		if (snarf_set(sp, FALSE, &xep))
13500Sstevel@tonic-gate 			mdclrerror(&xep);
13510Sstevel@tonic-gate 	}
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	/* level 0 */
13540Sstevel@tonic-gate 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
13550Sstevel@tonic-gate 	if (clnt_unlock_set(mynode(), cl_sk, &xep))
13560Sstevel@tonic-gate 		mdclrerror(&xep);
13570Sstevel@tonic-gate 	cl_set_setkey(NULL);
13580Sstevel@tonic-gate 
13590Sstevel@tonic-gate 	/* release signals back to what they were on entry */
13600Sstevel@tonic-gate 	if (procsigs(FALSE, &oldsigs, &xep) < 0)
13610Sstevel@tonic-gate 		mdclrerror(&xep);
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
13640Sstevel@tonic-gate 
13650Sstevel@tonic-gate 	return (rval);
13660Sstevel@tonic-gate }
1367