xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_db.c (revision 8452:89d32dfdae6e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51623Stw21770  * Common Development and Distribution License (the "License").
61623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*8452SJohn.Wren.Kennedy@Sun.COM 
220Sstevel@tonic-gate /*
236195Sachimm  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
290Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
320Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
330Sstevel@tonic-gate #endif
340Sstevel@tonic-gate 
350Sstevel@tonic-gate /*
360Sstevel@tonic-gate  * Metadevice database interfaces.
370Sstevel@tonic-gate  */
380Sstevel@tonic-gate 
390Sstevel@tonic-gate #define	MDDB
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #include <meta.h>
420Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
430Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
440Sstevel@tonic-gate #include <sys/lvm/mdio.h>
450Sstevel@tonic-gate #include <string.h>
460Sstevel@tonic-gate #include <strings.h>
470Sstevel@tonic-gate #include <ctype.h>
480Sstevel@tonic-gate 
/*
 * One entry of the daemon table below: a daemon name paired with a
 * string that (judging by the field name and the table contents) names
 * the signal to deliver to it.
 */
struct svm_daemon {
	char *svmd_name;
	char *svmd_kill_val;
};

/*
 * This is a list of the daemons that are not stopped by the SVM smf(5)
 * services. The mdmonitord is started via svc:/system/mdmonitor:default
 * but no contract(4) is constructed and so it is not stopped by smf(5).
 */
struct svm_daemon svmd_kill_list[] = {
	{ "mdmonitord", "HUP" },
	{ "mddoors", "KILL" },
};

/* Number of entries in svmd_kill_list[]. */
#define	DAEMON_COUNT	(sizeof (svmd_kill_list) / sizeof (svmd_kill_list[0]))
650Sstevel@tonic-gate 
660Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
670Sstevel@tonic-gate 
680Sstevel@tonic-gate /*
695109Spetede  * Are the locator blocks for the replicas using devids
705109Spetede  */
715109Spetede static int	devid_in_use = FALSE;
725109Spetede 
735109Spetede static char *
getlongname(struct mddb_config * c,md_error_t * ep)745109Spetede getlongname(
755109Spetede 	struct mddb_config	*c,
765109Spetede 	md_error_t		*ep
775109Spetede )
785109Spetede {
795109Spetede 	char		*diskname = NULL;
805109Spetede 	char		*devid_str;
815109Spetede 	devid_nmlist_t	*disklist = NULL;
825109Spetede 
835109Spetede 	c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
845109Spetede 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
855109Spetede 		(void) mdstealerror(ep, &c->c_mde);
865109Spetede 		return (NULL);
875109Spetede 	}
885109Spetede 
895109Spetede 	if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) {
905109Spetede 		c->c_locator.l_devid = (uintptr_t)
915109Spetede 		    Malloc(c->c_locator.l_devid_sz);
925109Spetede 		c->c_locator.l_devid_flags =
935109Spetede 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
945109Spetede 	} else {
955109Spetede 		(void) mderror(ep, MDE_NODEVID, "");
965109Spetede 		goto out;
975109Spetede 	}
985109Spetede 
995109Spetede 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
1005109Spetede 		(void) mdstealerror(ep, &c->c_mde);
1015109Spetede 		goto out;
1025109Spetede 	}
1035109Spetede 
1045109Spetede 	if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
1055109Spetede 		(void) mderror(ep, MDE_NODEVID, "");
1065109Spetede 		goto out;
1075109Spetede 	}
1085109Spetede 
1095109Spetede 	if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
1105109Spetede 		(void) mdstealerror(ep, &c->c_mde);
1115109Spetede 		goto out;
1125109Spetede 	}
1135109Spetede 
1145109Spetede 	if (c->c_locator.l_devid != NULL) {
1155109Spetede 		if (meta_deviceid_to_nmlist("/dev/dsk",
1165109Spetede 		    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
1175109Spetede 		    c->c_locator.l_minor_name, &disklist) != 0) {
1185109Spetede 			devid_str = devid_str_encode(
1195109Spetede 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
1205109Spetede 			(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
1215109Spetede 			mderrorextra(ep, devid_str);
1225109Spetede 			if (devid_str != NULL)
1235109Spetede 				devid_str_free(devid_str);
1245109Spetede 			goto out;
1255109Spetede 		}
1265109Spetede 		diskname = Strdup(disklist[0].devname);
1275109Spetede 	}
1285109Spetede 
1295109Spetede out:
1305109Spetede 	if (disklist != NULL)
1315109Spetede 		devid_free_nmlist(disklist);
1325109Spetede 
1335109Spetede 	if (c->c_locator.l_devid != NULL)
1345109Spetede 		Free((void *)(uintptr_t)c->c_locator.l_devid);
1355109Spetede 
1365109Spetede 	return (diskname);
1375109Spetede }
1385109Spetede 
1395109Spetede /*
1400Sstevel@tonic-gate  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
1410Sstevel@tonic-gate  */
1420Sstevel@tonic-gate md_timeval32_t
meta_get_lb_inittime(mdsetname_t * sp,md_error_t * ep)1430Sstevel@tonic-gate meta_get_lb_inittime(
1440Sstevel@tonic-gate 	mdsetname_t	*sp,
1450Sstevel@tonic-gate 	md_error_t	*ep
1460Sstevel@tonic-gate )
1470Sstevel@tonic-gate {
1480Sstevel@tonic-gate 	mddb_config_t	c;
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate 	/* Fill in setno, setname, and sideno */
1530Sstevel@tonic-gate 	c.c_setno = sp->setno;
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
1560Sstevel@tonic-gate 		(void) mdstealerror(ep, &c.c_mde);
1570Sstevel@tonic-gate 	}
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate 	return (c.c_timestamp);
1600Sstevel@tonic-gate }
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate /*
1630Sstevel@tonic-gate  * mkmasterblks writes out the master blocks of the mddb to the replica.
1640Sstevel@tonic-gate  *
1650Sstevel@tonic-gate  * In a MN diskset, this is called by the node that is adding this replica
1660Sstevel@tonic-gate  * to the diskset.
1670Sstevel@tonic-gate  */
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate #define	MDDB_VERIFY_SIZE	8192
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate static int
mkmasterblks(mdsetname_t * sp,mdname_t * np,int fd,daddr_t firstblk,int dbsize,md_timeval32_t inittime,md_error_t * ep)1720Sstevel@tonic-gate mkmasterblks(
1730Sstevel@tonic-gate 	mdsetname_t	*sp,
1740Sstevel@tonic-gate 	mdname_t	*np,
1750Sstevel@tonic-gate 	int		fd,
1760Sstevel@tonic-gate 	daddr_t		firstblk,
1770Sstevel@tonic-gate 	int		dbsize,
1780Sstevel@tonic-gate 	md_timeval32_t	inittime,
1790Sstevel@tonic-gate 	md_error_t	*ep
1800Sstevel@tonic-gate )
1810Sstevel@tonic-gate {
1820Sstevel@tonic-gate 	int		consecutive;
1830Sstevel@tonic-gate 	md_timeval32_t	tp;
1840Sstevel@tonic-gate 	struct mddb_mb	*mb;
1850Sstevel@tonic-gate 	char		*buffer;
1860Sstevel@tonic-gate 	int		iosize;
1870Sstevel@tonic-gate 	md_set_desc	*sd;
1880Sstevel@tonic-gate 	int		mn_set = 0;
1890Sstevel@tonic-gate 	daddr_t		startblk;
1900Sstevel@tonic-gate 	int		cnt;
1910Sstevel@tonic-gate 	ddi_devid_t	devid;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1940Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1950Sstevel@tonic-gate 			return (-1);
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
1980Sstevel@tonic-gate 			mn_set = 1;		/* Used later */
1990Sstevel@tonic-gate 		}
2000Sstevel@tonic-gate 	}
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	/*
2030Sstevel@tonic-gate 	 * Loop to verify the entire mddb region on disk is read/writable.
2040Sstevel@tonic-gate 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
2050Sstevel@tonic-gate 	 * chunks.
2060Sstevel@tonic-gate 	 *
2070Sstevel@tonic-gate 	 * A side-effect of this loop is to zero out the entire mddb region
2080Sstevel@tonic-gate 	 */
2090Sstevel@tonic-gate 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
2100Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 	startblk = firstblk;
2130Sstevel@tonic-gate 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate 		if (cnt > MDDB_VERIFY_SIZE)
2160Sstevel@tonic-gate 			consecutive = MDDB_VERIFY_SIZE;
2170Sstevel@tonic-gate 		else
2180Sstevel@tonic-gate 			consecutive = cnt;
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2210Sstevel@tonic-gate 			Free(buffer);
2220Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2230Sstevel@tonic-gate 		}
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 		iosize = DEV_BSIZE * consecutive;
2260Sstevel@tonic-gate 		if (write(fd, buffer, iosize) != iosize) {
2270Sstevel@tonic-gate 			Free(buffer);
2280Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2290Sstevel@tonic-gate 		}
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2320Sstevel@tonic-gate 			Free(buffer);
2330Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2340Sstevel@tonic-gate 		}
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate 		if (read(fd, buffer, iosize) != iosize) {
2370Sstevel@tonic-gate 			Free(buffer);
2380Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2390Sstevel@tonic-gate 		}
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 		startblk += consecutive;
2420Sstevel@tonic-gate 	}
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	Free(buffer);
2450Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
2460Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) == -1) {
2490Sstevel@tonic-gate 		Free(mb);
2500Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2510Sstevel@tonic-gate 	}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_MB;
2540Sstevel@tonic-gate 	/*
2550Sstevel@tonic-gate 	 * If a MN diskset, set master block revision for a MN set.
2560Sstevel@tonic-gate 	 * Even though the master block structure is no different
2570Sstevel@tonic-gate 	 * for a MN set, setting the revision field to a different
2580Sstevel@tonic-gate 	 * number keeps any pre-MN_diskset code from accessing
2590Sstevel@tonic-gate 	 * this diskset.  It also allows for an early determination
2600Sstevel@tonic-gate 	 * of a MN diskset when reading in from disk so that the
2610Sstevel@tonic-gate 	 * proper size locator block and locator names structure
2620Sstevel@tonic-gate 	 * can be read in thus saving time on diskset startup.
2630Sstevel@tonic-gate 	 */
2640Sstevel@tonic-gate 	if (mn_set)
2650Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MNMB;
2660Sstevel@tonic-gate 	else
2670Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MB;
2680Sstevel@tonic-gate 	mb->mb_timestamp = tp;
2690Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
2700Sstevel@tonic-gate 	mb->mb_blkcnt = dbsize - 1;
2710Sstevel@tonic-gate 	mb->mb_blkno = firstblk;
2720Sstevel@tonic-gate 	mb->mb_nextblk = 0;
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	mb->mb_blkmap.m_firstblk = firstblk + 1;
2750Sstevel@tonic-gate 	mb->mb_blkmap.m_consecutive = dbsize - 1;
2760Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2770Sstevel@tonic-gate 		mb->mb_setcreatetime = inittime;
2780Sstevel@tonic-gate 	}
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate 	/*
2810Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
2820Sstevel@tonic-gate 	 * the master block. The saved devid is used to provide a mapping
2830Sstevel@tonic-gate 	 * between this disk's devid and the devid stored into the master
2840Sstevel@tonic-gate 	 * block. This allows the disk image to be self-identifying
2850Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
2860Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
2870Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
2880Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
2890Sstevel@tonic-gate 	 * in the remote copy scenario.
2900Sstevel@tonic-gate 	 */
2910Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
2920Sstevel@tonic-gate 		size_t len;
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 		len = devid_sizeof(devid);
2950Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
2960Sstevel@tonic-gate 			/* there is enough space to store the devid */
2970Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
2980Sstevel@tonic-gate 			mb->mb_devid_len = len;
2990Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, devid, len);
3000Sstevel@tonic-gate 		}
3010Sstevel@tonic-gate 		devid_free(devid);
3020Sstevel@tonic-gate 	}
3030Sstevel@tonic-gate 
3040Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3050Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3080Sstevel@tonic-gate 		Free(mb);
3090Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3100Sstevel@tonic-gate 	}
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3130Sstevel@tonic-gate 		Free(mb);
3140Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3150Sstevel@tonic-gate 	}
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3180Sstevel@tonic-gate 		Free(mb);
3190Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3200Sstevel@tonic-gate 	}
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3230Sstevel@tonic-gate 		Free(mb);
3240Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3250Sstevel@tonic-gate 	}
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
3285109Spetede 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
3290Sstevel@tonic-gate 		Free(mb);
3300Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_NOTVERIFIED,
3315109Spetede 		    meta_getminor(np->dev), sp->setno, 0, np->rname));
3320Sstevel@tonic-gate 	}
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate 	Free(mb);
3350Sstevel@tonic-gate 	return (0);
3360Sstevel@tonic-gate }
3370Sstevel@tonic-gate 
3380Sstevel@tonic-gate void
meta_mkdummymaster(mdsetname_t * sp,int fd,daddr_t firstblk)3390Sstevel@tonic-gate meta_mkdummymaster(
3400Sstevel@tonic-gate 	mdsetname_t	*sp,
3410Sstevel@tonic-gate 	int		fd,
3420Sstevel@tonic-gate 	daddr_t		firstblk
3430Sstevel@tonic-gate )
3440Sstevel@tonic-gate {
3450Sstevel@tonic-gate 	md_timeval32_t	tp;
3460Sstevel@tonic-gate 	struct mddb_mb	*mb;
3470Sstevel@tonic-gate 	ddi_devid_t	devid;
3480Sstevel@tonic-gate 	md_set_desc	*sd;
3490Sstevel@tonic-gate 	md_error_t	ep = mdnullerror;
3500Sstevel@tonic-gate 	md_timeval32_t	inittime;
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	/*
3530Sstevel@tonic-gate 	 * No dummy master blocks are written for a MN diskset since devids
3540Sstevel@tonic-gate 	 * are not supported in MN disksets.
3550Sstevel@tonic-gate 	 */
3560Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
3570Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
3580Sstevel@tonic-gate 			return;
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
3610Sstevel@tonic-gate 			return;
3620Sstevel@tonic-gate 	}
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
3650Sstevel@tonic-gate 		return;
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_DU;
3680Sstevel@tonic-gate 	mb->mb_revision = MDDB_REV_MB;
3690Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
3700Sstevel@tonic-gate 	inittime = meta_get_lb_inittime(sp, &ep);
3710Sstevel@tonic-gate 	mb->mb_setcreatetime = inittime;
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) != -1)
3740Sstevel@tonic-gate 		mb->mb_timestamp = tp;
3750Sstevel@tonic-gate 
3760Sstevel@tonic-gate 	/*
3770Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
3780Sstevel@tonic-gate 	 * the master block.  This allows the disk image to be self-identifying
3790Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
3800Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
3810Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
3820Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
3830Sstevel@tonic-gate 	 * in the remote copy scenario.
3840Sstevel@tonic-gate 	 */
3850Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
3860Sstevel@tonic-gate 		int len;
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 		len = devid_sizeof(devid);
3890Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
3900Sstevel@tonic-gate 			/* there is enough space to store the devid */
3910Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
3920Sstevel@tonic-gate 			mb->mb_devid_len = len;
3930Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, (char *)devid, len);
3940Sstevel@tonic-gate 		}
3950Sstevel@tonic-gate 		devid_free(devid);
3960Sstevel@tonic-gate 	}
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3990Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	/*
4020Sstevel@tonic-gate 	 * If any of these operations fail, we need to inform the
4030Sstevel@tonic-gate 	 * user that the disk won't be self identifying. When support
4040Sstevel@tonic-gate 	 * for importing remotely replicated disksets is added, we
4050Sstevel@tonic-gate 	 * want to add the error messages here.
4060Sstevel@tonic-gate 	 */
4070Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4080Sstevel@tonic-gate 		goto out;
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4110Sstevel@tonic-gate 		goto out;
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4140Sstevel@tonic-gate 		goto out;
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4170Sstevel@tonic-gate 		goto out;
4180Sstevel@tonic-gate 
4190Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
4200Sstevel@tonic-gate 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
4210Sstevel@tonic-gate 		goto out;
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate out:
4240Sstevel@tonic-gate 	Free(mb);
4250Sstevel@tonic-gate }
4260Sstevel@tonic-gate 
4270Sstevel@tonic-gate static int
buildconf(mdsetname_t * sp,md_error_t * ep)4280Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
4290Sstevel@tonic-gate {
4300Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
4310Sstevel@tonic-gate 	md_replicalist_t	*rl;
4320Sstevel@tonic-gate 	FILE			*cfp = NULL;
4330Sstevel@tonic-gate 	FILE			*mfp = NULL;
4340Sstevel@tonic-gate 	struct stat		sbuf;
4350Sstevel@tonic-gate 	int			rval = 0;
4360Sstevel@tonic-gate 	int			in_miniroot = 0;
4370Sstevel@tonic-gate 	char			line[MDDB_BOOTLIST_MAX_LEN];
4380Sstevel@tonic-gate 	char			*tname = NULL;
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 	/* get list of local replicas */
4410Sstevel@tonic-gate 	if (! metaislocalset(sp))
4420Sstevel@tonic-gate 		return (0);
4430Sstevel@tonic-gate 
4440Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
4450Sstevel@tonic-gate 		return (-1);
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate 	/* open tempfile, copy permissions of original file */
4480Sstevel@tonic-gate 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
4490Sstevel@tonic-gate 		/*
4500Sstevel@tonic-gate 		 * On the miniroot tmp files must be created in /var/tmp.
4510Sstevel@tonic-gate 		 * If we get a EROFS error, we assume that we are in the
4520Sstevel@tonic-gate 		 * miniroot.
4530Sstevel@tonic-gate 		 */
4540Sstevel@tonic-gate 		if (errno != EROFS)
4550Sstevel@tonic-gate 			goto error;
4560Sstevel@tonic-gate 		in_miniroot = 1;
4570Sstevel@tonic-gate 		errno = 0;
4580Sstevel@tonic-gate 		tname = tempnam("/var/tmp", "slvm_");
4590Sstevel@tonic-gate 		if (tname == NULL && errno == EROFS) {
4600Sstevel@tonic-gate 			/*
4610Sstevel@tonic-gate 			 * If we are booted on a read-only root because
4620Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
4630Sstevel@tonic-gate 			 * any scary error messages.
4640Sstevel@tonic-gate 			 */
4650Sstevel@tonic-gate 			errno = 0;
4660Sstevel@tonic-gate 			goto out;
4670Sstevel@tonic-gate 		}
4680Sstevel@tonic-gate 
4690Sstevel@tonic-gate 		/* open tempfile, copy permissions of original file */
4700Sstevel@tonic-gate 		if ((cfp = fopen(tname, "w+")) == NULL)
4710Sstevel@tonic-gate 			goto error;
4720Sstevel@tonic-gate 	}
4730Sstevel@tonic-gate 	if (stat(META_DBCONF, &sbuf) == 0) {
4740Sstevel@tonic-gate 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
4750Sstevel@tonic-gate 			goto error;
4760Sstevel@tonic-gate 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
4770Sstevel@tonic-gate 			goto error;
4780Sstevel@tonic-gate 	}
4790Sstevel@tonic-gate 
4800Sstevel@tonic-gate 	/* print header */
4810Sstevel@tonic-gate 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
4820Sstevel@tonic-gate 		goto error;
4830Sstevel@tonic-gate 	if (fprintf(cfp, "do not hand edit\n") < 0)
4840Sstevel@tonic-gate 		goto error;
4850Sstevel@tonic-gate 	if (fprintf(cfp,
4865109Spetede 	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
4870Sstevel@tonic-gate 		goto error;
4880Sstevel@tonic-gate 
4890Sstevel@tonic-gate 	/* dump replicas */
4900Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
4910Sstevel@tonic-gate 		md_replica_t	*r = rl->rl_repp;
4920Sstevel@tonic-gate 		int		checksum = 42;
4930Sstevel@tonic-gate 		int		i;
4940Sstevel@tonic-gate 		char		*devidp;
4950Sstevel@tonic-gate 		minor_t		min;
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
4980Sstevel@tonic-gate 		/* If devid code can't encode devidp - skip entry */
4990Sstevel@tonic-gate 		if (devidp == NULL) {
5000Sstevel@tonic-gate 			continue;
5010Sstevel@tonic-gate 		}
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 		/* compute checksum */
5040Sstevel@tonic-gate 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
5050Sstevel@tonic-gate 		    (i < sizeof (r->r_driver_name))); i++) {
5060Sstevel@tonic-gate 			checksum -= r->r_driver_name[i];
5070Sstevel@tonic-gate 		}
5080Sstevel@tonic-gate 		min = meta_getminor(r->r_namep->dev);
5090Sstevel@tonic-gate 		checksum -= min;
5100Sstevel@tonic-gate 		checksum -= r->r_blkno;
5110Sstevel@tonic-gate 
5120Sstevel@tonic-gate 		for (i = 0; i < strlen(devidp); i++) {
5130Sstevel@tonic-gate 			checksum -= devidp[i];
5140Sstevel@tonic-gate 		}
5150Sstevel@tonic-gate 		/* print info */
5160Sstevel@tonic-gate 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
5170Sstevel@tonic-gate 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
5180Sstevel@tonic-gate 			goto error;
5190Sstevel@tonic-gate 		}
5200Sstevel@tonic-gate 
5210Sstevel@tonic-gate 		devid_str_free(devidp);
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 
5240Sstevel@tonic-gate 	/* close and rename to real file */
5250Sstevel@tonic-gate 	if (fflush(cfp) != 0)
5260Sstevel@tonic-gate 		goto error;
5270Sstevel@tonic-gate 	if (fsync(fileno(cfp)) != 0)
5280Sstevel@tonic-gate 		goto error;
5290Sstevel@tonic-gate 	if (fclose(cfp) != 0) {
5300Sstevel@tonic-gate 		cfp = NULL;
5310Sstevel@tonic-gate 		goto error;
5320Sstevel@tonic-gate 	}
5330Sstevel@tonic-gate 	cfp = NULL;
5340Sstevel@tonic-gate 
5350Sstevel@tonic-gate 	/*
5360Sstevel@tonic-gate 	 * Renames don't work in the miniroot since tmpfiles are
5370Sstevel@tonic-gate 	 * created in /var/tmp. Hence we copy the data out.
5380Sstevel@tonic-gate 	 */
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	if (! in_miniroot) {
5410Sstevel@tonic-gate 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
5420Sstevel@tonic-gate 			goto error;
5430Sstevel@tonic-gate 	} else {
5440Sstevel@tonic-gate 		if ((cfp = fopen(tname, "r")) == NULL)
5450Sstevel@tonic-gate 			goto error;
5460Sstevel@tonic-gate 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
5470Sstevel@tonic-gate 			goto error;
5480Sstevel@tonic-gate 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
5490Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
5500Sstevel@tonic-gate 				goto error;
5510Sstevel@tonic-gate 		}
5520Sstevel@tonic-gate 		(void) fclose(cfp);
5530Sstevel@tonic-gate 		cfp = NULL;
5540Sstevel@tonic-gate 		if (fflush(mfp) != 0)
5550Sstevel@tonic-gate 			goto error;
5560Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
5570Sstevel@tonic-gate 			goto error;
5580Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
5590Sstevel@tonic-gate 			mfp = NULL;
5600Sstevel@tonic-gate 			goto error;
5610Sstevel@tonic-gate 		}
5620Sstevel@tonic-gate 		/* delete the tempfile */
5630Sstevel@tonic-gate 		(void) unlink(tname);
5640Sstevel@tonic-gate 	}
5650Sstevel@tonic-gate 	/* success */
5660Sstevel@tonic-gate 	rval = 0;
5670Sstevel@tonic-gate 	goto out;
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate 	/* tempfile error */
5700Sstevel@tonic-gate error:
5710Sstevel@tonic-gate 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5725109Spetede 	    mdsyserror(ep, errno, META_DBCONFTMP);
5730Sstevel@tonic-gate 
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 	/* cleanup, return success */
5760Sstevel@tonic-gate out:
5770Sstevel@tonic-gate 	if (rlp != NULL)
5780Sstevel@tonic-gate 		metafreereplicalist(rlp);
5790Sstevel@tonic-gate 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
5800Sstevel@tonic-gate 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5815109Spetede 		    mdsyserror(ep, errno, META_DBCONFTMP);
5820Sstevel@tonic-gate 	}
5830Sstevel@tonic-gate 	free(tname);
5840Sstevel@tonic-gate 	return (rval);
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate  * check replica for dev
5890Sstevel@tonic-gate  */
5900Sstevel@tonic-gate static int
in_replica(mdsetname_t * sp,md_replica_t * rp,mdname_t * np,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)5910Sstevel@tonic-gate in_replica(
5920Sstevel@tonic-gate 	mdsetname_t	*sp,
5930Sstevel@tonic-gate 	md_replica_t	*rp,
5940Sstevel@tonic-gate 	mdname_t	*np,
5950Sstevel@tonic-gate 	diskaddr_t	slblk,
5960Sstevel@tonic-gate 	diskaddr_t	nblks,
5970Sstevel@tonic-gate 	md_error_t	*ep
5980Sstevel@tonic-gate )
5990Sstevel@tonic-gate {
6000Sstevel@tonic-gate 	mdname_t	*repnp = rp->r_namep;
6010Sstevel@tonic-gate 	diskaddr_t	rep_sblk = rp->r_blkno;
6020Sstevel@tonic-gate 	diskaddr_t	rep_nblks = rp->r_nblk;
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 	/* should be in the same set */
6050Sstevel@tonic-gate 	assert(sp != NULL);
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate 	/* if error in master block, assume whole partition */
6080Sstevel@tonic-gate 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
6090Sstevel@tonic-gate 	    (rep_nblks == MD_DISKADDR_ERROR)) {
6100Sstevel@tonic-gate 		rep_sblk = 0;
6110Sstevel@tonic-gate 		rep_nblks = MD_DISKADDR_ERROR;
6120Sstevel@tonic-gate 	}
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate 	/* check overlap */
6150Sstevel@tonic-gate 	if (meta_check_overlap(
6160Sstevel@tonic-gate 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
6170Sstevel@tonic-gate 		return (-1);
6180Sstevel@tonic-gate 	}
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate 	/* return success */
6210Sstevel@tonic-gate 	return (0);
6220Sstevel@tonic-gate }
6230Sstevel@tonic-gate 
6240Sstevel@tonic-gate /*
6250Sstevel@tonic-gate  * check to see if we're in a replica
6260Sstevel@tonic-gate  */
6270Sstevel@tonic-gate int
meta_check_inreplica(mdsetname_t * sp,mdname_t * np,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)6280Sstevel@tonic-gate meta_check_inreplica(
6290Sstevel@tonic-gate 	mdsetname_t		*sp,
6300Sstevel@tonic-gate 	mdname_t		*np,
6310Sstevel@tonic-gate 	diskaddr_t		slblk,
6320Sstevel@tonic-gate 	diskaddr_t		nblks,
6330Sstevel@tonic-gate 	md_error_t		*ep
6340Sstevel@tonic-gate )
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
6370Sstevel@tonic-gate 	md_replicalist_t	*rl;
6380Sstevel@tonic-gate 	int			rval = 0;
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 	/* should have a set */
6410Sstevel@tonic-gate 	assert(sp != NULL);
6420Sstevel@tonic-gate 
6430Sstevel@tonic-gate 	/* for each replica */
6440Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
6450Sstevel@tonic-gate 		return (-1);
6460Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
6470Sstevel@tonic-gate 		md_replica_t	*rp = rl->rl_repp;
6480Sstevel@tonic-gate 
6490Sstevel@tonic-gate 		/* check replica */
6500Sstevel@tonic-gate 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
6510Sstevel@tonic-gate 			rval = -1;
6520Sstevel@tonic-gate 			break;
6530Sstevel@tonic-gate 		}
6540Sstevel@tonic-gate 	}
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	/* cleanup, return success */
6570Sstevel@tonic-gate 	metafreereplicalist(rlp);
6580Sstevel@tonic-gate 	return (rval);
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate 
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate  * check replica
6630Sstevel@tonic-gate  */
6640Sstevel@tonic-gate int
meta_check_replica(mdsetname_t * sp,mdname_t * np,mdchkopts_t options,diskaddr_t slblk,diskaddr_t nblks,md_error_t * ep)6650Sstevel@tonic-gate meta_check_replica(
6660Sstevel@tonic-gate 	mdsetname_t	*sp,		/* set to check against */
6670Sstevel@tonic-gate 	mdname_t	*np,		/* component to check against */
6680Sstevel@tonic-gate 	mdchkopts_t	options,	/* option flags */
6690Sstevel@tonic-gate 	diskaddr_t	slblk,		/* start logical block */
6700Sstevel@tonic-gate 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
6710Sstevel@tonic-gate 	md_error_t	*ep		/* error packet */
6720Sstevel@tonic-gate )
6730Sstevel@tonic-gate {
6740Sstevel@tonic-gate 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
6750Sstevel@tonic-gate 
6760Sstevel@tonic-gate 	/* make sure we have a disk */
6770Sstevel@tonic-gate 	if (metachkcomp(np, ep) != 0)
6780Sstevel@tonic-gate 		return (-1);
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate 	/* check to ensure that it is not already in use */
6810Sstevel@tonic-gate 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
6820Sstevel@tonic-gate 		return (-1);
6830Sstevel@tonic-gate 	}
6840Sstevel@tonic-gate 
6850Sstevel@tonic-gate 	if (options & MDCHK_ALLOW_NODBS)
6860Sstevel@tonic-gate 		return (0);
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	if (options & MDCHK_DRVINSET)
6890Sstevel@tonic-gate 		return (0);
6900Sstevel@tonic-gate 
6910Sstevel@tonic-gate 	/* make sure it is in the set */
6920Sstevel@tonic-gate 	if (meta_check_inset(sp, np, ep) != 0)
6930Sstevel@tonic-gate 		return (-1);
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate 	/* make sure its not in a metadevice */
6960Sstevel@tonic-gate 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
6970Sstevel@tonic-gate 		return (-1);
6980Sstevel@tonic-gate 
6990Sstevel@tonic-gate 	/* return success */
7000Sstevel@tonic-gate 	return (0);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate static int
update_dbinfo_on_drives(mdsetname_t * sp,md_drive_desc * dd,int set_locked,int force,md_error_t * ep)7040Sstevel@tonic-gate update_dbinfo_on_drives(
7050Sstevel@tonic-gate 	mdsetname_t	*sp,
7060Sstevel@tonic-gate 	md_drive_desc	*dd,
7070Sstevel@tonic-gate 	int		set_locked,
7080Sstevel@tonic-gate 	int		force,
7090Sstevel@tonic-gate 	md_error_t	*ep
7100Sstevel@tonic-gate )
7110Sstevel@tonic-gate {
7120Sstevel@tonic-gate 	md_set_desc		*sd;
7130Sstevel@tonic-gate 	int			i;
7140Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
7150Sstevel@tonic-gate 	int			rval = 0;
7160Sstevel@tonic-gate 	md_mnnode_desc		*nd;
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
7190Sstevel@tonic-gate 		return (-1);
7200Sstevel@tonic-gate 
7210Sstevel@tonic-gate 	if (! set_locked) {
7220Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
7230Sstevel@tonic-gate 			md_error_t xep = mdnullerror;
7240Sstevel@tonic-gate 			sigset_t sigs;
7250Sstevel@tonic-gate 			/* Make sure we are blocking all signals */
7260Sstevel@tonic-gate 			if (procsigs(TRUE, &sigs, &xep) < 0)
7270Sstevel@tonic-gate 				mdclrerror(&xep);
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 			nd = sd->sd_nodelist;
7300Sstevel@tonic-gate 			while (nd) {
7310Sstevel@tonic-gate 				if (force && strcmp(nd->nd_nodename,
7320Sstevel@tonic-gate 				    mynode()) != 0) {
7330Sstevel@tonic-gate 					nd = nd->nd_next;
7340Sstevel@tonic-gate 					continue;
7350Sstevel@tonic-gate 				}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7380Sstevel@tonic-gate 					nd = nd->nd_next;
7390Sstevel@tonic-gate 					continue;
7400Sstevel@tonic-gate 				}
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
7430Sstevel@tonic-gate 					return (-1);
7440Sstevel@tonic-gate 				nd = nd->nd_next;
7450Sstevel@tonic-gate 			}
7460Sstevel@tonic-gate 		} else {
7470Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
7480Sstevel@tonic-gate 				/* Skip empty slots */
7490Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
7500Sstevel@tonic-gate 					continue;
7510Sstevel@tonic-gate 
7520Sstevel@tonic-gate 				if (force && strcmp(sd->sd_nodes[i],
7530Sstevel@tonic-gate 				    mynode()) != 0)
7540Sstevel@tonic-gate 					continue;
7550Sstevel@tonic-gate 
7560Sstevel@tonic-gate 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
7570Sstevel@tonic-gate 					return (-1);
7580Sstevel@tonic-gate 			}
7590Sstevel@tonic-gate 		}
7600Sstevel@tonic-gate 	}
7610Sstevel@tonic-gate 
7620Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
7630Sstevel@tonic-gate 		nd = sd->sd_nodelist;
7640Sstevel@tonic-gate 		while (nd) {
7650Sstevel@tonic-gate 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
7660Sstevel@tonic-gate 				nd = nd->nd_next;
7670Sstevel@tonic-gate 				continue;
7680Sstevel@tonic-gate 			}
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7710Sstevel@tonic-gate 				nd = nd->nd_next;
7720Sstevel@tonic-gate 				continue;
7730Sstevel@tonic-gate 			}
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
7760Sstevel@tonic-gate 			    == -1) {
7770Sstevel@tonic-gate 				rval = -1;
7780Sstevel@tonic-gate 				break;
7790Sstevel@tonic-gate 			}
7800Sstevel@tonic-gate 			nd = nd->nd_next;
7810Sstevel@tonic-gate 		}
7820Sstevel@tonic-gate 	} else {
7830Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
7840Sstevel@tonic-gate 			/* Skip empty slots */
7850Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
7860Sstevel@tonic-gate 				continue;
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
7890Sstevel@tonic-gate 				continue;
7900Sstevel@tonic-gate 
7910Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
7920Sstevel@tonic-gate 			    == -1) {
7930Sstevel@tonic-gate 				rval = -1;
7940Sstevel@tonic-gate 				break;
7950Sstevel@tonic-gate 			}
7960Sstevel@tonic-gate 		}
7970Sstevel@tonic-gate 	}
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 	if (! set_locked) {
8000Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
8010Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
8020Sstevel@tonic-gate 			nd = sd->sd_nodelist;
8030Sstevel@tonic-gate 			while (nd) {
8040Sstevel@tonic-gate 				if (force &&
8050Sstevel@tonic-gate 				    strcmp(nd->nd_nodename, mynode()) != 0) {
8060Sstevel@tonic-gate 					nd = nd->nd_next;
8070Sstevel@tonic-gate 					continue;
8080Sstevel@tonic-gate 				}
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
8110Sstevel@tonic-gate 					nd = nd->nd_next;
8120Sstevel@tonic-gate 					continue;
8130Sstevel@tonic-gate 				}
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
8160Sstevel@tonic-gate 				    ep)) {
8170Sstevel@tonic-gate 					rval = -1;
8180Sstevel@tonic-gate 					break;
8190Sstevel@tonic-gate 				}
8200Sstevel@tonic-gate 				nd = nd->nd_next;
8210Sstevel@tonic-gate 			}
8220Sstevel@tonic-gate 		} else {
8230Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
8240Sstevel@tonic-gate 				/* Skip empty slots */
8250Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
8260Sstevel@tonic-gate 					continue;
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 				if (force &&
8290Sstevel@tonic-gate 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
8300Sstevel@tonic-gate 					continue;
8310Sstevel@tonic-gate 
8320Sstevel@tonic-gate 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
8330Sstevel@tonic-gate 				    ep)) {
8340Sstevel@tonic-gate 					rval = -1;
8350Sstevel@tonic-gate 					break;
8360Sstevel@tonic-gate 				}
8370Sstevel@tonic-gate 			}
8380Sstevel@tonic-gate 
8390Sstevel@tonic-gate 		}
8400Sstevel@tonic-gate 		cl_set_setkey(NULL);
8410Sstevel@tonic-gate 	}
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 	return (rval);
8440Sstevel@tonic-gate }
8450Sstevel@tonic-gate 
8460Sstevel@tonic-gate int
meta_db_addsidenms(mdsetname_t * sp,mdname_t * np,daddr_t blkno,int bcast,md_error_t * ep)8470Sstevel@tonic-gate meta_db_addsidenms(
8480Sstevel@tonic-gate 	mdsetname_t	*sp,
8490Sstevel@tonic-gate 	mdname_t	*np,
8500Sstevel@tonic-gate 	daddr_t		blkno,
8510Sstevel@tonic-gate 	int		bcast,
8520Sstevel@tonic-gate 	md_error_t	*ep
8530Sstevel@tonic-gate )
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate 	side_t		sideno;
8560Sstevel@tonic-gate 	char		*bname = NULL;
8570Sstevel@tonic-gate 	char		*dname = NULL;
8580Sstevel@tonic-gate 	minor_t		mnum;
8590Sstevel@tonic-gate 	mddb_config_t	c;
8600Sstevel@tonic-gate 	int		done;
8610Sstevel@tonic-gate 	int		rval = 0;
8620Sstevel@tonic-gate 	md_set_desc	*sd;
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate 	sideno = MD_SIDEWILD;
8650Sstevel@tonic-gate 	/*CONSTCOND*/
8660Sstevel@tonic-gate 	while (1) {
8670Sstevel@tonic-gate 		if (bname != NULL) {
8680Sstevel@tonic-gate 			Free(bname);
8690Sstevel@tonic-gate 			bname = NULL;
8700Sstevel@tonic-gate 		}
8710Sstevel@tonic-gate 		if (dname != NULL) {
8720Sstevel@tonic-gate 			Free(dname);
8730Sstevel@tonic-gate 			dname = NULL;
8740Sstevel@tonic-gate 		}
8750Sstevel@tonic-gate 		if ((done = meta_getnextside_devinfo(sp, np->bname,
8760Sstevel@tonic-gate 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
8770Sstevel@tonic-gate 			rval = -1;
8780Sstevel@tonic-gate 			break;
8790Sstevel@tonic-gate 		}
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 		if (done == 0)
8820Sstevel@tonic-gate 			break;
8830Sstevel@tonic-gate 
8840Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
8850Sstevel@tonic-gate 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
8860Sstevel@tonic-gate 				rval = -1;
8870Sstevel@tonic-gate 				break;
8880Sstevel@tonic-gate 			}
8890Sstevel@tonic-gate 		}
8900Sstevel@tonic-gate 
8910Sstevel@tonic-gate 		/*
8920Sstevel@tonic-gate 		 * Send addsidenms to all nodes using rpc.mdcommd if
8930Sstevel@tonic-gate 		 * sidename is being added to MN diskset.
8940Sstevel@tonic-gate 		 *
8950Sstevel@tonic-gate 		 *   It's ok to broadcast this call to other nodes.
8960Sstevel@tonic-gate 		 *
8970Sstevel@tonic-gate 		 *   Note: The broadcast to other nodes isn't needed during
8980Sstevel@tonic-gate 		 *   the addition of the first mddbs to the set since the
8990Sstevel@tonic-gate 		 *   other nodes haven't been joined to the set yet.  All
9000Sstevel@tonic-gate 		 *   nodes in a MN diskset are (implicitly) joined to the set
9010Sstevel@tonic-gate 		 *   on the addition of the first mddb.
9020Sstevel@tonic-gate 		 */
9030Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
9040Sstevel@tonic-gate 		    (bcast == DB_ADDSIDENMS_BCAST)) {
9050Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
9060Sstevel@tonic-gate 			md_mn_msg_meta_db_newside_t	db_ns;
9070Sstevel@tonic-gate 			int				send_rval;
9080Sstevel@tonic-gate 
9090Sstevel@tonic-gate 			db_ns.msg_l_dev = np->dev;
9100Sstevel@tonic-gate 			db_ns.msg_sideno = sideno;
9110Sstevel@tonic-gate 			db_ns.msg_blkno = blkno;
9120Sstevel@tonic-gate 			(void) strncpy(db_ns.msg_dname, dname,
9130Sstevel@tonic-gate 			    sizeof (db_ns.msg_dname));
9140Sstevel@tonic-gate 			(void) splitname(np->bname, &db_ns.msg_splitname);
9150Sstevel@tonic-gate 			db_ns.msg_mnum = mnum;
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
9180Sstevel@tonic-gate 			db_ns.msg_devid[0] = NULL;
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 			/*
9210Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
9220Sstevel@tonic-gate 			 * stuck in in the return step until this command has
9230Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
9240Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
9250Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
9260Sstevel@tonic-gate 			 * cycle to proceed.
9270Sstevel@tonic-gate 			 */
9280Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
9290Sstevel@tonic-gate 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
930*8452SJohn.Wren.Kennedy@Sun.COM 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ns,
9310Sstevel@tonic-gate 			    sizeof (md_mn_msg_meta_db_newside_t),
9320Sstevel@tonic-gate 			    &resultp, ep);
9330Sstevel@tonic-gate 			if (send_rval != 0) {
9340Sstevel@tonic-gate 				rval = -1;
9350Sstevel@tonic-gate 				if (resultp == NULL)
9360Sstevel@tonic-gate 					(void) mddserror(ep,
9370Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
9380Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
9390Sstevel@tonic-gate 					    sp->setname);
9400Sstevel@tonic-gate 				else {
9410Sstevel@tonic-gate 					(void) mdstealerror(ep,
9420Sstevel@tonic-gate 					    &(resultp->mmr_ep));
9430Sstevel@tonic-gate 					if (mdisok(ep)) {
9440Sstevel@tonic-gate 						(void) mddserror(ep,
9450Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
9460Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
9470Sstevel@tonic-gate 						    sp->setname);
9480Sstevel@tonic-gate 					}
9490Sstevel@tonic-gate 					free_result(resultp);
9500Sstevel@tonic-gate 				}
9510Sstevel@tonic-gate 				break;
9520Sstevel@tonic-gate 			}
9530Sstevel@tonic-gate 			if (resultp)
9540Sstevel@tonic-gate 				free_result(resultp);
9550Sstevel@tonic-gate 		} else {
9560Sstevel@tonic-gate 			/*
9570Sstevel@tonic-gate 			 * Let this side's  device name, minor # and driver name
9580Sstevel@tonic-gate 			 * be known to the database replica.
9590Sstevel@tonic-gate 			 */
9600Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
9610Sstevel@tonic-gate 
9620Sstevel@tonic-gate 			/* Fill in device/replica info */
9630Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
9640Sstevel@tonic-gate 			c.c_locator.l_blkno = blkno;
9650Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, dname,
9660Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
9676195Sachimm 			if (splitname(np->bname, &c.c_devname) ==
9685109Spetede 			    METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
9695109Spetede 				rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
9705109Spetede 				    NODEV64, np->rname);
9715109Spetede 				break;
9725109Spetede 			}
9735109Spetede 
9740Sstevel@tonic-gate 			c.c_locator.l_mnum = mnum;
9750Sstevel@tonic-gate 
9760Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
9770Sstevel@tonic-gate 			c.c_setno = sp->setno;
9780Sstevel@tonic-gate 			(void) strncpy(c.c_setname, sp->setname,
9795109Spetede 			    sizeof (c.c_setname));
9800Sstevel@tonic-gate 			c.c_sideno = sideno;
9810Sstevel@tonic-gate 
9820Sstevel@tonic-gate 			/*
9830Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
9840Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
9850Sstevel@tonic-gate 			 * is just what this code would do.
9860Sstevel@tonic-gate 			 */
9870Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
9880Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
9910Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
9920Sstevel@tonic-gate 				break;
9930Sstevel@tonic-gate 			}
9940Sstevel@tonic-gate 		}
9950Sstevel@tonic-gate 	}
9960Sstevel@tonic-gate 
9970Sstevel@tonic-gate 	/* cleanup, return success */
9980Sstevel@tonic-gate 	if (bname != NULL) {
9990Sstevel@tonic-gate 		Free(bname);
10000Sstevel@tonic-gate 		bname = NULL;
10010Sstevel@tonic-gate 	}
10020Sstevel@tonic-gate 	if (dname != NULL) {
10030Sstevel@tonic-gate 		Free(dname);
10040Sstevel@tonic-gate 		dname = NULL;
10050Sstevel@tonic-gate 	}
10060Sstevel@tonic-gate 	return (rval);
10070Sstevel@tonic-gate }
10080Sstevel@tonic-gate 
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate int
meta_db_delsidenm(mdsetname_t * sp,side_t sideno,mdname_t * np,daddr_t blkno,md_error_t * ep)10110Sstevel@tonic-gate meta_db_delsidenm(
10120Sstevel@tonic-gate 	mdsetname_t	*sp,
10130Sstevel@tonic-gate 	side_t		sideno,
10140Sstevel@tonic-gate 	mdname_t	*np,
10150Sstevel@tonic-gate 	daddr_t		blkno,
10160Sstevel@tonic-gate 	md_error_t	*ep
10170Sstevel@tonic-gate )
10180Sstevel@tonic-gate {
10190Sstevel@tonic-gate 	mddb_config_t	c;
10200Sstevel@tonic-gate 	md_set_desc	*sd;
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
10230Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
10240Sstevel@tonic-gate 			return (-1);
10250Sstevel@tonic-gate 	}
10260Sstevel@tonic-gate 	/* Use rpc.mdcommd to delete mddb side from all nodes */
10270Sstevel@tonic-gate 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
10280Sstevel@tonic-gate 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
10290Sstevel@tonic-gate 		md_mn_result_t			*resultp = NULL;
10300Sstevel@tonic-gate 		md_mn_msg_meta_db_delside_t	db_ds;
10310Sstevel@tonic-gate 		int				send_rval;
10320Sstevel@tonic-gate 
10330Sstevel@tonic-gate 		db_ds.msg_l_dev = np->dev;
10340Sstevel@tonic-gate 		db_ds.msg_blkno = blkno;
10350Sstevel@tonic-gate 		db_ds.msg_sideno = sideno;
10360Sstevel@tonic-gate 
10370Sstevel@tonic-gate 		/* Set devid to NULL until devids are supported */
10380Sstevel@tonic-gate 		db_ds.msg_devid[0] = NULL;
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 		/*
10410Sstevel@tonic-gate 		 * If reconfig cycle has been started, this node is
10420Sstevel@tonic-gate 		 * stuck in in the return step until this command has
10430Sstevel@tonic-gate 		 * completed.  If mdcommd is suspended, ask
10440Sstevel@tonic-gate 		 * send_message to fail (instead of retrying)
10450Sstevel@tonic-gate 		 * so that metaset can finish allowing the reconfig
10460Sstevel@tonic-gate 		 * cycle to proceed.
10470Sstevel@tonic-gate 		 */
10480Sstevel@tonic-gate 		send_rval = mdmn_send_message(sp->setno,
10490Sstevel@tonic-gate 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
1050*8452SJohn.Wren.Kennedy@Sun.COM 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ds,
10510Sstevel@tonic-gate 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
10520Sstevel@tonic-gate 		if (send_rval != 0) {
10530Sstevel@tonic-gate 			if (resultp == NULL)
10540Sstevel@tonic-gate 				(void) mddserror(ep,
10550Sstevel@tonic-gate 				    MDE_DS_COMMD_SEND_FAIL,
10560Sstevel@tonic-gate 				    sp->setno, NULL, NULL,
10570Sstevel@tonic-gate 				    sp->setname);
10580Sstevel@tonic-gate 			else {
10590Sstevel@tonic-gate 				(void) mdstealerror(ep, &(resultp->mmr_ep));
10600Sstevel@tonic-gate 				if (mdisok(ep)) {
10610Sstevel@tonic-gate 					(void) mddserror(ep,
10620Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
10630Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
10640Sstevel@tonic-gate 					    sp->setname);
10650Sstevel@tonic-gate 				}
10660Sstevel@tonic-gate 				free_result(resultp);
10670Sstevel@tonic-gate 			}
10680Sstevel@tonic-gate 			return (-1);
10690Sstevel@tonic-gate 		}
10700Sstevel@tonic-gate 		if (resultp)
10710Sstevel@tonic-gate 			free_result(resultp);
10720Sstevel@tonic-gate 
10730Sstevel@tonic-gate 	} else {
10740Sstevel@tonic-gate 		/*
10750Sstevel@tonic-gate 		 * Let this side's  device name, minor # and driver name
10760Sstevel@tonic-gate 		 * be known to the database replica.
10770Sstevel@tonic-gate 		 */
10780Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
10790Sstevel@tonic-gate 
10800Sstevel@tonic-gate 		/* Fill in device/replica info */
10810Sstevel@tonic-gate 		c.c_locator.l_dev = meta_cmpldev(np->dev);
10820Sstevel@tonic-gate 		c.c_locator.l_blkno = blkno;
10830Sstevel@tonic-gate 
10840Sstevel@tonic-gate 		/* Fill in setno, setname, and sideno */
10850Sstevel@tonic-gate 		c.c_setno = sp->setno;
10860Sstevel@tonic-gate 		(void) strcpy(c.c_setname, sp->setname);
10870Sstevel@tonic-gate 		c.c_sideno = sideno;
10880Sstevel@tonic-gate 
10890Sstevel@tonic-gate 		/*
10900Sstevel@tonic-gate 		 * Don't need device id information from this ioctl
10910Sstevel@tonic-gate 		 * Kernel determines device id from dev_t, which
10920Sstevel@tonic-gate 		 * is just what this code would do.
10930Sstevel@tonic-gate 		 */
10940Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
10950Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
10980Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
10990Sstevel@tonic-gate 	}
11000Sstevel@tonic-gate 	return (0);
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate static int
mdnamesareunique(mdnamelist_t * nlp,md_error_t * ep)11050Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
11060Sstevel@tonic-gate {
11070Sstevel@tonic-gate 	mdnamelist_t		*dnp1, *dnp2;
11080Sstevel@tonic-gate 
11090Sstevel@tonic-gate 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
11100Sstevel@tonic-gate 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
11110Sstevel@tonic-gate 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
11120Sstevel@tonic-gate 				return (mderror(ep, MDE_DUPDRIVE,
11130Sstevel@tonic-gate 				    dnp1->namep->cname));
11140Sstevel@tonic-gate 		}
11150Sstevel@tonic-gate 	}
11160Sstevel@tonic-gate 	return (0);
11170Sstevel@tonic-gate }
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 
/*
 * Read exactly len bytes from the start of file path into buf.
 * Returns 0 on success, -1 if the file cannot be opened or fewer than
 * len bytes could be read.
 */
static int
filediff_read(char *path, char *buf, size_t len)
{
	size_t	done = 0;
	ssize_t	n;
	int	fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		return (-1);

	/* read() may return less than requested; keep going until done */
	while (done < len) {
		n = read(fd, buf + done, len - done);
		if (n <= 0)
			break;
		done += (size_t)n;
	}
	(void) close(fd);

	return ((done == len) ? 0 : -1);
}

/*
 * Return 1 if files are different, else return 0.
 *
 * The files are considered different when either cannot be stat'ed,
 * opened or read completely, when their sizes differ, when memory cannot
 * be allocated, or when their contents differ.  The result is always
 * exactly 0 or 1.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		tsz;
	size_t		allocsz;
	struct stat	tsbuf;
	struct stat	sbuf;
	char		*tbuf;
	char		*buf;

	if (stat(tsname, &tsbuf) != 0 || stat(sname, &sbuf) != 0)
		return (1);
	if (tsbuf.st_size != sbuf.st_size)
		return (1);
	tsz = (size_t)tsbuf.st_size;

	/*
	 * malloc(0) may legitimately return NULL; always ask for at least
	 * one byte so that two empty files are not reported as different.
	 */
	allocsz = (tsz == 0) ? 1 : tsz;
	tbuf = malloc(allocsz);
	buf = malloc(allocsz);

	/* read both files and compare content */
	if (tbuf != NULL && buf != NULL &&
	    filediff_read(tsname, tbuf, tsz) == 0 &&
	    filediff_read(sname, buf, tsz) == 0)
		ret = (memcmp(tbuf, buf, tsz) != 0);

	free(tbuf);
	free(buf);
	return (ret);
}
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate /*
11730Sstevel@tonic-gate  * patch md.conf file with mddb locations
11740Sstevel@tonic-gate  */
11750Sstevel@tonic-gate int
meta_db_patch(char * sname,char * cname,int patch,md_error_t * ep)11760Sstevel@tonic-gate meta_db_patch(
11770Sstevel@tonic-gate 	char		*sname,		/* system file name */
11780Sstevel@tonic-gate 	char		*cname,		/* mddb.cf file name */
11790Sstevel@tonic-gate 	int		patch,		/* patching locally */
11800Sstevel@tonic-gate 	md_error_t	*ep
11810Sstevel@tonic-gate )
11820Sstevel@tonic-gate {
11830Sstevel@tonic-gate 	char		*tsname = NULL;
11840Sstevel@tonic-gate 	char		line[MDDB_BOOTLIST_MAX_LEN];
11850Sstevel@tonic-gate 	FILE		*tsfp = NULL;
11860Sstevel@tonic-gate 	FILE		*mfp = NULL;
11870Sstevel@tonic-gate 	int		rval = -1;
11880Sstevel@tonic-gate 
11890Sstevel@tonic-gate 	/* check names */
11900Sstevel@tonic-gate 	if (sname == NULL) {
11910Sstevel@tonic-gate 		if (patch)
11920Sstevel@tonic-gate 			sname = "md.conf";
11930Sstevel@tonic-gate 		else
11940Sstevel@tonic-gate 			sname = "/kernel/drv/md.conf";
11950Sstevel@tonic-gate 	}
11960Sstevel@tonic-gate 	if (cname == NULL)
11970Sstevel@tonic-gate 		cname = META_DBCONF;
11980Sstevel@tonic-gate 
11990Sstevel@tonic-gate 	/*
12000Sstevel@tonic-gate 	 * edit file
12010Sstevel@tonic-gate 	 */
12020Sstevel@tonic-gate 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
12030Sstevel@tonic-gate 		if (mdissyserror(ep, EROFS)) {
12040Sstevel@tonic-gate 			/*
12050Sstevel@tonic-gate 			 * If we are booted on a read-only root because
12060Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
12070Sstevel@tonic-gate 			 * any scary error messages.
12080Sstevel@tonic-gate 			 */
12090Sstevel@tonic-gate 			mdclrerror(ep);
12100Sstevel@tonic-gate 			rval = 0;
12110Sstevel@tonic-gate 		}
12120Sstevel@tonic-gate 		goto out;
12130Sstevel@tonic-gate 	}
12140Sstevel@tonic-gate 
12152063Shshaw 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
12160Sstevel@tonic-gate 	    ep) != 0)
12170Sstevel@tonic-gate 		goto out;
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate 	/* if file content is identical, skip rename */
12200Sstevel@tonic-gate 	if (filediff(tsname, sname) == 0) {
12210Sstevel@tonic-gate 		rval = 0;
12220Sstevel@tonic-gate 		goto out;
12230Sstevel@tonic-gate 	}
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
12265109Spetede 	    (fclose(tsfp) != 0)) {
12270Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, tsname);
12280Sstevel@tonic-gate 		goto out;
12290Sstevel@tonic-gate 	}
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	tsfp = NULL;
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate 	/*
12340Sstevel@tonic-gate 	 * rename file. If we get a Cross Device error then it
12350Sstevel@tonic-gate 	 * is because we are in the miniroot.
12360Sstevel@tonic-gate 	 */
12370Sstevel@tonic-gate 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
12380Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, sname);
12390Sstevel@tonic-gate 		goto out;
12400Sstevel@tonic-gate 	}
12410Sstevel@tonic-gate 
12420Sstevel@tonic-gate 	if (errno == EXDEV) {
12430Sstevel@tonic-gate 		if ((tsfp = fopen(tsname, "r")) == NULL)
12440Sstevel@tonic-gate 			goto out;
12450Sstevel@tonic-gate 		if ((mfp = fopen(sname, "w+")) == NULL)
12460Sstevel@tonic-gate 			goto out;
12470Sstevel@tonic-gate 		while (fgets(line, sizeof (line), tsfp) != NULL) {
12480Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
12490Sstevel@tonic-gate 				goto out;
12500Sstevel@tonic-gate 		}
12510Sstevel@tonic-gate 		(void) fclose(tsfp);
12520Sstevel@tonic-gate 		tsfp = NULL;
12530Sstevel@tonic-gate 		if (fflush(mfp) != 0)
12540Sstevel@tonic-gate 			goto out;
12550Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
12560Sstevel@tonic-gate 			goto out;
12570Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
12580Sstevel@tonic-gate 			mfp = NULL;
12590Sstevel@tonic-gate 			goto out;
12600Sstevel@tonic-gate 		}
12610Sstevel@tonic-gate 	}
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	Free(tsname);
12640Sstevel@tonic-gate 	tsname = NULL;
12650Sstevel@tonic-gate 	rval = 0;
12660Sstevel@tonic-gate 
12670Sstevel@tonic-gate 	/* cleanup, return error */
12680Sstevel@tonic-gate out:
12690Sstevel@tonic-gate 	if (tsfp != NULL)
12700Sstevel@tonic-gate 		(void) fclose(tsfp);
12710Sstevel@tonic-gate 	if (tsname != NULL) {
12720Sstevel@tonic-gate 		(void) unlink(tsname);
12730Sstevel@tonic-gate 		Free(tsname);
12740Sstevel@tonic-gate 	}
12750Sstevel@tonic-gate 	return (rval);
12760Sstevel@tonic-gate }
12770Sstevel@tonic-gate 
12780Sstevel@tonic-gate /*
12790Sstevel@tonic-gate  * Add replicas to set.  This happens as a result of:
12800Sstevel@tonic-gate  *	- metadb [-s set_name] -a
12810Sstevel@tonic-gate  *	- metaset -s set_name -a disk
12820Sstevel@tonic-gate  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
12830Sstevel@tonic-gate  *	- metaset -s set_name -b
12840Sstevel@tonic-gate  *
12850Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
12860Sstevel@tonic-gate  *
12870Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
12880Sstevel@tonic-gate  * is running the metaset command.
12890Sstevel@tonic-gate  *
12900Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
12910Sstevel@tonic-gate  * running the metaset command.  If this is the first mddb added to
12920Sstevel@tonic-gate  * the MN diskset, then no communication is made to other nodes via commd
12930Sstevel@tonic-gate  * since the other nodes will be in-sync with respect to the mddbs when
12940Sstevel@tonic-gate  * those other nodes join the set and snarf in the newly created mddb.
12950Sstevel@tonic-gate  * If this is not the first mddb added to the MN diskset, then this
12960Sstevel@tonic-gate  * attach command is sent to all of the nodes using commd.  This keeps
12970Sstevel@tonic-gate  * the nodes in-sync.
12980Sstevel@tonic-gate  */
12990Sstevel@tonic-gate int
meta_db_attach(mdsetname_t * sp,mdnamelist_t * db_nlp,mdchkopts_t options,md_timeval32_t * timeval,int dbcnt,int dbsize,char * sysfilename,md_error_t * ep)13000Sstevel@tonic-gate meta_db_attach(
13010Sstevel@tonic-gate 	mdsetname_t		*sp,
13020Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
13030Sstevel@tonic-gate 	mdchkopts_t		options,
13040Sstevel@tonic-gate 	md_timeval32_t		*timeval,
13050Sstevel@tonic-gate 	int			dbcnt,
13060Sstevel@tonic-gate 	int			dbsize,
13070Sstevel@tonic-gate 	char			*sysfilename,
13080Sstevel@tonic-gate 	md_error_t		*ep
13090Sstevel@tonic-gate )
13100Sstevel@tonic-gate {
13110Sstevel@tonic-gate 	struct mddb_config	c;
13120Sstevel@tonic-gate 	mdnamelist_t		*nlp;
13130Sstevel@tonic-gate 	mdname_t		*np;
13140Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
13150Sstevel@tonic-gate 	md_drive_desc		*p;
13160Sstevel@tonic-gate 	int			i;
13170Sstevel@tonic-gate 	int			fd;
13180Sstevel@tonic-gate 	side_t			sideno;
13190Sstevel@tonic-gate 	daddr_t			blkno;
13200Sstevel@tonic-gate 	int			replicacount = 0;
13212614Spetede 	int			start_svmdaemons = 0;
13220Sstevel@tonic-gate 	int			rval = 0;
13230Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
13240Sstevel@tonic-gate 	md_set_desc		*sd;
13250Sstevel@tonic-gate 	int			stale_bool = FALSE;
13260Sstevel@tonic-gate 	int			flags;
13270Sstevel@tonic-gate 	int			firstmddb = 1;
13280Sstevel@tonic-gate 	md_timeval32_t		inittime = {0, 0};
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	/*
13310Sstevel@tonic-gate 	 * Error if we don't get some work to do.
13320Sstevel@tonic-gate 	 */
13330Sstevel@tonic-gate 	if (db_nlp == NULL)
13340Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
13350Sstevel@tonic-gate 
13360Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
13370Sstevel@tonic-gate 		return (-1);
13380Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
13390Sstevel@tonic-gate 	c.c_id = 0;
13400Sstevel@tonic-gate 	c.c_setno = sp->setno;
13410Sstevel@tonic-gate 
13420Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
13430Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
13440Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
13450Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
13460Sstevel@tonic-gate 		if (metaislocalset(sp)) {
13470Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
13480Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
13490Sstevel@tonic-gate 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
13500Sstevel@tonic-gate 			    (! (options & MDCHK_ALLOW_NODBS)))
13510Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
13520Sstevel@tonic-gate 		} else {
13530Sstevel@tonic-gate 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
13540Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
13550Sstevel@tonic-gate 		}
13560Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
13570Sstevel@tonic-gate 	}
13580Sstevel@tonic-gate 	/*
13590Sstevel@tonic-gate 	 * Is current set STALE?
13600Sstevel@tonic-gate 	 */
13610Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
13620Sstevel@tonic-gate 		stale_bool = TRUE;
13630Sstevel@tonic-gate 	}
13640Sstevel@tonic-gate 
13650Sstevel@tonic-gate 	assert(db_nlp != NULL);
13660Sstevel@tonic-gate 
13672614Spetede 	/* if these are the first replicas then the SVM daemons need to run */
13680Sstevel@tonic-gate 	if (c.c_dbcnt == 0)
13692614Spetede 		start_svmdaemons = 1;
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 	/*
13720Sstevel@tonic-gate 	 * check to see if we will go over the total possible number
13730Sstevel@tonic-gate 	 * of data bases
13740Sstevel@tonic-gate 	 */
13750Sstevel@tonic-gate 	nlp = db_nlp;
13760Sstevel@tonic-gate 	while (nlp) {
13770Sstevel@tonic-gate 		replicacount += dbcnt;
13780Sstevel@tonic-gate 		nlp = nlp->next;
13790Sstevel@tonic-gate 	}
13800Sstevel@tonic-gate 
13810Sstevel@tonic-gate 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
13820Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
13830Sstevel@tonic-gate 		    sp->setno, c.c_dbcnt + replicacount, NULL));
13840Sstevel@tonic-gate 
13850Sstevel@tonic-gate 	/*
13860Sstevel@tonic-gate 	 * go through and check to make sure all locations specified
13870Sstevel@tonic-gate 	 * are legal also pick out driver name;
13880Sstevel@tonic-gate 	 */
13890Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13900Sstevel@tonic-gate 		diskaddr_t devsize;
13910Sstevel@tonic-gate 
13920Sstevel@tonic-gate 		np = nlp->namep;
13930Sstevel@tonic-gate 
13940Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
13950Sstevel@tonic-gate 			uint_t	partno;
13960Sstevel@tonic-gate 			uint_t	rep_partno;
13970Sstevel@tonic-gate 			mddrivename_t	*dnp = np->drivenamep;
13980Sstevel@tonic-gate 
13990Sstevel@tonic-gate 			/*
14000Sstevel@tonic-gate 			 * make sure that non-local database replicas
14010Sstevel@tonic-gate 			 * are always on the replica slice.
14020Sstevel@tonic-gate 			 */
14030Sstevel@tonic-gate 			if (meta_replicaslice(dnp,
14040Sstevel@tonic-gate 			    &rep_partno, ep) != 0)
14050Sstevel@tonic-gate 				return (-1);
14060Sstevel@tonic-gate 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
14070Sstevel@tonic-gate 				return (-1);
14080Sstevel@tonic-gate 			if (partno != rep_partno)
14090Sstevel@tonic-gate 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
14100Sstevel@tonic-gate 				    np->dev, sp->setname));
14110Sstevel@tonic-gate 		}
14120Sstevel@tonic-gate 
14130Sstevel@tonic-gate 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
14140Sstevel@tonic-gate 		    ep)) {
14150Sstevel@tonic-gate 			return (-1);
14160Sstevel@tonic-gate 		}
14170Sstevel@tonic-gate 
14180Sstevel@tonic-gate 		if ((devsize = metagetsize(np, ep)) == -1)
14190Sstevel@tonic-gate 			return (-1);
14200Sstevel@tonic-gate 
14210Sstevel@tonic-gate 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
14220Sstevel@tonic-gate 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
14230Sstevel@tonic-gate 			    meta_getminor(np->dev), sp->setno, devsize,
14240Sstevel@tonic-gate 			    np->cname));
14250Sstevel@tonic-gate 	}
14260Sstevel@tonic-gate 
14270Sstevel@tonic-gate 	/*
14280Sstevel@tonic-gate 	 * If first disk in set we don't have lb_inittime yet for use as
14290Sstevel@tonic-gate 	 * mb_setcreatetime so don't go looking for it. We'll come back
14300Sstevel@tonic-gate 	 * later and update after the locator block has been created.
14310Sstevel@tonic-gate 	 * If this isn't the first disk in the set, we have a locator
14320Sstevel@tonic-gate 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
14330Sstevel@tonic-gate 	 * lb_inittime.
14340Sstevel@tonic-gate 	 */
14350Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
14360Sstevel@tonic-gate 		if (c.c_dbcnt != 0) {
14370Sstevel@tonic-gate 			firstmddb = 0;
14380Sstevel@tonic-gate 			inittime = meta_get_lb_inittime(sp, ep);
14390Sstevel@tonic-gate 		}
14400Sstevel@tonic-gate 	}
14410Sstevel@tonic-gate 
14420Sstevel@tonic-gate 	/*
14430Sstevel@tonic-gate 	 * go through and write all master blocks
14440Sstevel@tonic-gate 	 */
14450Sstevel@tonic-gate 
14460Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14470Sstevel@tonic-gate 		np = nlp->namep;
14480Sstevel@tonic-gate 
14490Sstevel@tonic-gate 		if ((fd = open(np->rname, O_RDWR)) < 0)
14500Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
14510Sstevel@tonic-gate 
14520Sstevel@tonic-gate 		for (i = 0; i < dbcnt; i++) {
14530Sstevel@tonic-gate 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
14540Sstevel@tonic-gate 			    inittime, ep)) {
14550Sstevel@tonic-gate 				(void) close(fd);
14560Sstevel@tonic-gate 				return (-1);
14570Sstevel@tonic-gate 			}
14580Sstevel@tonic-gate 		}
14590Sstevel@tonic-gate 		(void) close(fd);
14600Sstevel@tonic-gate 	}
14610Sstevel@tonic-gate 
14620Sstevel@tonic-gate 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
14630Sstevel@tonic-gate 		return (-1);
14640Sstevel@tonic-gate 
14650Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
14660Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
14670Sstevel@tonic-gate 		if (! mdisok(ep))
14680Sstevel@tonic-gate 			return (-1);
14690Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
14700Sstevel@tonic-gate 			return (-1);
14710Sstevel@tonic-gate 
14720Sstevel@tonic-gate 	}
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 	/*
14750Sstevel@tonic-gate 	 * go through and tell kernel to add them
14760Sstevel@tonic-gate 	 */
14770Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14780Sstevel@tonic-gate 		mdcinfo_t	*cinfo;
14790Sstevel@tonic-gate 
14800Sstevel@tonic-gate 		np = nlp->namep;
14810Sstevel@tonic-gate 
14820Sstevel@tonic-gate 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
14830Sstevel@tonic-gate 			rval = -1;
14840Sstevel@tonic-gate 			goto out;
14850Sstevel@tonic-gate 		}
14860Sstevel@tonic-gate 
14870Sstevel@tonic-gate 		/*
14880Sstevel@tonic-gate 		 * If mddb is being added to MN diskset and there already
14890Sstevel@tonic-gate 		 * exists a valid mddb in the set (which equates to this
14900Sstevel@tonic-gate 		 * node being an owner of the set) then use rpc.mdcommd
14910Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
14920Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
14930Sstevel@tonic-gate 		 * can't write the message to the mddb.
14940Sstevel@tonic-gate 		 *
14950Sstevel@tonic-gate 		 * Otherwise, just add mddb to this node.
14960Sstevel@tonic-gate 		 */
14970Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
14980Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
14990Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
15000Sstevel@tonic-gate 			md_mn_msg_meta_db_attach_t	attach;
15010Sstevel@tonic-gate 			int 				send_rval;
15020Sstevel@tonic-gate 
15030Sstevel@tonic-gate 			/*
15040Sstevel@tonic-gate 			 * In a scenario where new replicas had been added on
15050Sstevel@tonic-gate 			 * the master, and then all of the old replicas failed
15060Sstevel@tonic-gate 			 * before the slaves had knowledge of the new replicas,
15070Sstevel@tonic-gate 			 * the slaves are unable to re-parse in the mddb
15080Sstevel@tonic-gate 			 * from the new replicas since the slaves have no
15090Sstevel@tonic-gate 			 * knowledge of the new replicas.  The following
15100Sstevel@tonic-gate 			 * algorithm solves this problem:
15110Sstevel@tonic-gate 			 * 	- META_DB_ATTACH message generates submsgs
15120Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
15130Sstevel@tonic-gate 			 * 		- MDDB_ATTACH new replicas
15140Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
15150Sstevel@tonic-gate 			 *		information to be sent from master
15160Sstevel@tonic-gate 			 *		to slaves at a higher class than the
15170Sstevel@tonic-gate 			 *		unblock so the parse message will
15180Sstevel@tonic-gate 			 *		reach slaves before unblock message.
15190Sstevel@tonic-gate 			 */
15200Sstevel@tonic-gate 			attach.msg_l_dev = np->dev;
15210Sstevel@tonic-gate 			attach.msg_cnt = dbcnt;
15220Sstevel@tonic-gate 			attach.msg_dbsize = dbsize;
15230Sstevel@tonic-gate 			(void) strncpy(attach.msg_dname, cinfo->dname,
15240Sstevel@tonic-gate 			    sizeof (attach.msg_dname));
15250Sstevel@tonic-gate 			(void) splitname(np->bname, &attach.msg_splitname);
15260Sstevel@tonic-gate 			attach.msg_options = options;
15270Sstevel@tonic-gate 
15280Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
15290Sstevel@tonic-gate 			attach.msg_devid[0] = NULL;
15300Sstevel@tonic-gate 
15310Sstevel@tonic-gate 			/*
15320Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
15330Sstevel@tonic-gate 			 * stuck in the return step until this command has
15340Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
15350Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
15360Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
15370Sstevel@tonic-gate 			 * cycle to proceed.
15380Sstevel@tonic-gate 			 */
15390Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
15400Sstevel@tonic-gate 			if (stale_bool == TRUE)
15410Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
15420Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
15435109Spetede 			    MD_MN_MSG_META_DB_ATTACH,
1544*8452SJohn.Wren.Kennedy@Sun.COM 			    flags, 0, (char *)&attach,
15455109Spetede 			    sizeof (md_mn_msg_meta_db_attach_t),
15465109Spetede 			    &resultp, ep);
15470Sstevel@tonic-gate 			if (send_rval != 0) {
15480Sstevel@tonic-gate 				rval = -1;
15490Sstevel@tonic-gate 				if (resultp == NULL)
15500Sstevel@tonic-gate 					(void) mddserror(ep,
15510Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
15520Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
15530Sstevel@tonic-gate 					    sp->setname);
15540Sstevel@tonic-gate 				else {
15550Sstevel@tonic-gate 					(void) mdstealerror(ep,
15560Sstevel@tonic-gate 					    &(resultp->mmr_ep));
15570Sstevel@tonic-gate 					if (mdisok(ep)) {
15580Sstevel@tonic-gate 						(void) mddserror(ep,
15590Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
15600Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
15610Sstevel@tonic-gate 						    sp->setname);
15620Sstevel@tonic-gate 					}
15630Sstevel@tonic-gate 					free_result(resultp);
15640Sstevel@tonic-gate 				}
15650Sstevel@tonic-gate 				goto out;
15660Sstevel@tonic-gate 			}
15670Sstevel@tonic-gate 			if (resultp)
15680Sstevel@tonic-gate 				free_result(resultp);
15690Sstevel@tonic-gate 		} else {
15705109Spetede 			/* Adding mddb(s) to just this node */
15715109Spetede 			for (i = 0; i < dbcnt; i++) {
15725109Spetede 				(void) memset(&c, 0, sizeof (c));
15735109Spetede 				/* Fill in device/replica info */
15745109Spetede 				c.c_locator.l_dev = meta_cmpldev(np->dev);
15755109Spetede 				c.c_locator.l_blkno = i * dbsize + 16;
15765109Spetede 				blkno = c.c_locator.l_blkno;
15775109Spetede 				(void) strncpy(c.c_locator.l_driver,
15785109Spetede 				    cinfo->dname,
15795109Spetede 				    sizeof (c.c_locator.l_driver));
15800Sstevel@tonic-gate 
15815109Spetede 				if (splitname(np->bname, &c.c_devname) ==
15825109Spetede 				    METASPLIT_LONGDISKNAME && devid_in_use ==
15835109Spetede 				    FALSE) {
15845109Spetede 					rval = mddeverror(ep,
15855109Spetede 					    MDE_DISKNAMETOOLONG,
15865109Spetede 					    NODEV64, np->rname);
15875109Spetede 					goto out;
15880Sstevel@tonic-gate 				}
15895109Spetede 
15905109Spetede 				c.c_locator.l_mnum = meta_getminor(np->dev);
15915109Spetede 
15925109Spetede 				/* Fill in setno, setname, and sideno */
15935109Spetede 				c.c_setno = sp->setno;
15945109Spetede 				if (! metaislocalset(sp)) {
15955109Spetede 					if (MD_MNSET_DESC(sd)) {
15965109Spetede 						c.c_multi_node = 1;
15975109Spetede 					}
15985109Spetede 				}
15995109Spetede 				(void) strcpy(c.c_setname, sp->setname);
16005109Spetede 				c.c_sideno = sideno;
16010Sstevel@tonic-gate 
16025109Spetede 				/*
16035109Spetede 				 * Don't need device id information from this
16045109Spetede 				 * ioctl Kernel determines device id from
16055109Spetede 				 * dev_t, which is just what this code would do.
16065109Spetede 				 */
16075109Spetede 				c.c_locator.l_devid = (uint64_t)0;
16085109Spetede 				c.c_locator.l_devid_flags = 0;
16090Sstevel@tonic-gate 
16105109Spetede 				if (timeval != NULL)
16115109Spetede 					c.c_timestamp = *timeval;
16120Sstevel@tonic-gate 
16135109Spetede 				if (setup_med_cfg(sp, &c,
16145109Spetede 				    (options & MDCHK_SET_FORCE), ep)) {
16155109Spetede 					rval = -1;
16165109Spetede 					goto out;
16175109Spetede 				}
16180Sstevel@tonic-gate 
16195109Spetede 				if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
16205109Spetede 				    NULL) != 0) {
16215109Spetede 					rval = mdstealerror(ep, &c.c_mde);
16225109Spetede 					goto out;
16235109Spetede 				}
16245109Spetede 				/*
16255109Spetede 				 * This is either a traditional diskset OR this
16265109Spetede 				 * is the first replica added to a MN diskset.
16275109Spetede 				 * In either case, set broadcast to NO_BCAST so
16285109Spetede 				 * that message won't go through rpc.mdcommd.
16295109Spetede 				 * If this is a traditional diskset, the bcast
16305109Spetede 				 * flag is ignored since traditional disksets
16315109Spetede 				 * don't use the rpc.mdcommd.
16325109Spetede 				 */
16335109Spetede 				if (meta_db_addsidenms(sp, np, blkno,
16345109Spetede 				    DB_ADDSIDENMS_NO_BCAST, ep))
16355109Spetede 					goto out;
16360Sstevel@tonic-gate 			}
16370Sstevel@tonic-gate 		}
16380Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
16390Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
16400Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next)
16410Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
16420Sstevel@tonic-gate 					p->dd_dbcnt = dbcnt;
16430Sstevel@tonic-gate 					p->dd_dbsize  = dbsize;
16440Sstevel@tonic-gate 					break;
16450Sstevel@tonic-gate 				}
16460Sstevel@tonic-gate 		}
16470Sstevel@tonic-gate 
16480Sstevel@tonic-gate 		/*
16490Sstevel@tonic-gate 		 * If this was the first addition of disks to the
16500Sstevel@tonic-gate 		 * diskset you now need to update the mb_setcreatetime
16510Sstevel@tonic-gate 		 * which needed lb_inittime which wasn't there until now.
16520Sstevel@tonic-gate 		 */
16530Sstevel@tonic-gate 		if (firstmddb) {
16540Sstevel@tonic-gate 			if (meta_update_mb(sp, dd, ep) != 0) {
16550Sstevel@tonic-gate 				return (-1);
16560Sstevel@tonic-gate 			}
16570Sstevel@tonic-gate 		}
16580Sstevel@tonic-gate 		(void) close(fd);
16590Sstevel@tonic-gate 	}
16600Sstevel@tonic-gate 
16610Sstevel@tonic-gate out:
16620Sstevel@tonic-gate 	if (metaislocalset(sp)) {
16630Sstevel@tonic-gate 
16640Sstevel@tonic-gate 		/* everything looks fine. Start mdmonitord */
16652614Spetede 		if (rval == 0 && start_svmdaemons == 1) {
16660Sstevel@tonic-gate 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
16670Sstevel@tonic-gate 				mde_perror(&status, "");
16680Sstevel@tonic-gate 				mdclrerror(&status);
16690Sstevel@tonic-gate 			}
16700Sstevel@tonic-gate 		}
16710Sstevel@tonic-gate 
16720Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
16730Sstevel@tonic-gate 			/* Don't mask any previous errors */
16740Sstevel@tonic-gate 			if (rval == 0)
16750Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16760Sstevel@tonic-gate 			return (rval);
16770Sstevel@tonic-gate 		}
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
16800Sstevel@tonic-gate 			/* Don't mask any previous errors */
16810Sstevel@tonic-gate 			if (rval == 0)
16820Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16830Sstevel@tonic-gate 		}
16840Sstevel@tonic-gate 	} else {
16850Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
16860Sstevel@tonic-gate 		    (options & MDCHK_SET_LOCKED),
16870Sstevel@tonic-gate 		    (options & MDCHK_SET_FORCE),
16880Sstevel@tonic-gate 		    &status)) {
16890Sstevel@tonic-gate 			/* Don't mask any previous errors */
16900Sstevel@tonic-gate 			if (rval == 0)
16910Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16920Sstevel@tonic-gate 			else
16930Sstevel@tonic-gate 				mdclrerror(&status);
16940Sstevel@tonic-gate 		}
16950Sstevel@tonic-gate 		metafreedrivedesc(&dd);
16960Sstevel@tonic-gate 	}
16970Sstevel@tonic-gate 	/*
16980Sstevel@tonic-gate 	 * For MN disksets that already had nodes joined
16990Sstevel@tonic-gate 	 * before the attach of this mddb(s), the name invalidation is
17000Sstevel@tonic-gate 	 * done by the commd handler routine.  Otherwise, if this
17010Sstevel@tonic-gate 	 * is the first attach of a MN diskset mddb, the invalidation
17020Sstevel@tonic-gate 	 * must be done here since the first attach cannot be sent
17030Sstevel@tonic-gate 	 * via the commd since there are no nodes joined to the set yet.
17040Sstevel@tonic-gate 	 */
17050Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
17060Sstevel@tonic-gate 	    (MD_MNSET_DESC(sd) &&
17070Sstevel@tonic-gate 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
17080Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
17090Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
17100Sstevel@tonic-gate 		}
17110Sstevel@tonic-gate 	}
17120Sstevel@tonic-gate 	return (rval);
17130Sstevel@tonic-gate }
17140Sstevel@tonic-gate 
17150Sstevel@tonic-gate /*
17160Sstevel@tonic-gate  * deletelist_length
17170Sstevel@tonic-gate  *
17180Sstevel@tonic-gate  *	return the number of slices that have been specified for deletion
17190Sstevel@tonic-gate  *	on the metadb command line.  This does not calculate the number
17200Sstevel@tonic-gate  *	of replicas because there may be multiple replicas per slice.
17210Sstevel@tonic-gate  */
17220Sstevel@tonic-gate static int
deletelist_length(mdnamelist_t * db_nlp)17230Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
17240Sstevel@tonic-gate {
17250Sstevel@tonic-gate 
17260Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17270Sstevel@tonic-gate 	int			list_length = 0;
17280Sstevel@tonic-gate 
17290Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17300Sstevel@tonic-gate 		list_length++;
17310Sstevel@tonic-gate 	}
17320Sstevel@tonic-gate 
17330Sstevel@tonic-gate 	return (list_length);
17340Sstevel@tonic-gate }
17350Sstevel@tonic-gate 
17360Sstevel@tonic-gate static int
in_deletelist(char * devname,mdnamelist_t * db_nlp)17370Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
17380Sstevel@tonic-gate {
17390Sstevel@tonic-gate 
17400Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17410Sstevel@tonic-gate 	mdname_t		*np;
17420Sstevel@tonic-gate 	int			index = 0;
17430Sstevel@tonic-gate 
17440Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17450Sstevel@tonic-gate 		np = nlp->namep;
17460Sstevel@tonic-gate 
17470Sstevel@tonic-gate 		if (strcmp(devname, np->bname) == 0)
17480Sstevel@tonic-gate 			return (index);
17490Sstevel@tonic-gate 		index++;
17500Sstevel@tonic-gate 	}
17510Sstevel@tonic-gate 
17520Sstevel@tonic-gate 	return (-1);
17530Sstevel@tonic-gate }
17540Sstevel@tonic-gate 
17550Sstevel@tonic-gate /*
17560Sstevel@tonic-gate  * Delete replicas from set.  This happens as a result of:
17570Sstevel@tonic-gate  *	- metadb [-s set_name] -d
17580Sstevel@tonic-gate  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
17590Sstevel@tonic-gate  *	- metaset -s set_name -d disk
17600Sstevel@tonic-gate  *	- metaset -s set_name -b
17610Sstevel@tonic-gate  *
17620Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
17630Sstevel@tonic-gate  *
17640Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
17650Sstevel@tonic-gate  * is running the metaset command.
17660Sstevel@tonic-gate  *
17670Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
17680Sstevel@tonic-gate  * running the metaset command.  This detach routine is sent to all
17690Sstevel@tonic-gate  * of the joined nodes in the diskset using commd.  This keeps
17700Sstevel@tonic-gate  * the nodes in-sync.
17710Sstevel@tonic-gate  */
17720Sstevel@tonic-gate int
meta_db_detach(mdsetname_t * sp,mdnamelist_t * db_nlp,mdforceopts_t force_option,char * sysfilename,md_error_t * ep)17730Sstevel@tonic-gate meta_db_detach(
17740Sstevel@tonic-gate 	mdsetname_t		*sp,
17750Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
17760Sstevel@tonic-gate 	mdforceopts_t		force_option,
17770Sstevel@tonic-gate 	char			*sysfilename,
17780Sstevel@tonic-gate 	md_error_t		*ep
17790Sstevel@tonic-gate )
17800Sstevel@tonic-gate {
17810Sstevel@tonic-gate 	struct mddb_config	c;
17820Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17830Sstevel@tonic-gate 	mdname_t		*np;
17840Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
17850Sstevel@tonic-gate 	md_drive_desc		*p;
17860Sstevel@tonic-gate 	int			replicacount;
17870Sstevel@tonic-gate 	int			replica_delete_count;
17880Sstevel@tonic-gate 	int			nr_replica_slices;
17890Sstevel@tonic-gate 	int			i;
17900Sstevel@tonic-gate 	int			stop_svmdaemons = 0;
17910Sstevel@tonic-gate 	int			rval = 0;
17920Sstevel@tonic-gate 	int			index;
17930Sstevel@tonic-gate 	int			valid_replicas_nottodelete = 0;
17940Sstevel@tonic-gate 	int			invalid_replicas_nottodelete = 0;
17950Sstevel@tonic-gate 	int			invalid_replicas_todelete = 0;
17960Sstevel@tonic-gate 	int			errored = 0;
17970Sstevel@tonic-gate 	int			*tag_array;
17980Sstevel@tonic-gate 	int			fd = -1;
17990Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
18000Sstevel@tonic-gate 	md_set_desc		*sd;
18010Sstevel@tonic-gate 	int			stale_bool = FALSE;
18020Sstevel@tonic-gate 	int			flags;
18030Sstevel@tonic-gate 
18040Sstevel@tonic-gate 	/*
18050Sstevel@tonic-gate 	 * Error if we don't get some work to do.
18060Sstevel@tonic-gate 	 */
18070Sstevel@tonic-gate 	if (db_nlp == NULL)
18080Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
18090Sstevel@tonic-gate 
18100Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
18110Sstevel@tonic-gate 		return (-1);
18120Sstevel@tonic-gate 
18130Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
18140Sstevel@tonic-gate 	c.c_id = 0;
18150Sstevel@tonic-gate 	c.c_setno = sp->setno;
18160Sstevel@tonic-gate 
18170Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
18180Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
18190Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
18200Sstevel@tonic-gate 
18210Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18220Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
18230Sstevel@tonic-gate 
18240Sstevel@tonic-gate 	/*
18250Sstevel@tonic-gate 	 * Is current set STALE?
18260Sstevel@tonic-gate 	 */
18270Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
18280Sstevel@tonic-gate 		stale_bool = TRUE;
18290Sstevel@tonic-gate 	}
18300Sstevel@tonic-gate 
18310Sstevel@tonic-gate 	replicacount = c.c_dbcnt;
18320Sstevel@tonic-gate 
18330Sstevel@tonic-gate 	assert(db_nlp != NULL);
18340Sstevel@tonic-gate 
18350Sstevel@tonic-gate 	/*
18360Sstevel@tonic-gate 	 * go through and gather how many data bases are on each
18370Sstevel@tonic-gate 	 * device specified.
18380Sstevel@tonic-gate 	 */
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate 	nr_replica_slices = deletelist_length(db_nlp);
18410Sstevel@tonic-gate 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate 	replica_delete_count = 0;
18440Sstevel@tonic-gate 	for (i = 0; i < replicacount; i++) {
18450Sstevel@tonic-gate 		char	*devname;
18460Sstevel@tonic-gate 		int	found = 0;
18470Sstevel@tonic-gate 
18480Sstevel@tonic-gate 		c.c_id = i;
18490Sstevel@tonic-gate 
18500Sstevel@tonic-gate 		/* Don't need device id information from this ioctl */
18510Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
18520Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
18530Sstevel@tonic-gate 
18540Sstevel@tonic-gate 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18550Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
18560Sstevel@tonic-gate 
18570Sstevel@tonic-gate 		devname = splicename(&c.c_devname);
18580Sstevel@tonic-gate 
18595109Spetede 		if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
18605109Spetede 			Free(devname);
18615109Spetede 			devname = getlongname(&c, ep);
18625109Spetede 			if (devname == NULL) {
18635109Spetede 				return (-1);
18645109Spetede 			}
18655109Spetede 		}
18665109Spetede 
18670Sstevel@tonic-gate 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
18680Sstevel@tonic-gate 			found = 1;
18690Sstevel@tonic-gate 			tag_array[index] = 1;
18700Sstevel@tonic-gate 			replica_delete_count++;
18710Sstevel@tonic-gate 		}
18720Sstevel@tonic-gate 
18730Sstevel@tonic-gate 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
18745109Spetede 		    MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
18755109Spetede 		    MDDB_F_EDATA | MDDB_F_EMASTER);
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 		/*
18780Sstevel@tonic-gate 		 * There are four combinations of "errored" and "found"
18790Sstevel@tonic-gate 		 * and they are used to find the number of
18800Sstevel@tonic-gate 		 * (a) valid/invalid replicas that are not in the delete
18810Sstevel@tonic-gate 		 * list and are available in the system.
18820Sstevel@tonic-gate 		 * (b) valid/invalid replicas that are to be deleted.
18830Sstevel@tonic-gate 		 */
18840Sstevel@tonic-gate 
18850Sstevel@tonic-gate 		if (errored && !found)		/* errored and !found */
18860Sstevel@tonic-gate 			invalid_replicas_nottodelete++;
18870Sstevel@tonic-gate 		else if (!found)		/* !errored and !found */
18880Sstevel@tonic-gate 			valid_replicas_nottodelete++;
18890Sstevel@tonic-gate 		else if (errored)		/* errored and found */
18900Sstevel@tonic-gate 			invalid_replicas_todelete++;
18910Sstevel@tonic-gate 		/*
18920Sstevel@tonic-gate 		 * else it is !errored and found. This means
18930Sstevel@tonic-gate 		 * valid_replicas_todelete++; But this variable will not
18940Sstevel@tonic-gate 		 * be used anywhere
18950Sstevel@tonic-gate 		 */
18960Sstevel@tonic-gate 
18970Sstevel@tonic-gate 		Free(devname);
18980Sstevel@tonic-gate 	}
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 	index = 0;
19010Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19020Sstevel@tonic-gate 		np = nlp->namep;
19030Sstevel@tonic-gate 		if (tag_array[index++] != 1) {
19040Sstevel@tonic-gate 			Free(tag_array);
19050Sstevel@tonic-gate 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
19060Sstevel@tonic-gate 		}
19070Sstevel@tonic-gate 	}
19080Sstevel@tonic-gate 
19090Sstevel@tonic-gate 	Free(tag_array);
19100Sstevel@tonic-gate 
19110Sstevel@tonic-gate 
19120Sstevel@tonic-gate 	/* if all replicas are deleted stop mdmonitord */
19130Sstevel@tonic-gate 	if ((replicacount - replica_delete_count) == 0)
19140Sstevel@tonic-gate 		stop_svmdaemons = 1;
19150Sstevel@tonic-gate 
19160Sstevel@tonic-gate 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
19170Sstevel@tonic-gate 		if (force_option & MDFORCE_NONE)
19180Sstevel@tonic-gate 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
19190Sstevel@tonic-gate 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
19200Sstevel@tonic-gate 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
19210Sstevel@tonic-gate 	}
19220Sstevel@tonic-gate 
19230Sstevel@tonic-gate 	/*
19240Sstevel@tonic-gate 	 * The following algorithms are followed to check for deletion:
19250Sstevel@tonic-gate 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
19260Sstevel@tonic-gate 	 * replicas, then deletion should be allowed.
19270Sstevel@tonic-gate 	 * (b) Deletion should be allowed only if valid replicas that are "not"
19280Sstevel@tonic-gate 	 * to be deleted is always greater than the invalid replicas that
19290Sstevel@tonic-gate 	 * are "not" to be deleted.
19300Sstevel@tonic-gate 	 * (c) If the user uses -f option, then deletion should be allowed.
19310Sstevel@tonic-gate 	 */
19320Sstevel@tonic-gate 
19330Sstevel@tonic-gate 	if ((invalid_replicas_todelete != replica_delete_count) &&
19345109Spetede 	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
19355109Spetede 	    (force_option != MDFORCE_LOCAL))
19360Sstevel@tonic-gate 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate 	/*
19390Sstevel@tonic-gate 	 * go through and tell kernel to delete them
19400Sstevel@tonic-gate 	 */
19410Sstevel@tonic-gate 
19420Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
19430Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
19440Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
19450Sstevel@tonic-gate 
19460Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
19470Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
19480Sstevel@tonic-gate 
19490Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
19500Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
19510Sstevel@tonic-gate 		if (! mdisok(ep))
19520Sstevel@tonic-gate 			return (-1);
19530Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
19540Sstevel@tonic-gate 			return (-1);
19550Sstevel@tonic-gate 	}
19560Sstevel@tonic-gate 
19570Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19580Sstevel@tonic-gate 		np = nlp->namep;
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 		/*
19610Sstevel@tonic-gate 		 * If mddb is being deleted from MN diskset and node is
19620Sstevel@tonic-gate 		 * an owner of the diskset then use rpc.mdcommd
19630Sstevel@tonic-gate 		 * mechanism to delete mddb(s) so that all nodes stay in sync.
19640Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
19650Sstevel@tonic-gate 		 * can't write the message to the mddb.
19660Sstevel@tonic-gate 		 *
19670Sstevel@tonic-gate 		 * When mddbs are first being added to set, a detach can
19680Sstevel@tonic-gate 		 * be called before any node has joined the diskset, so
19690Sstevel@tonic-gate 		 * must check to see if node is an owner of the diskset.
19700Sstevel@tonic-gate 		 *
19710Sstevel@tonic-gate 		 * Otherwise, just delete mddb from this node.
19720Sstevel@tonic-gate 		 */
19730Sstevel@tonic-gate 
19740Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
19750Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
19760Sstevel@tonic-gate 			md_mn_result_t			*resultp;
19770Sstevel@tonic-gate 			md_mn_msg_meta_db_detach_t	detach;
19780Sstevel@tonic-gate 			int				send_rval;
19790Sstevel@tonic-gate 
19800Sstevel@tonic-gate 			/*
19810Sstevel@tonic-gate 			 * The following algorithm is used to detach replicas.
19820Sstevel@tonic-gate 			 * 	- META_DB_DETACH message generates submsgs
19830Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
19840Sstevel@tonic-gate 			 * 		- MDDB_DETACH replicas
19850Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
19860Sstevel@tonic-gate 			 *		information to be sent from master
19870Sstevel@tonic-gate 			 *		to slaves at a higher class than the
19880Sstevel@tonic-gate 			 *		unblock so the parse message will
19890Sstevel@tonic-gate 			 *		reach slaves before unblock message.
19900Sstevel@tonic-gate 			 */
19910Sstevel@tonic-gate 			(void) splitname(np->bname, &detach.msg_splitname);
19920Sstevel@tonic-gate 
19930Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
19940Sstevel@tonic-gate 			detach.msg_devid[0] = NULL;
19950Sstevel@tonic-gate 
19960Sstevel@tonic-gate 			/*
19970Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
19980Sstevel@tonic-gate 			 * stuck in the return step until this command has
19990Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
20000Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
20010Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
20020Sstevel@tonic-gate 			 * cycle to proceed.
20030Sstevel@tonic-gate 			 */
20040Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
20050Sstevel@tonic-gate 			if (stale_bool == TRUE)
20060Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
20070Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
20085109Spetede 			    MD_MN_MSG_META_DB_DETACH,
2009*8452SJohn.Wren.Kennedy@Sun.COM 			    flags, 0, (char *)&detach,
20105109Spetede 			    sizeof (md_mn_msg_meta_db_detach_t),
20115109Spetede 			    &resultp, ep);
20120Sstevel@tonic-gate 			if (send_rval != 0) {
20130Sstevel@tonic-gate 				rval = -1;
20140Sstevel@tonic-gate 				if (resultp == NULL)
20150Sstevel@tonic-gate 					(void) mddserror(ep,
20160Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
20170Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
20180Sstevel@tonic-gate 					    sp->setname);
20190Sstevel@tonic-gate 				else {
20200Sstevel@tonic-gate 					(void) mdstealerror(ep,
20210Sstevel@tonic-gate 					    &(resultp->mmr_ep));
20220Sstevel@tonic-gate 					if (mdisok(ep)) {
20230Sstevel@tonic-gate 						(void) mddserror(ep,
20240Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
20250Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
20260Sstevel@tonic-gate 						    sp->setname);
20270Sstevel@tonic-gate 					}
20280Sstevel@tonic-gate 					free_result(resultp);
20290Sstevel@tonic-gate 				}
20300Sstevel@tonic-gate 				goto out;
20310Sstevel@tonic-gate 			}
20320Sstevel@tonic-gate 			if (resultp)
20330Sstevel@tonic-gate 				free_result(resultp);
20340Sstevel@tonic-gate 		} else {
20350Sstevel@tonic-gate 			i = 0;
20360Sstevel@tonic-gate 			while (i < c.c_dbcnt) {
20370Sstevel@tonic-gate 				char	*devname;
20380Sstevel@tonic-gate 
20390Sstevel@tonic-gate 				c.c_id = i;
20400Sstevel@tonic-gate 
20410Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
20420Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
20430Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
20440Sstevel@tonic-gate 
20450Sstevel@tonic-gate 				if (metaioctl(MD_DB_GETDEV, &c,
20460Sstevel@tonic-gate 				    &c.c_mde, NULL)) {
20470Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
20480Sstevel@tonic-gate 					goto out;
20490Sstevel@tonic-gate 				}
20500Sstevel@tonic-gate 
20510Sstevel@tonic-gate 				devname = splicename(&c.c_devname);
20525109Spetede 
20535109Spetede 				if (strstr(devname, META_LONGDISKNAME_STR)
20545109Spetede 				    != NULL) {
20555109Spetede 					Free(devname);
20565109Spetede 					devname = getlongname(&c, ep);
20575109Spetede 					if (devname == NULL) {
20585109Spetede 						return (-1);
20595109Spetede 					}
20605109Spetede 				}
20615109Spetede 
20620Sstevel@tonic-gate 				if (strcmp(devname, np->bname) != 0) {
20630Sstevel@tonic-gate 					Free(devname);
20640Sstevel@tonic-gate 					i++;
20650Sstevel@tonic-gate 					continue;
20660Sstevel@tonic-gate 				}
20670Sstevel@tonic-gate 				Free(devname);
20680Sstevel@tonic-gate 
20690Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
20700Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
20710Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
20720Sstevel@tonic-gate 
20730Sstevel@tonic-gate 				if (metaioctl(MD_DB_DELDEV, &c,
20740Sstevel@tonic-gate 				    &c.c_mde, NULL) != 0) {
20750Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
20760Sstevel@tonic-gate 					goto out;
20770Sstevel@tonic-gate 				}
20780Sstevel@tonic-gate 
20790Sstevel@tonic-gate 				/* Not incrementing "i" intentionally */
20800Sstevel@tonic-gate 			}
20810Sstevel@tonic-gate 		}
20820Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
20830Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
20840Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next) {
20850Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
20860Sstevel@tonic-gate 					p->dd_dbcnt = 0;
20870Sstevel@tonic-gate 					p->dd_dbsize  = 0;
20880Sstevel@tonic-gate 					break;
20890Sstevel@tonic-gate 				}
20900Sstevel@tonic-gate 			}
20910Sstevel@tonic-gate 
20920Sstevel@tonic-gate 			/*
20930Sstevel@tonic-gate 			 * Slam a dummy master block and make it self
20940Sstevel@tonic-gate 			 * identifying
20950Sstevel@tonic-gate 			 */
20960Sstevel@tonic-gate 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
20970Sstevel@tonic-gate 				meta_mkdummymaster(sp, fd, 16);
20980Sstevel@tonic-gate 				(void) close(fd);
20990Sstevel@tonic-gate 			}
21000Sstevel@tonic-gate 		}
21010Sstevel@tonic-gate 	}
21020Sstevel@tonic-gate out:
21030Sstevel@tonic-gate 	if (metaislocalset(sp)) {
21040Sstevel@tonic-gate 		/*
21050Sstevel@tonic-gate 		 * Stop all the daemons if there are
21060Sstevel@tonic-gate 		 * no more replicas so that the module can be
21070Sstevel@tonic-gate 		 * unloaded.
21080Sstevel@tonic-gate 		 */
21090Sstevel@tonic-gate 		if (rval == 0 && stop_svmdaemons == 1) {
21100Sstevel@tonic-gate 			char buf[MAXPATHLEN];
21110Sstevel@tonic-gate 			int i;
21120Sstevel@tonic-gate 
21130Sstevel@tonic-gate 			for (i = 0; i < DAEMON_COUNT; i++) {
21140Sstevel@tonic-gate 				(void) snprintf(buf, MAXPATHLEN,
21155109Spetede 				    "/usr/bin/pkill -%s -x %s",
21165109Spetede 				    svmd_kill_list[i].svmd_kill_val,
21175109Spetede 				    svmd_kill_list[i].svmd_name);
21180Sstevel@tonic-gate 				if (pclose(popen(buf, "w")) == -1)
21190Sstevel@tonic-gate 					md_perror(buf);
21200Sstevel@tonic-gate 			}
21210Sstevel@tonic-gate 
21220Sstevel@tonic-gate 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
21230Sstevel@tonic-gate 				mde_perror(&status, "");
21240Sstevel@tonic-gate 				mdclrerror(&status);
21250Sstevel@tonic-gate 			}
21260Sstevel@tonic-gate 		}
21270Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
21280Sstevel@tonic-gate 			/* Don't mask any previous errors */
21290Sstevel@tonic-gate 			if (rval == 0)
21300Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21310Sstevel@tonic-gate 			else
21320Sstevel@tonic-gate 				mdclrerror(&status);
21330Sstevel@tonic-gate 			return (rval);
21340Sstevel@tonic-gate 		}
21350Sstevel@tonic-gate 
21360Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
21370Sstevel@tonic-gate 			/* Don't mask any previous errors */
21380Sstevel@tonic-gate 			if (rval == 0)
21390Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21400Sstevel@tonic-gate 			else
21410Sstevel@tonic-gate 				mdclrerror(&status);
21420Sstevel@tonic-gate 		}
21430Sstevel@tonic-gate 	} else {
21440Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
21450Sstevel@tonic-gate 		    (force_option & MDFORCE_SET_LOCKED),
21460Sstevel@tonic-gate 		    ((force_option & MDFORCE_LOCAL) |
21470Sstevel@tonic-gate 		    (force_option & MDFORCE_DS)), &status)) {
21480Sstevel@tonic-gate 			/* Don't mask any previous errors */
21490Sstevel@tonic-gate 			if (rval == 0)
21500Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21510Sstevel@tonic-gate 			else
21520Sstevel@tonic-gate 				mdclrerror(&status);
21530Sstevel@tonic-gate 		}
21540Sstevel@tonic-gate 		metafreedrivedesc(&dd);
21550Sstevel@tonic-gate 	}
21560Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
21570Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
21580Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
21590Sstevel@tonic-gate 		}
21600Sstevel@tonic-gate 	}
21610Sstevel@tonic-gate 	return (rval);
21620Sstevel@tonic-gate }
21630Sstevel@tonic-gate 
21640Sstevel@tonic-gate static md_replica_t *
metareplicaname(mdsetname_t * sp,int flags,struct mddb_config * c,md_error_t * ep)21650Sstevel@tonic-gate metareplicaname(
21660Sstevel@tonic-gate 	mdsetname_t		*sp,
21670Sstevel@tonic-gate 	int			flags,
21680Sstevel@tonic-gate 	struct mddb_config	*c,
21690Sstevel@tonic-gate 	md_error_t		*ep
21700Sstevel@tonic-gate )
21710Sstevel@tonic-gate {
21720Sstevel@tonic-gate 	md_replica_t	*rp;
21730Sstevel@tonic-gate 	char		*devname;
21740Sstevel@tonic-gate 	size_t		sz;
21755109Spetede 	devid_nmlist_t	*disklist = NULL;
21765109Spetede 	char		*devid_str;
21770Sstevel@tonic-gate 
21780Sstevel@tonic-gate 	/* allocate replicaname */
21790Sstevel@tonic-gate 	rp = Zalloc(sizeof (*rp));
21800Sstevel@tonic-gate 
21810Sstevel@tonic-gate 	/* get device name */
21820Sstevel@tonic-gate 	devname = splicename(&c->c_devname);
21835109Spetede 
21845109Spetede 	/*
21855109Spetede 	 * Check if the device has a long name (>40 characters) and
21865109Spetede 	 * if so then we have to use devids to get the device name.
21875109Spetede 	 * If this cannot be done then we have to fail the request.
21885109Spetede 	 */
21895109Spetede 	if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
21905109Spetede 		if (c->c_locator.l_devid != NULL) {
21915109Spetede 			if (meta_deviceid_to_nmlist("/dev/dsk",
21925109Spetede 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
21935109Spetede 			    c->c_locator.l_minor_name, &disklist) != 0) {
21945109Spetede 				devid_str = devid_str_encode(
21955109Spetede 				    (ddi_devid_t)(uintptr_t)
21965109Spetede 				    c->c_locator.l_devid, NULL);
21975109Spetede 				(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
21985109Spetede 				mderrorextra(ep, devid_str);
21995109Spetede 				if (devid_str != NULL)
22005109Spetede 					devid_str_free(devid_str);
22015109Spetede 				Free(rp);
22025109Spetede 				Free(devname);
22035109Spetede 				return (NULL);
22045109Spetede 			}
22055109Spetede 		} else {
22065109Spetede 			(void) mderror(ep, MDE_NODEVID, "");
22075109Spetede 			Free(rp);
22085109Spetede 			Free(devname);
22095109Spetede 			return (NULL);
22105109Spetede 		}
22115109Spetede 		Free(devname);
22125109Spetede 		devname = disklist[0].devname;
22135109Spetede 	}
22145109Spetede 
22150Sstevel@tonic-gate 	if (flags & PRINT_FAST) {
22161623Stw21770 		if ((rp->r_namep = metaname_fast(&sp, devname,
22171623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
22180Sstevel@tonic-gate 			Free(devname);
22190Sstevel@tonic-gate 			Free(rp);
22200Sstevel@tonic-gate 			return (NULL);
22210Sstevel@tonic-gate 		}
22220Sstevel@tonic-gate 	} else {
22231623Stw21770 		if ((rp->r_namep = metaname(&sp, devname,
22241623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
22250Sstevel@tonic-gate 			Free(devname);
22260Sstevel@tonic-gate 			Free(rp);
22270Sstevel@tonic-gate 			return (NULL);
22280Sstevel@tonic-gate 		}
22290Sstevel@tonic-gate 	}
22300Sstevel@tonic-gate 	Free(devname);
22310Sstevel@tonic-gate 
22320Sstevel@tonic-gate 	/* make sure it's OK */
22330Sstevel@tonic-gate 	if ((! (flags & MD_BASICNAME_OK)) &&
22340Sstevel@tonic-gate 	    (metachkcomp(rp->r_namep, ep) != 0)) {
22350Sstevel@tonic-gate 		Free(rp);
22360Sstevel@tonic-gate 		return (NULL);
22370Sstevel@tonic-gate 	}
22380Sstevel@tonic-gate 
223962Sjeanm 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
224062Sjeanm 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
22410Sstevel@tonic-gate 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
22420Sstevel@tonic-gate 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
224362Sjeanm 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
224462Sjeanm 		    (c->c_locator.l_devid));
22450Sstevel@tonic-gate 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
22460Sstevel@tonic-gate 		    (ddi_devid_t)NULL) {
22470Sstevel@tonic-gate 			Free(rp);
22480Sstevel@tonic-gate 			return (NULL);
22490Sstevel@tonic-gate 		}
22500Sstevel@tonic-gate 		(void) memcpy((void *)rp->r_devid,
225162Sjeanm 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
22520Sstevel@tonic-gate 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
22530Sstevel@tonic-gate 		rp->r_flags &= ~MDDB_F_NODEVID;
22540Sstevel@tonic-gate 		/* Overwrite dev derived from name with dev from devid */
22550Sstevel@tonic-gate 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
22560Sstevel@tonic-gate 	}
22570Sstevel@tonic-gate 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
22580Sstevel@tonic-gate 
22590Sstevel@tonic-gate 	rp->r_blkno = c->c_locator.l_blkno;
22600Sstevel@tonic-gate 	if (c->c_dbend != 0)
22610Sstevel@tonic-gate 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
22620Sstevel@tonic-gate 
22630Sstevel@tonic-gate 	/* return replica */
22640Sstevel@tonic-gate 	return (rp);
22650Sstevel@tonic-gate }
22660Sstevel@tonic-gate 
22670Sstevel@tonic-gate /*
22680Sstevel@tonic-gate  * free replica list
22690Sstevel@tonic-gate  */
22700Sstevel@tonic-gate void
metafreereplicalist(md_replicalist_t * rlp)22710Sstevel@tonic-gate metafreereplicalist(
22720Sstevel@tonic-gate 	md_replicalist_t	*rlp
22730Sstevel@tonic-gate )
22740Sstevel@tonic-gate {
22750Sstevel@tonic-gate 	md_replicalist_t	*rl = NULL;
22760Sstevel@tonic-gate 
22770Sstevel@tonic-gate 	for (/* void */; (rlp != NULL); rlp = rl) {
22780Sstevel@tonic-gate 		rl = rlp->rl_next;
22790Sstevel@tonic-gate 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
22800Sstevel@tonic-gate 			free(rlp->rl_repp->r_devid);
22810Sstevel@tonic-gate 		}
22820Sstevel@tonic-gate 		Free(rlp->rl_repp);
22830Sstevel@tonic-gate 		Free(rlp);
22840Sstevel@tonic-gate 	}
22850Sstevel@tonic-gate }
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate /*
22880Sstevel@tonic-gate  * return list of all replicas in set
22890Sstevel@tonic-gate  */
22900Sstevel@tonic-gate int
metareplicalist(mdsetname_t * sp,int flags,md_replicalist_t ** rlpp,md_error_t * ep)22910Sstevel@tonic-gate metareplicalist(
22920Sstevel@tonic-gate 	mdsetname_t		*sp,
22930Sstevel@tonic-gate 	int			flags,
22940Sstevel@tonic-gate 	md_replicalist_t	**rlpp,
22950Sstevel@tonic-gate 	md_error_t		*ep
22960Sstevel@tonic-gate )
22970Sstevel@tonic-gate {
22980Sstevel@tonic-gate 	md_replicalist_t	**tail = rlpp;
22990Sstevel@tonic-gate 	int			count = 0;
23000Sstevel@tonic-gate 	struct mddb_config	c;
23010Sstevel@tonic-gate 	int			i;
23020Sstevel@tonic-gate 	char			*devid;
23030Sstevel@tonic-gate 
23040Sstevel@tonic-gate 	/* for each replica */
23050Sstevel@tonic-gate 	i = 0;
23060Sstevel@tonic-gate 	do {
23070Sstevel@tonic-gate 		md_replica_t	*rp;
23080Sstevel@tonic-gate 
23090Sstevel@tonic-gate 		/* get next replica */
23100Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
23110Sstevel@tonic-gate 		c.c_id = i;
23120Sstevel@tonic-gate 		c.c_setno = sp->setno;
23130Sstevel@tonic-gate 
23140Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
23150Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23160Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23170Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
23180Sstevel@tonic-gate 				break;	/* handle none at all */
23190Sstevel@tonic-gate 			}
23200Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
23210Sstevel@tonic-gate 			goto out;
23220Sstevel@tonic-gate 		}
23230Sstevel@tonic-gate 
23240Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
23250Sstevel@tonic-gate 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
23260Sstevel@tonic-gate 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
23270Sstevel@tonic-gate 				goto out;
23280Sstevel@tonic-gate 			}
23290Sstevel@tonic-gate 			c.c_locator.l_devid = (uintptr_t)devid;
23300Sstevel@tonic-gate 			/*
23310Sstevel@tonic-gate 			 * Turn on space and sz flags since 'sz' amount of
23320Sstevel@tonic-gate 			 * space has been alloc'd.
23330Sstevel@tonic-gate 			 */
23340Sstevel@tonic-gate 			c.c_locator.l_devid_flags =
23355109Spetede 			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
23360Sstevel@tonic-gate 		}
23370Sstevel@tonic-gate 
23380Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23390Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23400Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
23410Sstevel@tonic-gate 				break;	/* handle none at all */
23420Sstevel@tonic-gate 			}
23430Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
23440Sstevel@tonic-gate 			goto out;
23450Sstevel@tonic-gate 		}
23460Sstevel@tonic-gate 
23470Sstevel@tonic-gate 		/*
23480Sstevel@tonic-gate 		 * Paranoid check - shouldn't happen, but is left as
23490Sstevel@tonic-gate 		 * a place holder for changes that will be needed after
23500Sstevel@tonic-gate 		 * dynamic reconfiguration changes are added to SVM (to
23510Sstevel@tonic-gate 		 * support movement of disks at any point in time).
23520Sstevel@tonic-gate 		 */
23530Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
23540Sstevel@tonic-gate 			(void) fprintf(stderr,
23550Sstevel@tonic-gate 			    dgettext(TEXT_DOMAIN,
23565109Spetede 			    "Error: Relocation Information "
23575109Spetede 			    "(drvnm=%s, mnum=0x%lx) \n"
23585109Spetede 			    "relocation information size changed - \n"
23595109Spetede 			    "rerun command\n"),
23600Sstevel@tonic-gate 			    c.c_locator.l_driver, c.c_locator.l_mnum);
23610Sstevel@tonic-gate 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
23620Sstevel@tonic-gate 			goto out;
23630Sstevel@tonic-gate 		}
23640Sstevel@tonic-gate 
23650Sstevel@tonic-gate 		if (c.c_dbcnt == 0)
23660Sstevel@tonic-gate 			break;		/* handle none at all */
23670Sstevel@tonic-gate 
23680Sstevel@tonic-gate 		/* get info */
23690Sstevel@tonic-gate 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
23700Sstevel@tonic-gate 			goto out;
23710Sstevel@tonic-gate 
23720Sstevel@tonic-gate 		/* append to list */
23730Sstevel@tonic-gate 		*tail = Zalloc(sizeof (**tail));
23740Sstevel@tonic-gate 		(*tail)->rl_repp = rp;
23750Sstevel@tonic-gate 		tail = &(*tail)->rl_next;
23760Sstevel@tonic-gate 		++count;
23770Sstevel@tonic-gate 
23780Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23790Sstevel@tonic-gate 			free(devid);
23800Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
23810Sstevel@tonic-gate 		}
23820Sstevel@tonic-gate 
23830Sstevel@tonic-gate 	} while (++i < c.c_dbcnt);
23840Sstevel@tonic-gate 
23850Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23860Sstevel@tonic-gate 		free(devid);
23870Sstevel@tonic-gate 	}
23880Sstevel@tonic-gate 
23890Sstevel@tonic-gate 	/* return count */
23900Sstevel@tonic-gate 	return (count);
23910Sstevel@tonic-gate 
23920Sstevel@tonic-gate 	/* cleanup, return error */
23930Sstevel@tonic-gate out:
23940Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23950Sstevel@tonic-gate 		free(devid);
23960Sstevel@tonic-gate 	}
23970Sstevel@tonic-gate 	metafreereplicalist(*rlpp);
23980Sstevel@tonic-gate 	*rlpp = NULL;
23990Sstevel@tonic-gate 	return (-1);
24000Sstevel@tonic-gate }
24010Sstevel@tonic-gate 
24020Sstevel@tonic-gate /*
24030Sstevel@tonic-gate  * meta_sync_db_locations - get list of replicas from kernel and write
24040Sstevel@tonic-gate  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
24050Sstevel@tonic-gate  * 	the kernel with the replica list in the conf files.
24060Sstevel@tonic-gate  *
24070Sstevel@tonic-gate  */
24080Sstevel@tonic-gate void
meta_sync_db_locations(mdsetname_t * sp,md_error_t * ep)24090Sstevel@tonic-gate meta_sync_db_locations(
24100Sstevel@tonic-gate 	mdsetname_t	*sp,
24110Sstevel@tonic-gate 	md_error_t	*ep
24120Sstevel@tonic-gate )
24130Sstevel@tonic-gate {
24140Sstevel@tonic-gate 	char		*sname = 0;		/* system file name */
24150Sstevel@tonic-gate 	char 		*cname = 0;		/* config file name */
24160Sstevel@tonic-gate 
24170Sstevel@tonic-gate 	if (!metaislocalset(sp))
24180Sstevel@tonic-gate 		return;
24190Sstevel@tonic-gate 
24200Sstevel@tonic-gate 	/* Updates backup of configuration file (aka mddb.cf) */
24210Sstevel@tonic-gate 	if (buildconf(sp, ep) != 0)
24220Sstevel@tonic-gate 		return;
24230Sstevel@tonic-gate 
24240Sstevel@tonic-gate 	/* Updates system configuration file (aka md.conf) */
24250Sstevel@tonic-gate 	(void) meta_db_patch(sname, cname, 0, ep);
24260Sstevel@tonic-gate }
24270Sstevel@tonic-gate 
24280Sstevel@tonic-gate /*
24290Sstevel@tonic-gate  * setup_db_locations - parse the mddb.cf file and
24300Sstevel@tonic-gate  *			tells the driver which db locations to use.
24310Sstevel@tonic-gate  */
24320Sstevel@tonic-gate int
meta_setup_db_locations(md_error_t * ep)24330Sstevel@tonic-gate meta_setup_db_locations(
24340Sstevel@tonic-gate 	md_error_t	*ep
24350Sstevel@tonic-gate )
24360Sstevel@tonic-gate {
24370Sstevel@tonic-gate 	mddb_config_t	c;
24380Sstevel@tonic-gate 	FILE		*fp;
24390Sstevel@tonic-gate 	char		inbuff[1024];
24400Sstevel@tonic-gate 	char		*buff;
24410Sstevel@tonic-gate 	uint_t		i;
24420Sstevel@tonic-gate 	size_t		sz;
24430Sstevel@tonic-gate 	int		rval = 0;
24440Sstevel@tonic-gate 	char		*devidp;
24450Sstevel@tonic-gate 	uint_t		devid_size;
24460Sstevel@tonic-gate 	char		*minor_name = NULL;
24470Sstevel@tonic-gate 	ddi_devid_t	devid_decode;
24480Sstevel@tonic-gate 	int		checksum;
24490Sstevel@tonic-gate 
24500Sstevel@tonic-gate 	/* do mddb.cf file */
24510Sstevel@tonic-gate 	(void) memset(&c, '\0', sizeof (c));
24520Sstevel@tonic-gate 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
24530Sstevel@tonic-gate 		if (errno != ENOENT)
24540Sstevel@tonic-gate 			return (mdsyserror(ep, errno, META_DBCONF));
24550Sstevel@tonic-gate 	}
24560Sstevel@tonic-gate 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
24570Sstevel@tonic-gate 	    fp)) != NULL)) {
24580Sstevel@tonic-gate 
24590Sstevel@tonic-gate 		/* ignore comments */
24600Sstevel@tonic-gate 		if (*buff == '#')
24610Sstevel@tonic-gate 			continue;
24620Sstevel@tonic-gate 
24630Sstevel@tonic-gate 		/* parse locator */
24640Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
24650Sstevel@tonic-gate 		c.c_setno = MD_LOCAL_SET;
24660Sstevel@tonic-gate 		i = strcspn(buff, " \t");
24670Sstevel@tonic-gate 		if (i > sizeof (c.c_locator.l_driver))
24680Sstevel@tonic-gate 			i = sizeof (c.c_locator.l_driver);
24690Sstevel@tonic-gate 		(void) strncpy(c.c_locator.l_driver, buff, i);
24700Sstevel@tonic-gate 		buff += i;
24710Sstevel@tonic-gate 		c.c_locator.l_dev =
24720Sstevel@tonic-gate 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
24730Sstevel@tonic-gate 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
24740Sstevel@tonic-gate 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 		/* parse out devid */
24770Sstevel@tonic-gate 		while (isspace((int)(*buff)))
24780Sstevel@tonic-gate 			buff += 1;
24790Sstevel@tonic-gate 		i = strcspn(buff, " \t");
24800Sstevel@tonic-gate 		if ((devidp = (char *)malloc(i+1)) == NULL)
24810Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
24820Sstevel@tonic-gate 
24830Sstevel@tonic-gate 		(void) strncpy(devidp, buff, i);
24840Sstevel@tonic-gate 		devidp[i] = '\0';
24850Sstevel@tonic-gate 		if (devid_str_decode(devidp, &devid_decode,
24860Sstevel@tonic-gate 		    &minor_name) == -1) {
24870Sstevel@tonic-gate 			free(devidp);
24880Sstevel@tonic-gate 			continue;
24890Sstevel@tonic-gate 		}
24900Sstevel@tonic-gate 
24910Sstevel@tonic-gate 		/* Conf file must have minor name associated with devid */
24920Sstevel@tonic-gate 		if (minor_name == NULL) {
24930Sstevel@tonic-gate 			free(devidp);
24940Sstevel@tonic-gate 			devid_free(devid_decode);
24950Sstevel@tonic-gate 			continue;
24960Sstevel@tonic-gate 		}
24970Sstevel@tonic-gate 
24980Sstevel@tonic-gate 		sz = devid_sizeof(devid_decode);
24990Sstevel@tonic-gate 		/* Copy to devid size buffer that ioctl expects */
25000Sstevel@tonic-gate 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
25010Sstevel@tonic-gate 			devid_free(devid_decode);
25020Sstevel@tonic-gate 			free(minor_name);
25030Sstevel@tonic-gate 			free(devidp);
25040Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
25050Sstevel@tonic-gate 		}
25060Sstevel@tonic-gate 
250762Sjeanm 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
25080Sstevel@tonic-gate 		    (void *)devid_decode, sz);
25090Sstevel@tonic-gate 
25100Sstevel@tonic-gate 		devid_free(devid_decode);
25110Sstevel@tonic-gate 
25120Sstevel@tonic-gate 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
25130Sstevel@tonic-gate 			free(minor_name);
25140Sstevel@tonic-gate 			free(devidp);
251562Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25160Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
25170Sstevel@tonic-gate 		}
25180Sstevel@tonic-gate 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
25190Sstevel@tonic-gate 		free(minor_name);
25200Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
25215109Spetede 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
25220Sstevel@tonic-gate 		c.c_locator.l_devid_sz = sz;
25230Sstevel@tonic-gate 
25240Sstevel@tonic-gate 		devid_size = strlen(devidp);
25250Sstevel@tonic-gate 		buff += devid_size;
25260Sstevel@tonic-gate 
25270Sstevel@tonic-gate 		checksum = strtol(buff, &buff, 10);
25280Sstevel@tonic-gate 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
25290Sstevel@tonic-gate 			checksum += c.c_locator.l_driver[i];
25300Sstevel@tonic-gate 		for (i = 0; i < devid_size; i++) {
25310Sstevel@tonic-gate 			checksum += devidp[i];
25320Sstevel@tonic-gate 		}
25330Sstevel@tonic-gate 		free(devidp);
25340Sstevel@tonic-gate 
25350Sstevel@tonic-gate 		checksum += minor(c.c_locator.l_dev);
25360Sstevel@tonic-gate 		checksum += c.c_locator.l_blkno;
25370Sstevel@tonic-gate 		if (checksum != 42) {
25380Sstevel@tonic-gate 			/* overwritten later for more serious problems */
25390Sstevel@tonic-gate 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
254062Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25410Sstevel@tonic-gate 			continue;
25420Sstevel@tonic-gate 		}
25430Sstevel@tonic-gate 		c.c_locator.l_flags = 0;
25440Sstevel@tonic-gate 
25450Sstevel@tonic-gate 		/* use db location */
25460Sstevel@tonic-gate 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
254762Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25480Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
25490Sstevel@tonic-gate 		}
25500Sstevel@tonic-gate 
25510Sstevel@tonic-gate 		/* free up devid if in use */
255262Sjeanm 		free((void *)(uintptr_t)c.c_locator.l_devid);
25530Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
25540Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
25550Sstevel@tonic-gate 	}
25560Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
25570Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_DBCONF));
25580Sstevel@tonic-gate 
25590Sstevel@tonic-gate 	/* check for stale database */
25600Sstevel@tonic-gate 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
25610Sstevel@tonic-gate 	c.c_id = 0;
25620Sstevel@tonic-gate 	c.c_setno = MD_LOCAL_SET;
25630Sstevel@tonic-gate 
25645109Spetede 	/*
25655109Spetede 	 * While we do not need the devid here we may need to
25665109Spetede 	 * know if devid's are being used by the kernel for
25675109Spetede 	 * the replicas. This is because under some circumstances
25685109Spetede 	 * we can only manipulate the SVM configuration if the
25695109Spetede 	 * kernel is using devid's.
25705109Spetede 	 */
25710Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
25725109Spetede 	c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
25735109Spetede 	c.c_locator.l_devid_sz = 0;
25740Sstevel@tonic-gate 
25750Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
25760Sstevel@tonic-gate 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
25770Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
25780Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
25790Sstevel@tonic-gate 	}
25800Sstevel@tonic-gate 
25810Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE)
25820Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
25830Sstevel@tonic-gate 		    0, NULL));
25840Sstevel@tonic-gate 
25855109Spetede 	if (c.c_locator.l_devid_sz != 0) {
25865109Spetede 		/*
25875109Spetede 		 * Devid's are being used to track the replicas because
25885109Spetede 		 * there is space for a devid.
25895109Spetede 		 */
25905109Spetede 		devid_in_use = TRUE;
25915109Spetede 	}
25925109Spetede 
25930Sstevel@tonic-gate 	/* success */
25940Sstevel@tonic-gate 	return (rval);
25950Sstevel@tonic-gate }
25960Sstevel@tonic-gate 
25970Sstevel@tonic-gate /*
25980Sstevel@tonic-gate  * meta_db_minreplica - returns the minimum size replica currently in use.
25990Sstevel@tonic-gate  */
26000Sstevel@tonic-gate daddr_t
meta_db_minreplica(mdsetname_t * sp,md_error_t * ep)26010Sstevel@tonic-gate meta_db_minreplica(
26020Sstevel@tonic-gate 	mdsetname_t	*sp,
26030Sstevel@tonic-gate 	md_error_t	*ep
26040Sstevel@tonic-gate )
26050Sstevel@tonic-gate {
26060Sstevel@tonic-gate 	md_replica_t		*r;
26070Sstevel@tonic-gate 	md_replicalist_t	*rl, *rlp = NULL;
26080Sstevel@tonic-gate 	daddr_t			nblks = 0;
26090Sstevel@tonic-gate 
26100Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
26110Sstevel@tonic-gate 		return (-1);
26120Sstevel@tonic-gate 
26130Sstevel@tonic-gate 	if (rlp == NULL)
26140Sstevel@tonic-gate 		return (-1);
26150Sstevel@tonic-gate 
26160Sstevel@tonic-gate 	/* find the smallest existing replica */
26170Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
26180Sstevel@tonic-gate 		r = rl->rl_repp;
26190Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
26200Sstevel@tonic-gate 	}
26210Sstevel@tonic-gate 
26220Sstevel@tonic-gate 	metafreereplicalist(rlp);
26230Sstevel@tonic-gate 	return (nblks);
26240Sstevel@tonic-gate }
26250Sstevel@tonic-gate 
26260Sstevel@tonic-gate /*
26270Sstevel@tonic-gate  * meta_get_replica_names
26280Sstevel@tonic-gate  *  returns an mdnamelist_t of replica slices
26290Sstevel@tonic-gate  */
26300Sstevel@tonic-gate /*ARGSUSED*/
26310Sstevel@tonic-gate int
meta_get_replica_names(mdsetname_t * sp,mdnamelist_t ** nlpp,int options,md_error_t * ep)26320Sstevel@tonic-gate meta_get_replica_names(
26330Sstevel@tonic-gate 	mdsetname_t	*sp,
26340Sstevel@tonic-gate 	mdnamelist_t	**nlpp,
26350Sstevel@tonic-gate 	int		options,
26360Sstevel@tonic-gate 	md_error_t	*ep
26370Sstevel@tonic-gate )
26380Sstevel@tonic-gate {
26390Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
26400Sstevel@tonic-gate 	md_replicalist_t	*rl;
26410Sstevel@tonic-gate 	mdnamelist_t		**tailpp = nlpp;
26420Sstevel@tonic-gate 	int			cnt = 0;
26430Sstevel@tonic-gate 
26440Sstevel@tonic-gate 	assert(nlpp != NULL);
26450Sstevel@tonic-gate 
26460Sstevel@tonic-gate 	if (!metaislocalset(sp))
26470Sstevel@tonic-gate 		goto out;
26480Sstevel@tonic-gate 
26490Sstevel@tonic-gate 	/* get replicas */
26500Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
26510Sstevel@tonic-gate 		cnt = -1;
26520Sstevel@tonic-gate 		goto out;
26530Sstevel@tonic-gate 	}
26540Sstevel@tonic-gate 
26550Sstevel@tonic-gate 	/* build name list */
26560Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
26570Sstevel@tonic-gate 		/*
26580Sstevel@tonic-gate 		 * Add the name struct to the end of the
26590Sstevel@tonic-gate 		 * namelist but keep a pointer to the last
26600Sstevel@tonic-gate 		 * element so that we don't incur the overhead
26610Sstevel@tonic-gate 		 * of traversing the list each time
26620Sstevel@tonic-gate 		 */
26630Sstevel@tonic-gate 		tailpp = meta_namelist_append_wrapper(
26645109Spetede 		    tailpp, rl->rl_repp->r_namep);
26650Sstevel@tonic-gate 		++cnt;
26660Sstevel@tonic-gate 	}
26670Sstevel@tonic-gate 
26680Sstevel@tonic-gate 	/* cleanup, return count or error */
26690Sstevel@tonic-gate out:
26700Sstevel@tonic-gate 	metafreereplicalist(rlp);
26710Sstevel@tonic-gate 	return (cnt);
26720Sstevel@tonic-gate }
2673