xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_db.c (revision 5109:0876b6c2ea48)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51623Stw21770  * Common Development and Distribution License (the "License").
61623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*5109Spetede  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
300Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
330Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
340Sstevel@tonic-gate #endif
350Sstevel@tonic-gate 
360Sstevel@tonic-gate /*
370Sstevel@tonic-gate  * Metadevice database interfaces.
380Sstevel@tonic-gate  */
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #define	MDDB
410Sstevel@tonic-gate 
420Sstevel@tonic-gate #include <meta.h>
430Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
440Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
450Sstevel@tonic-gate #include <sys/lvm/mdio.h>
460Sstevel@tonic-gate #include <string.h>
470Sstevel@tonic-gate #include <strings.h>
480Sstevel@tonic-gate #include <ctype.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate struct svm_daemon {
510Sstevel@tonic-gate 	char *svmd_name;
520Sstevel@tonic-gate 	char *svmd_kill_val;
530Sstevel@tonic-gate };
540Sstevel@tonic-gate 
552614Spetede /*
562614Spetede  * This is a list of the daemons that are not stopped by the SVM smf(5)
572614Spetede  * services. The mdmonitord is started via svc:/system/mdmonitor:default
582614Spetede  * but no contract(4) is constructed and so it is not stopped by smf(5).
592614Spetede  */
600Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = {
610Sstevel@tonic-gate 		{"mdmonitord", "HUP"},
620Sstevel@tonic-gate 		{"mddoors", "KILL"},
630Sstevel@tonic-gate 	};
640Sstevel@tonic-gate 
650Sstevel@tonic-gate #define	DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon))
660Sstevel@tonic-gate 
670Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
680Sstevel@tonic-gate 
690Sstevel@tonic-gate /*
70*5109Spetede  * Are the locator blocks for the replicas using devids
71*5109Spetede  */
72*5109Spetede static int	devid_in_use = FALSE;
73*5109Spetede 
74*5109Spetede static char *
75*5109Spetede getlongname(
76*5109Spetede 	struct mddb_config	*c,
77*5109Spetede 	md_error_t		*ep
78*5109Spetede )
79*5109Spetede {
80*5109Spetede 	char		*diskname = NULL;
81*5109Spetede 	char		*devid_str;
82*5109Spetede 	devid_nmlist_t	*disklist = NULL;
83*5109Spetede 
84*5109Spetede 	c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
85*5109Spetede 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
86*5109Spetede 		(void) mdstealerror(ep, &c->c_mde);
87*5109Spetede 		return (NULL);
88*5109Spetede 	}
89*5109Spetede 
90*5109Spetede 	if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) {
91*5109Spetede 		c->c_locator.l_devid = (uintptr_t)
92*5109Spetede 		    Malloc(c->c_locator.l_devid_sz);
93*5109Spetede 		c->c_locator.l_devid_flags =
94*5109Spetede 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
95*5109Spetede 	} else {
96*5109Spetede 		(void) mderror(ep, MDE_NODEVID, "");
97*5109Spetede 		goto out;
98*5109Spetede 	}
99*5109Spetede 
100*5109Spetede 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
101*5109Spetede 		(void) mdstealerror(ep, &c->c_mde);
102*5109Spetede 		goto out;
103*5109Spetede 	}
104*5109Spetede 
105*5109Spetede 	if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
106*5109Spetede 		(void) mderror(ep, MDE_NODEVID, "");
107*5109Spetede 		goto out;
108*5109Spetede 	}
109*5109Spetede 
110*5109Spetede 	if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
111*5109Spetede 		(void) mdstealerror(ep, &c->c_mde);
112*5109Spetede 		goto out;
113*5109Spetede 	}
114*5109Spetede 
115*5109Spetede 	if (c->c_locator.l_devid != NULL) {
116*5109Spetede 		if (meta_deviceid_to_nmlist("/dev/dsk",
117*5109Spetede 		    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
118*5109Spetede 		    c->c_locator.l_minor_name, &disklist) != 0) {
119*5109Spetede 			devid_str = devid_str_encode(
120*5109Spetede 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
121*5109Spetede 			(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
122*5109Spetede 			mderrorextra(ep, devid_str);
123*5109Spetede 			if (devid_str != NULL)
124*5109Spetede 				devid_str_free(devid_str);
125*5109Spetede 			goto out;
126*5109Spetede 		}
127*5109Spetede 		diskname = Strdup(disklist[0].devname);
128*5109Spetede 	}
129*5109Spetede 
130*5109Spetede out:
131*5109Spetede 	if (disklist != NULL)
132*5109Spetede 		devid_free_nmlist(disklist);
133*5109Spetede 
134*5109Spetede 	if (c->c_locator.l_devid != NULL)
135*5109Spetede 		Free((void *)(uintptr_t)c->c_locator.l_devid);
136*5109Spetede 
137*5109Spetede 	return (diskname);
138*5109Spetede }
139*5109Spetede 
140*5109Spetede /*
1410Sstevel@tonic-gate  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
1420Sstevel@tonic-gate  */
1430Sstevel@tonic-gate md_timeval32_t
1440Sstevel@tonic-gate meta_get_lb_inittime(
1450Sstevel@tonic-gate 	mdsetname_t	*sp,
1460Sstevel@tonic-gate 	md_error_t	*ep
1470Sstevel@tonic-gate )
1480Sstevel@tonic-gate {
1490Sstevel@tonic-gate 	mddb_config_t	c;
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	/* Fill in setno, setname, and sideno */
1540Sstevel@tonic-gate 	c.c_setno = sp->setno;
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
1570Sstevel@tonic-gate 		(void) mdstealerror(ep, &c.c_mde);
1580Sstevel@tonic-gate 	}
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate 	return (c.c_timestamp);
1610Sstevel@tonic-gate }
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate /*
1640Sstevel@tonic-gate  * mkmasterblks writes out the master blocks of the mddb to the replica.
1650Sstevel@tonic-gate  *
1660Sstevel@tonic-gate  * In a MN diskset, this is called by the node that is adding this replica
1670Sstevel@tonic-gate  * to the diskset.
1680Sstevel@tonic-gate  */
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate #define	MDDB_VERIFY_SIZE	8192
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate static int
1730Sstevel@tonic-gate mkmasterblks(
1740Sstevel@tonic-gate 	mdsetname_t	*sp,
1750Sstevel@tonic-gate 	mdname_t	*np,
1760Sstevel@tonic-gate 	int		fd,
1770Sstevel@tonic-gate 	daddr_t		firstblk,
1780Sstevel@tonic-gate 	int		dbsize,
1790Sstevel@tonic-gate 	md_timeval32_t	inittime,
1800Sstevel@tonic-gate 	md_error_t	*ep
1810Sstevel@tonic-gate )
1820Sstevel@tonic-gate {
1830Sstevel@tonic-gate 	int		consecutive;
1840Sstevel@tonic-gate 	md_timeval32_t	tp;
1850Sstevel@tonic-gate 	struct mddb_mb	*mb;
1860Sstevel@tonic-gate 	char		*buffer;
1870Sstevel@tonic-gate 	int		iosize;
1880Sstevel@tonic-gate 	md_set_desc	*sd;
1890Sstevel@tonic-gate 	int		mn_set = 0;
1900Sstevel@tonic-gate 	daddr_t		startblk;
1910Sstevel@tonic-gate 	int		cnt;
1920Sstevel@tonic-gate 	ddi_devid_t	devid;
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1950Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1960Sstevel@tonic-gate 			return (-1);
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
1990Sstevel@tonic-gate 			mn_set = 1;		/* Used later */
2000Sstevel@tonic-gate 		}
2010Sstevel@tonic-gate 	}
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	/*
2040Sstevel@tonic-gate 	 * Loop to verify the entire mddb region on disk is read/writable.
2050Sstevel@tonic-gate 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
2060Sstevel@tonic-gate 	 * chunks.
2070Sstevel@tonic-gate 	 *
2080Sstevel@tonic-gate 	 * A side-effect of this loop is to zero out the entire mddb region
2090Sstevel@tonic-gate 	 */
2100Sstevel@tonic-gate 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
2110Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	startblk = firstblk;
2140Sstevel@tonic-gate 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 		if (cnt > MDDB_VERIFY_SIZE)
2170Sstevel@tonic-gate 			consecutive = MDDB_VERIFY_SIZE;
2180Sstevel@tonic-gate 		else
2190Sstevel@tonic-gate 			consecutive = cnt;
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2220Sstevel@tonic-gate 			Free(buffer);
2230Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2240Sstevel@tonic-gate 		}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate 		iosize = DEV_BSIZE * consecutive;
2270Sstevel@tonic-gate 		if (write(fd, buffer, iosize) != iosize) {
2280Sstevel@tonic-gate 			Free(buffer);
2290Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2300Sstevel@tonic-gate 		}
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
2330Sstevel@tonic-gate 			Free(buffer);
2340Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2350Sstevel@tonic-gate 		}
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 		if (read(fd, buffer, iosize) != iosize) {
2380Sstevel@tonic-gate 			Free(buffer);
2390Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
2400Sstevel@tonic-gate 		}
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 		startblk += consecutive;
2430Sstevel@tonic-gate 	}
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	Free(buffer);
2460Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
2470Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) == -1) {
2500Sstevel@tonic-gate 		Free(mb);
2510Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2520Sstevel@tonic-gate 	}
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_MB;
2550Sstevel@tonic-gate 	/*
2560Sstevel@tonic-gate 	 * If a MN diskset, set master block revision for a MN set.
2570Sstevel@tonic-gate 	 * Even though the master block structure is no different
2580Sstevel@tonic-gate 	 * for a MN set, setting the revision field to a different
2590Sstevel@tonic-gate 	 * number keeps any pre-MN_diskset code from accessing
2600Sstevel@tonic-gate 	 * this diskset.  It also allows for an early determination
2610Sstevel@tonic-gate 	 * of a MN diskset when reading in from disk so that the
2620Sstevel@tonic-gate 	 * proper size locator block and locator names structure
2630Sstevel@tonic-gate 	 * can be read in thus saving time on diskset startup.
2640Sstevel@tonic-gate 	 */
2650Sstevel@tonic-gate 	if (mn_set)
2660Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MNMB;
2670Sstevel@tonic-gate 	else
2680Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MB;
2690Sstevel@tonic-gate 	mb->mb_timestamp = tp;
2700Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
2710Sstevel@tonic-gate 	mb->mb_blkcnt = dbsize - 1;
2720Sstevel@tonic-gate 	mb->mb_blkno = firstblk;
2730Sstevel@tonic-gate 	mb->mb_nextblk = 0;
2740Sstevel@tonic-gate 
2750Sstevel@tonic-gate 	mb->mb_blkmap.m_firstblk = firstblk + 1;
2760Sstevel@tonic-gate 	mb->mb_blkmap.m_consecutive = dbsize - 1;
2770Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2780Sstevel@tonic-gate 		mb->mb_setcreatetime = inittime;
2790Sstevel@tonic-gate 	}
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	/*
2820Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
2830Sstevel@tonic-gate 	 * the master block. The saved devid is used to provide a mapping
2840Sstevel@tonic-gate 	 * between this disk's devid and the devid stored into the master
2850Sstevel@tonic-gate 	 * block. This allows the disk image to be self-identifying
2860Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
2870Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
2880Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
2890Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
2900Sstevel@tonic-gate 	 * in the remote copy scenario.
2910Sstevel@tonic-gate 	 */
2920Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
2930Sstevel@tonic-gate 		size_t len;
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate 		len = devid_sizeof(devid);
2960Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
2970Sstevel@tonic-gate 			/* there is enough space to store the devid */
2980Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
2990Sstevel@tonic-gate 			mb->mb_devid_len = len;
3000Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, devid, len);
3010Sstevel@tonic-gate 		}
3020Sstevel@tonic-gate 		devid_free(devid);
3030Sstevel@tonic-gate 	}
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3060Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3090Sstevel@tonic-gate 		Free(mb);
3100Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3110Sstevel@tonic-gate 	}
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3140Sstevel@tonic-gate 		Free(mb);
3150Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3160Sstevel@tonic-gate 	}
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
3190Sstevel@tonic-gate 		Free(mb);
3200Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3210Sstevel@tonic-gate 	}
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
3240Sstevel@tonic-gate 		Free(mb);
3250Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
3260Sstevel@tonic-gate 	}
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
329*5109Spetede 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
3300Sstevel@tonic-gate 		Free(mb);
3310Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_NOTVERIFIED,
332*5109Spetede 		    meta_getminor(np->dev), sp->setno, 0, np->rname));
3330Sstevel@tonic-gate 	}
3340Sstevel@tonic-gate 
3350Sstevel@tonic-gate 	Free(mb);
3360Sstevel@tonic-gate 	return (0);
3370Sstevel@tonic-gate }
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate void
3400Sstevel@tonic-gate meta_mkdummymaster(
3410Sstevel@tonic-gate 	mdsetname_t	*sp,
3420Sstevel@tonic-gate 	int		fd,
3430Sstevel@tonic-gate 	daddr_t		firstblk
3440Sstevel@tonic-gate )
3450Sstevel@tonic-gate {
3460Sstevel@tonic-gate 	md_timeval32_t	tp;
3470Sstevel@tonic-gate 	struct mddb_mb	*mb;
3480Sstevel@tonic-gate 	ddi_devid_t	devid;
3490Sstevel@tonic-gate 	md_set_desc	*sd;
3500Sstevel@tonic-gate 	md_error_t	ep = mdnullerror;
3510Sstevel@tonic-gate 	md_timeval32_t	inittime;
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	/*
3540Sstevel@tonic-gate 	 * No dummy master blocks are written for a MN diskset since devids
3550Sstevel@tonic-gate 	 * are not supported in MN disksets.
3560Sstevel@tonic-gate 	 */
3570Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
3580Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
3590Sstevel@tonic-gate 			return;
3600Sstevel@tonic-gate 
3610Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
3620Sstevel@tonic-gate 			return;
3630Sstevel@tonic-gate 	}
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
3660Sstevel@tonic-gate 		return;
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_DU;
3690Sstevel@tonic-gate 	mb->mb_revision = MDDB_REV_MB;
3700Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
3710Sstevel@tonic-gate 	inittime = meta_get_lb_inittime(sp, &ep);
3720Sstevel@tonic-gate 	mb->mb_setcreatetime = inittime;
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) != -1)
3750Sstevel@tonic-gate 		mb->mb_timestamp = tp;
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate 	/*
3780Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
3790Sstevel@tonic-gate 	 * the master block.  This allows the disk image to be self-identifying
3800Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
3810Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
3820Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
3830Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
3840Sstevel@tonic-gate 	 * in the remote copy scenario.
3850Sstevel@tonic-gate 	 */
3860Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
3870Sstevel@tonic-gate 		int len;
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 		len = devid_sizeof(devid);
3900Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
3910Sstevel@tonic-gate 			/* there is enough space to store the devid */
3920Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
3930Sstevel@tonic-gate 			mb->mb_devid_len = len;
3940Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, (char *)devid, len);
3950Sstevel@tonic-gate 		}
3960Sstevel@tonic-gate 		devid_free(devid);
3970Sstevel@tonic-gate 	}
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
4000Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
4010Sstevel@tonic-gate 
4020Sstevel@tonic-gate 	/*
4030Sstevel@tonic-gate 	 * If any of these operations fail, we need to inform the
4040Sstevel@tonic-gate 	 * user that the disk won't be self identifying. When support
4050Sstevel@tonic-gate 	 * for importing remotely replicated disksets is added, we
4060Sstevel@tonic-gate 	 * want to add the error messages here.
4070Sstevel@tonic-gate 	 */
4080Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4090Sstevel@tonic-gate 		goto out;
4100Sstevel@tonic-gate 
4110Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4120Sstevel@tonic-gate 		goto out;
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
4150Sstevel@tonic-gate 		goto out;
4160Sstevel@tonic-gate 
4170Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
4180Sstevel@tonic-gate 		goto out;
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
4210Sstevel@tonic-gate 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
4220Sstevel@tonic-gate 		goto out;
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate out:
4250Sstevel@tonic-gate 	Free(mb);
4260Sstevel@tonic-gate }
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate static int
4290Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
4300Sstevel@tonic-gate {
4310Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
4320Sstevel@tonic-gate 	md_replicalist_t	*rl;
4330Sstevel@tonic-gate 	FILE			*cfp = NULL;
4340Sstevel@tonic-gate 	FILE			*mfp = NULL;
4350Sstevel@tonic-gate 	struct stat		sbuf;
4360Sstevel@tonic-gate 	int			rval = 0;
4370Sstevel@tonic-gate 	int			in_miniroot = 0;
4380Sstevel@tonic-gate 	char			line[MDDB_BOOTLIST_MAX_LEN];
4390Sstevel@tonic-gate 	char			*tname = NULL;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	/* get list of local replicas */
4420Sstevel@tonic-gate 	if (! metaislocalset(sp))
4430Sstevel@tonic-gate 		return (0);
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
4460Sstevel@tonic-gate 		return (-1);
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 	/* open tempfile, copy permissions of original file */
4490Sstevel@tonic-gate 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
4500Sstevel@tonic-gate 		/*
4510Sstevel@tonic-gate 		 * On the miniroot tmp files must be created in /var/tmp.
4520Sstevel@tonic-gate 		 * If we get a EROFS error, we assume that we are in the
4530Sstevel@tonic-gate 		 * miniroot.
4540Sstevel@tonic-gate 		 */
4550Sstevel@tonic-gate 		if (errno != EROFS)
4560Sstevel@tonic-gate 			goto error;
4570Sstevel@tonic-gate 		in_miniroot = 1;
4580Sstevel@tonic-gate 		errno = 0;
4590Sstevel@tonic-gate 		tname = tempnam("/var/tmp", "slvm_");
4600Sstevel@tonic-gate 		if (tname == NULL && errno == EROFS) {
4610Sstevel@tonic-gate 			/*
4620Sstevel@tonic-gate 			 * If we are booted on a read-only root because
4630Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
4640Sstevel@tonic-gate 			 * any scary error messages.
4650Sstevel@tonic-gate 			 */
4660Sstevel@tonic-gate 			errno = 0;
4670Sstevel@tonic-gate 			goto out;
4680Sstevel@tonic-gate 		}
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 		/* open tempfile, copy permissions of original file */
4710Sstevel@tonic-gate 		if ((cfp = fopen(tname, "w+")) == NULL)
4720Sstevel@tonic-gate 			goto error;
4730Sstevel@tonic-gate 	}
4740Sstevel@tonic-gate 	if (stat(META_DBCONF, &sbuf) == 0) {
4750Sstevel@tonic-gate 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
4760Sstevel@tonic-gate 			goto error;
4770Sstevel@tonic-gate 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
4780Sstevel@tonic-gate 			goto error;
4790Sstevel@tonic-gate 	}
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	/* print header */
4820Sstevel@tonic-gate 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
4830Sstevel@tonic-gate 		goto error;
4840Sstevel@tonic-gate 	if (fprintf(cfp, "do not hand edit\n") < 0)
4850Sstevel@tonic-gate 		goto error;
4860Sstevel@tonic-gate 	if (fprintf(cfp,
487*5109Spetede 	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
4880Sstevel@tonic-gate 		goto error;
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate 	/* dump replicas */
4910Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
4920Sstevel@tonic-gate 		md_replica_t	*r = rl->rl_repp;
4930Sstevel@tonic-gate 		int		checksum = 42;
4940Sstevel@tonic-gate 		int		i;
4950Sstevel@tonic-gate 		char		*devidp;
4960Sstevel@tonic-gate 		minor_t		min;
4970Sstevel@tonic-gate 
4980Sstevel@tonic-gate 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
4990Sstevel@tonic-gate 		/* If devid code can't encode devidp - skip entry */
5000Sstevel@tonic-gate 		if (devidp == NULL) {
5010Sstevel@tonic-gate 			continue;
5020Sstevel@tonic-gate 		}
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate 		/* compute checksum */
5050Sstevel@tonic-gate 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
5060Sstevel@tonic-gate 		    (i < sizeof (r->r_driver_name))); i++) {
5070Sstevel@tonic-gate 			checksum -= r->r_driver_name[i];
5080Sstevel@tonic-gate 		}
5090Sstevel@tonic-gate 		min = meta_getminor(r->r_namep->dev);
5100Sstevel@tonic-gate 		checksum -= min;
5110Sstevel@tonic-gate 		checksum -= r->r_blkno;
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate 		for (i = 0; i < strlen(devidp); i++) {
5140Sstevel@tonic-gate 			checksum -= devidp[i];
5150Sstevel@tonic-gate 		}
5160Sstevel@tonic-gate 		/* print info */
5170Sstevel@tonic-gate 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
5180Sstevel@tonic-gate 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
5190Sstevel@tonic-gate 			goto error;
5200Sstevel@tonic-gate 		}
5210Sstevel@tonic-gate 
5220Sstevel@tonic-gate 		devid_str_free(devidp);
5230Sstevel@tonic-gate 	}
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate 	/* close and rename to real file */
5260Sstevel@tonic-gate 	if (fflush(cfp) != 0)
5270Sstevel@tonic-gate 		goto error;
5280Sstevel@tonic-gate 	if (fsync(fileno(cfp)) != 0)
5290Sstevel@tonic-gate 		goto error;
5300Sstevel@tonic-gate 	if (fclose(cfp) != 0) {
5310Sstevel@tonic-gate 		cfp = NULL;
5320Sstevel@tonic-gate 		goto error;
5330Sstevel@tonic-gate 	}
5340Sstevel@tonic-gate 	cfp = NULL;
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 	/*
5370Sstevel@tonic-gate 	 * Renames don't work in the miniroot since tmpfiles are
5380Sstevel@tonic-gate 	 * created in /var/tmp. Hence we copy the data out.
5390Sstevel@tonic-gate 	 */
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	if (! in_miniroot) {
5420Sstevel@tonic-gate 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
5430Sstevel@tonic-gate 			goto error;
5440Sstevel@tonic-gate 	} else {
5450Sstevel@tonic-gate 		if ((cfp = fopen(tname, "r")) == NULL)
5460Sstevel@tonic-gate 			goto error;
5470Sstevel@tonic-gate 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
5480Sstevel@tonic-gate 			goto error;
5490Sstevel@tonic-gate 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
5500Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
5510Sstevel@tonic-gate 				goto error;
5520Sstevel@tonic-gate 		}
5530Sstevel@tonic-gate 		(void) fclose(cfp);
5540Sstevel@tonic-gate 		cfp = NULL;
5550Sstevel@tonic-gate 		if (fflush(mfp) != 0)
5560Sstevel@tonic-gate 			goto error;
5570Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
5580Sstevel@tonic-gate 			goto error;
5590Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
5600Sstevel@tonic-gate 			mfp = NULL;
5610Sstevel@tonic-gate 			goto error;
5620Sstevel@tonic-gate 		}
5630Sstevel@tonic-gate 		/* delete the tempfile */
5640Sstevel@tonic-gate 		(void) unlink(tname);
5650Sstevel@tonic-gate 	}
5660Sstevel@tonic-gate 	/* success */
5670Sstevel@tonic-gate 	rval = 0;
5680Sstevel@tonic-gate 	goto out;
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	/* tempfile error */
5710Sstevel@tonic-gate error:
5720Sstevel@tonic-gate 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
573*5109Spetede 	    mdsyserror(ep, errno, META_DBCONFTMP);
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 
5760Sstevel@tonic-gate 	/* cleanup, return success */
5770Sstevel@tonic-gate out:
5780Sstevel@tonic-gate 	if (rlp != NULL)
5790Sstevel@tonic-gate 		metafreereplicalist(rlp);
5800Sstevel@tonic-gate 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
5810Sstevel@tonic-gate 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
582*5109Spetede 		    mdsyserror(ep, errno, META_DBCONFTMP);
5830Sstevel@tonic-gate 	}
5840Sstevel@tonic-gate 	free(tname);
5850Sstevel@tonic-gate 	return (rval);
5860Sstevel@tonic-gate }
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate /*
5890Sstevel@tonic-gate  * check replica for dev
5900Sstevel@tonic-gate  */
5910Sstevel@tonic-gate static int
5920Sstevel@tonic-gate in_replica(
5930Sstevel@tonic-gate 	mdsetname_t	*sp,
5940Sstevel@tonic-gate 	md_replica_t	*rp,
5950Sstevel@tonic-gate 	mdname_t	*np,
5960Sstevel@tonic-gate 	diskaddr_t	slblk,
5970Sstevel@tonic-gate 	diskaddr_t	nblks,
5980Sstevel@tonic-gate 	md_error_t	*ep
5990Sstevel@tonic-gate )
6000Sstevel@tonic-gate {
6010Sstevel@tonic-gate 	mdname_t	*repnp = rp->r_namep;
6020Sstevel@tonic-gate 	diskaddr_t	rep_sblk = rp->r_blkno;
6030Sstevel@tonic-gate 	diskaddr_t	rep_nblks = rp->r_nblk;
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate 	/* should be in the same set */
6060Sstevel@tonic-gate 	assert(sp != NULL);
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	/* if error in master block, assume whole partition */
6090Sstevel@tonic-gate 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
6100Sstevel@tonic-gate 	    (rep_nblks == MD_DISKADDR_ERROR)) {
6110Sstevel@tonic-gate 		rep_sblk = 0;
6120Sstevel@tonic-gate 		rep_nblks = MD_DISKADDR_ERROR;
6130Sstevel@tonic-gate 	}
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	/* check overlap */
6160Sstevel@tonic-gate 	if (meta_check_overlap(
6170Sstevel@tonic-gate 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
6180Sstevel@tonic-gate 		return (-1);
6190Sstevel@tonic-gate 	}
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	/* return success */
6220Sstevel@tonic-gate 	return (0);
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate /*
6260Sstevel@tonic-gate  * check to see if we're in a replica
6270Sstevel@tonic-gate  */
6280Sstevel@tonic-gate int
6290Sstevel@tonic-gate meta_check_inreplica(
6300Sstevel@tonic-gate 	mdsetname_t		*sp,
6310Sstevel@tonic-gate 	mdname_t		*np,
6320Sstevel@tonic-gate 	diskaddr_t		slblk,
6330Sstevel@tonic-gate 	diskaddr_t		nblks,
6340Sstevel@tonic-gate 	md_error_t		*ep
6350Sstevel@tonic-gate )
6360Sstevel@tonic-gate {
6370Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
6380Sstevel@tonic-gate 	md_replicalist_t	*rl;
6390Sstevel@tonic-gate 	int			rval = 0;
6400Sstevel@tonic-gate 
6410Sstevel@tonic-gate 	/* should have a set */
6420Sstevel@tonic-gate 	assert(sp != NULL);
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	/* for each replica */
6450Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
6460Sstevel@tonic-gate 		return (-1);
6470Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
6480Sstevel@tonic-gate 		md_replica_t	*rp = rl->rl_repp;
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 		/* check replica */
6510Sstevel@tonic-gate 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
6520Sstevel@tonic-gate 			rval = -1;
6530Sstevel@tonic-gate 			break;
6540Sstevel@tonic-gate 		}
6550Sstevel@tonic-gate 	}
6560Sstevel@tonic-gate 
6570Sstevel@tonic-gate 	/* cleanup, return success */
6580Sstevel@tonic-gate 	metafreereplicalist(rlp);
6590Sstevel@tonic-gate 	return (rval);
6600Sstevel@tonic-gate }
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate /*
6630Sstevel@tonic-gate  * check replica
6640Sstevel@tonic-gate  */
6650Sstevel@tonic-gate int
6660Sstevel@tonic-gate meta_check_replica(
6670Sstevel@tonic-gate 	mdsetname_t	*sp,		/* set to check against */
6680Sstevel@tonic-gate 	mdname_t	*np,		/* component to check against */
6690Sstevel@tonic-gate 	mdchkopts_t	options,	/* option flags */
6700Sstevel@tonic-gate 	diskaddr_t	slblk,		/* start logical block */
6710Sstevel@tonic-gate 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
6720Sstevel@tonic-gate 	md_error_t	*ep		/* error packet */
6730Sstevel@tonic-gate )
6740Sstevel@tonic-gate {
6750Sstevel@tonic-gate 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
6760Sstevel@tonic-gate 
6770Sstevel@tonic-gate 	/* make sure we have a disk */
6780Sstevel@tonic-gate 	if (metachkcomp(np, ep) != 0)
6790Sstevel@tonic-gate 		return (-1);
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 	/* check to ensure that it is not already in use */
6820Sstevel@tonic-gate 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
6830Sstevel@tonic-gate 		return (-1);
6840Sstevel@tonic-gate 	}
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	if (options & MDCHK_ALLOW_NODBS)
6870Sstevel@tonic-gate 		return (0);
6880Sstevel@tonic-gate 
6890Sstevel@tonic-gate 	if (options & MDCHK_DRVINSET)
6900Sstevel@tonic-gate 		return (0);
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate 	/* make sure it is in the set */
6930Sstevel@tonic-gate 	if (meta_check_inset(sp, np, ep) != 0)
6940Sstevel@tonic-gate 		return (-1);
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 	/* make sure its not in a metadevice */
6970Sstevel@tonic-gate 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
6980Sstevel@tonic-gate 		return (-1);
6990Sstevel@tonic-gate 
7000Sstevel@tonic-gate 	/* return success */
7010Sstevel@tonic-gate 	return (0);
7020Sstevel@tonic-gate }
7030Sstevel@tonic-gate 
7040Sstevel@tonic-gate static int
7050Sstevel@tonic-gate update_dbinfo_on_drives(
7060Sstevel@tonic-gate 	mdsetname_t	*sp,
7070Sstevel@tonic-gate 	md_drive_desc	*dd,
7080Sstevel@tonic-gate 	int		set_locked,
7090Sstevel@tonic-gate 	int		force,
7100Sstevel@tonic-gate 	md_error_t	*ep
7110Sstevel@tonic-gate )
7120Sstevel@tonic-gate {
7130Sstevel@tonic-gate 	md_set_desc		*sd;
7140Sstevel@tonic-gate 	int			i;
7150Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
7160Sstevel@tonic-gate 	int			rval = 0;
7170Sstevel@tonic-gate 	md_mnnode_desc		*nd;
7180Sstevel@tonic-gate 
7190Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
7200Sstevel@tonic-gate 		return (-1);
7210Sstevel@tonic-gate 
7220Sstevel@tonic-gate 	if (! set_locked) {
7230Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
7240Sstevel@tonic-gate 			md_error_t xep = mdnullerror;
7250Sstevel@tonic-gate 			sigset_t sigs;
7260Sstevel@tonic-gate 			/* Make sure we are blocking all signals */
7270Sstevel@tonic-gate 			if (procsigs(TRUE, &sigs, &xep) < 0)
7280Sstevel@tonic-gate 				mdclrerror(&xep);
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 			nd = sd->sd_nodelist;
7310Sstevel@tonic-gate 			while (nd) {
7320Sstevel@tonic-gate 				if (force && strcmp(nd->nd_nodename,
7330Sstevel@tonic-gate 				    mynode()) != 0) {
7340Sstevel@tonic-gate 					nd = nd->nd_next;
7350Sstevel@tonic-gate 					continue;
7360Sstevel@tonic-gate 				}
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7390Sstevel@tonic-gate 					nd = nd->nd_next;
7400Sstevel@tonic-gate 					continue;
7410Sstevel@tonic-gate 				}
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
7440Sstevel@tonic-gate 					return (-1);
7450Sstevel@tonic-gate 				nd = nd->nd_next;
7460Sstevel@tonic-gate 			}
7470Sstevel@tonic-gate 		} else {
7480Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
7490Sstevel@tonic-gate 				/* Skip empty slots */
7500Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
7510Sstevel@tonic-gate 					continue;
7520Sstevel@tonic-gate 
7530Sstevel@tonic-gate 				if (force && strcmp(sd->sd_nodes[i],
7540Sstevel@tonic-gate 				    mynode()) != 0)
7550Sstevel@tonic-gate 					continue;
7560Sstevel@tonic-gate 
7570Sstevel@tonic-gate 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
7580Sstevel@tonic-gate 					return (-1);
7590Sstevel@tonic-gate 			}
7600Sstevel@tonic-gate 		}
7610Sstevel@tonic-gate 	}
7620Sstevel@tonic-gate 
7630Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
7640Sstevel@tonic-gate 		nd = sd->sd_nodelist;
7650Sstevel@tonic-gate 		while (nd) {
7660Sstevel@tonic-gate 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
7670Sstevel@tonic-gate 				nd = nd->nd_next;
7680Sstevel@tonic-gate 				continue;
7690Sstevel@tonic-gate 			}
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7720Sstevel@tonic-gate 				nd = nd->nd_next;
7730Sstevel@tonic-gate 				continue;
7740Sstevel@tonic-gate 			}
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
7770Sstevel@tonic-gate 			    == -1) {
7780Sstevel@tonic-gate 				rval = -1;
7790Sstevel@tonic-gate 				break;
7800Sstevel@tonic-gate 			}
7810Sstevel@tonic-gate 			nd = nd->nd_next;
7820Sstevel@tonic-gate 		}
7830Sstevel@tonic-gate 	} else {
7840Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
7850Sstevel@tonic-gate 			/* Skip empty slots */
7860Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
7870Sstevel@tonic-gate 				continue;
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
7900Sstevel@tonic-gate 				continue;
7910Sstevel@tonic-gate 
7920Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
7930Sstevel@tonic-gate 			    == -1) {
7940Sstevel@tonic-gate 				rval = -1;
7950Sstevel@tonic-gate 				break;
7960Sstevel@tonic-gate 			}
7970Sstevel@tonic-gate 		}
7980Sstevel@tonic-gate 	}
7990Sstevel@tonic-gate 
8000Sstevel@tonic-gate 	if (! set_locked) {
8010Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
8020Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
8030Sstevel@tonic-gate 			nd = sd->sd_nodelist;
8040Sstevel@tonic-gate 			while (nd) {
8050Sstevel@tonic-gate 				if (force &&
8060Sstevel@tonic-gate 				    strcmp(nd->nd_nodename, mynode()) != 0) {
8070Sstevel@tonic-gate 					nd = nd->nd_next;
8080Sstevel@tonic-gate 					continue;
8090Sstevel@tonic-gate 				}
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
8120Sstevel@tonic-gate 					nd = nd->nd_next;
8130Sstevel@tonic-gate 					continue;
8140Sstevel@tonic-gate 				}
8150Sstevel@tonic-gate 
8160Sstevel@tonic-gate 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
8170Sstevel@tonic-gate 				    ep)) {
8180Sstevel@tonic-gate 					rval = -1;
8190Sstevel@tonic-gate 					break;
8200Sstevel@tonic-gate 				}
8210Sstevel@tonic-gate 				nd = nd->nd_next;
8220Sstevel@tonic-gate 			}
8230Sstevel@tonic-gate 		} else {
8240Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
8250Sstevel@tonic-gate 				/* Skip empty slots */
8260Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
8270Sstevel@tonic-gate 					continue;
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 				if (force &&
8300Sstevel@tonic-gate 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
8310Sstevel@tonic-gate 					continue;
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
8340Sstevel@tonic-gate 				    ep)) {
8350Sstevel@tonic-gate 					rval = -1;
8360Sstevel@tonic-gate 					break;
8370Sstevel@tonic-gate 				}
8380Sstevel@tonic-gate 			}
8390Sstevel@tonic-gate 
8400Sstevel@tonic-gate 		}
8410Sstevel@tonic-gate 		cl_set_setkey(NULL);
8420Sstevel@tonic-gate 	}
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate 	return (rval);
8450Sstevel@tonic-gate }
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate int
8480Sstevel@tonic-gate meta_db_addsidenms(
8490Sstevel@tonic-gate 	mdsetname_t	*sp,
8500Sstevel@tonic-gate 	mdname_t	*np,
8510Sstevel@tonic-gate 	daddr_t		blkno,
8520Sstevel@tonic-gate 	int		bcast,
8530Sstevel@tonic-gate 	md_error_t	*ep
8540Sstevel@tonic-gate )
8550Sstevel@tonic-gate {
8560Sstevel@tonic-gate 	side_t		sideno;
8570Sstevel@tonic-gate 	char		*bname = NULL;
8580Sstevel@tonic-gate 	char		*dname = NULL;
8590Sstevel@tonic-gate 	minor_t		mnum;
8600Sstevel@tonic-gate 	mddb_config_t	c;
8610Sstevel@tonic-gate 	int		done;
8620Sstevel@tonic-gate 	int		rval = 0;
8630Sstevel@tonic-gate 	md_set_desc	*sd;
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate 	sideno = MD_SIDEWILD;
8660Sstevel@tonic-gate 	/*CONSTCOND*/
8670Sstevel@tonic-gate 	while (1) {
8680Sstevel@tonic-gate 		if (bname != NULL) {
8690Sstevel@tonic-gate 			Free(bname);
8700Sstevel@tonic-gate 			bname = NULL;
8710Sstevel@tonic-gate 		}
8720Sstevel@tonic-gate 		if (dname != NULL) {
8730Sstevel@tonic-gate 			Free(dname);
8740Sstevel@tonic-gate 			dname = NULL;
8750Sstevel@tonic-gate 		}
8760Sstevel@tonic-gate 		if ((done = meta_getnextside_devinfo(sp, np->bname,
8770Sstevel@tonic-gate 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
8780Sstevel@tonic-gate 			rval = -1;
8790Sstevel@tonic-gate 			break;
8800Sstevel@tonic-gate 		}
8810Sstevel@tonic-gate 
8820Sstevel@tonic-gate 		if (done == 0)
8830Sstevel@tonic-gate 			break;
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
8860Sstevel@tonic-gate 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
8870Sstevel@tonic-gate 				rval = -1;
8880Sstevel@tonic-gate 				break;
8890Sstevel@tonic-gate 			}
8900Sstevel@tonic-gate 		}
8910Sstevel@tonic-gate 
8920Sstevel@tonic-gate 		/*
8930Sstevel@tonic-gate 		 * Send addsidenms to all nodes using rpc.mdcommd if
8940Sstevel@tonic-gate 		 * sidename is being added to MN diskset.
8950Sstevel@tonic-gate 		 *
8960Sstevel@tonic-gate 		 *   It's ok to broadcast this call to other nodes.
8970Sstevel@tonic-gate 		 *
8980Sstevel@tonic-gate 		 *   Note: The broadcast to other nodes isn't needed during
8990Sstevel@tonic-gate 		 *   the addition of the first mddbs to the set since the
9000Sstevel@tonic-gate 		 *   other nodes haven't been joined to the set yet.  All
9010Sstevel@tonic-gate 		 *   nodes in a MN diskset are (implicitly) joined to the set
9020Sstevel@tonic-gate 		 *   on the addition of the first mddb.
9030Sstevel@tonic-gate 		 */
9040Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
9050Sstevel@tonic-gate 		    (bcast == DB_ADDSIDENMS_BCAST)) {
9060Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
9070Sstevel@tonic-gate 			md_mn_msg_meta_db_newside_t	db_ns;
9080Sstevel@tonic-gate 			int				send_rval;
9090Sstevel@tonic-gate 
9100Sstevel@tonic-gate 			db_ns.msg_l_dev = np->dev;
9110Sstevel@tonic-gate 			db_ns.msg_sideno = sideno;
9120Sstevel@tonic-gate 			db_ns.msg_blkno = blkno;
9130Sstevel@tonic-gate 			(void) strncpy(db_ns.msg_dname, dname,
9140Sstevel@tonic-gate 			    sizeof (db_ns.msg_dname));
9150Sstevel@tonic-gate 			(void) splitname(np->bname, &db_ns.msg_splitname);
9160Sstevel@tonic-gate 			db_ns.msg_mnum = mnum;
9170Sstevel@tonic-gate 
9180Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
9190Sstevel@tonic-gate 			db_ns.msg_devid[0] = NULL;
9200Sstevel@tonic-gate 
9210Sstevel@tonic-gate 			/*
9220Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
9230Sstevel@tonic-gate 			 * stuck in in the return step until this command has
9240Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
9250Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
9260Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
9270Sstevel@tonic-gate 			 * cycle to proceed.
9280Sstevel@tonic-gate 			 */
9290Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
9300Sstevel@tonic-gate 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
9310Sstevel@tonic-gate 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
9320Sstevel@tonic-gate 			    sizeof (md_mn_msg_meta_db_newside_t),
9330Sstevel@tonic-gate 			    &resultp, ep);
9340Sstevel@tonic-gate 			if (send_rval != 0) {
9350Sstevel@tonic-gate 				rval = -1;
9360Sstevel@tonic-gate 				if (resultp == NULL)
9370Sstevel@tonic-gate 					(void) mddserror(ep,
9380Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
9390Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
9400Sstevel@tonic-gate 					    sp->setname);
9410Sstevel@tonic-gate 				else {
9420Sstevel@tonic-gate 					(void) mdstealerror(ep,
9430Sstevel@tonic-gate 					    &(resultp->mmr_ep));
9440Sstevel@tonic-gate 					if (mdisok(ep)) {
9450Sstevel@tonic-gate 						(void) mddserror(ep,
9460Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
9470Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
9480Sstevel@tonic-gate 						    sp->setname);
9490Sstevel@tonic-gate 					}
9500Sstevel@tonic-gate 					free_result(resultp);
9510Sstevel@tonic-gate 				}
9520Sstevel@tonic-gate 				break;
9530Sstevel@tonic-gate 			}
9540Sstevel@tonic-gate 			if (resultp)
9550Sstevel@tonic-gate 				free_result(resultp);
9560Sstevel@tonic-gate 		} else {
9570Sstevel@tonic-gate 			/*
9580Sstevel@tonic-gate 			 * Let this side's  device name, minor # and driver name
9590Sstevel@tonic-gate 			 * be known to the database replica.
9600Sstevel@tonic-gate 			 */
9610Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 			/* Fill in device/replica info */
9640Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
9650Sstevel@tonic-gate 			c.c_locator.l_blkno = blkno;
9660Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, dname,
9670Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
968*5109Spetede 			if (splitname(bname, &c.c_devname) ==
969*5109Spetede 			    METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
970*5109Spetede 				rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
971*5109Spetede 				    NODEV64, np->rname);
972*5109Spetede 				break;
973*5109Spetede 			}
974*5109Spetede 
9750Sstevel@tonic-gate 			c.c_locator.l_mnum = mnum;
9760Sstevel@tonic-gate 
9770Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
9780Sstevel@tonic-gate 			c.c_setno = sp->setno;
9790Sstevel@tonic-gate 			(void) strncpy(c.c_setname, sp->setname,
980*5109Spetede 			    sizeof (c.c_setname));
9810Sstevel@tonic-gate 			c.c_sideno = sideno;
9820Sstevel@tonic-gate 
9830Sstevel@tonic-gate 			/*
9840Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
9850Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
9860Sstevel@tonic-gate 			 * is just what this code would do.
9870Sstevel@tonic-gate 			 */
9880Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
9890Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
9900Sstevel@tonic-gate 
9910Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
9920Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
9930Sstevel@tonic-gate 				break;
9940Sstevel@tonic-gate 			}
9950Sstevel@tonic-gate 		}
9960Sstevel@tonic-gate 	}
9970Sstevel@tonic-gate 
9980Sstevel@tonic-gate 	/* cleanup, return success */
9990Sstevel@tonic-gate 	if (bname != NULL) {
10000Sstevel@tonic-gate 		Free(bname);
10010Sstevel@tonic-gate 		bname = NULL;
10020Sstevel@tonic-gate 	}
10030Sstevel@tonic-gate 	if (dname != NULL) {
10040Sstevel@tonic-gate 		Free(dname);
10050Sstevel@tonic-gate 		dname = NULL;
10060Sstevel@tonic-gate 	}
10070Sstevel@tonic-gate 	return (rval);
10080Sstevel@tonic-gate }
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate 
10110Sstevel@tonic-gate int
10120Sstevel@tonic-gate meta_db_delsidenm(
10130Sstevel@tonic-gate 	mdsetname_t	*sp,
10140Sstevel@tonic-gate 	side_t		sideno,
10150Sstevel@tonic-gate 	mdname_t	*np,
10160Sstevel@tonic-gate 	daddr_t		blkno,
10170Sstevel@tonic-gate 	md_error_t	*ep
10180Sstevel@tonic-gate )
10190Sstevel@tonic-gate {
10200Sstevel@tonic-gate 	mddb_config_t	c;
10210Sstevel@tonic-gate 	md_set_desc	*sd;
10220Sstevel@tonic-gate 
10230Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
10240Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
10250Sstevel@tonic-gate 			return (-1);
10260Sstevel@tonic-gate 	}
10270Sstevel@tonic-gate 	/* Use rpc.mdcommd to delete mddb side from all nodes */
10280Sstevel@tonic-gate 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
10290Sstevel@tonic-gate 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
10300Sstevel@tonic-gate 		md_mn_result_t			*resultp = NULL;
10310Sstevel@tonic-gate 		md_mn_msg_meta_db_delside_t	db_ds;
10320Sstevel@tonic-gate 		int				send_rval;
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 		db_ds.msg_l_dev = np->dev;
10350Sstevel@tonic-gate 		db_ds.msg_blkno = blkno;
10360Sstevel@tonic-gate 		db_ds.msg_sideno = sideno;
10370Sstevel@tonic-gate 
10380Sstevel@tonic-gate 		/* Set devid to NULL until devids are supported */
10390Sstevel@tonic-gate 		db_ds.msg_devid[0] = NULL;
10400Sstevel@tonic-gate 
10410Sstevel@tonic-gate 		/*
10420Sstevel@tonic-gate 		 * If reconfig cycle has been started, this node is
10430Sstevel@tonic-gate 		 * stuck in in the return step until this command has
10440Sstevel@tonic-gate 		 * completed.  If mdcommd is suspended, ask
10450Sstevel@tonic-gate 		 * send_message to fail (instead of retrying)
10460Sstevel@tonic-gate 		 * so that metaset can finish allowing the reconfig
10470Sstevel@tonic-gate 		 * cycle to proceed.
10480Sstevel@tonic-gate 		 */
10490Sstevel@tonic-gate 		send_rval = mdmn_send_message(sp->setno,
10500Sstevel@tonic-gate 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
10510Sstevel@tonic-gate 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
10520Sstevel@tonic-gate 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
10530Sstevel@tonic-gate 		if (send_rval != 0) {
10540Sstevel@tonic-gate 			if (resultp == NULL)
10550Sstevel@tonic-gate 				(void) mddserror(ep,
10560Sstevel@tonic-gate 				    MDE_DS_COMMD_SEND_FAIL,
10570Sstevel@tonic-gate 				    sp->setno, NULL, NULL,
10580Sstevel@tonic-gate 				    sp->setname);
10590Sstevel@tonic-gate 			else {
10600Sstevel@tonic-gate 				(void) mdstealerror(ep, &(resultp->mmr_ep));
10610Sstevel@tonic-gate 				if (mdisok(ep)) {
10620Sstevel@tonic-gate 					(void) mddserror(ep,
10630Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
10640Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
10650Sstevel@tonic-gate 					    sp->setname);
10660Sstevel@tonic-gate 				}
10670Sstevel@tonic-gate 				free_result(resultp);
10680Sstevel@tonic-gate 			}
10690Sstevel@tonic-gate 			return (-1);
10700Sstevel@tonic-gate 		}
10710Sstevel@tonic-gate 		if (resultp)
10720Sstevel@tonic-gate 			free_result(resultp);
10730Sstevel@tonic-gate 
10740Sstevel@tonic-gate 	} else {
10750Sstevel@tonic-gate 		/*
10760Sstevel@tonic-gate 		 * Let this side's  device name, minor # and driver name
10770Sstevel@tonic-gate 		 * be known to the database replica.
10780Sstevel@tonic-gate 		 */
10790Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 		/* Fill in device/replica info */
10820Sstevel@tonic-gate 		c.c_locator.l_dev = meta_cmpldev(np->dev);
10830Sstevel@tonic-gate 		c.c_locator.l_blkno = blkno;
10840Sstevel@tonic-gate 
10850Sstevel@tonic-gate 		/* Fill in setno, setname, and sideno */
10860Sstevel@tonic-gate 		c.c_setno = sp->setno;
10870Sstevel@tonic-gate 		(void) strcpy(c.c_setname, sp->setname);
10880Sstevel@tonic-gate 		c.c_sideno = sideno;
10890Sstevel@tonic-gate 
10900Sstevel@tonic-gate 		/*
10910Sstevel@tonic-gate 		 * Don't need device id information from this ioctl
10920Sstevel@tonic-gate 		 * Kernel determines device id from dev_t, which
10930Sstevel@tonic-gate 		 * is just what this code would do.
10940Sstevel@tonic-gate 		 */
10950Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
10960Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
10970Sstevel@tonic-gate 
10980Sstevel@tonic-gate 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
10990Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
11000Sstevel@tonic-gate 	}
11010Sstevel@tonic-gate 	return (0);
11020Sstevel@tonic-gate }
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate 
11050Sstevel@tonic-gate static int
11060Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
11070Sstevel@tonic-gate {
11080Sstevel@tonic-gate 	mdnamelist_t		*dnp1, *dnp2;
11090Sstevel@tonic-gate 
11100Sstevel@tonic-gate 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
11110Sstevel@tonic-gate 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
11120Sstevel@tonic-gate 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
11130Sstevel@tonic-gate 				return (mderror(ep, MDE_DUPDRIVE,
11140Sstevel@tonic-gate 				    dnp1->namep->cname));
11150Sstevel@tonic-gate 		}
11160Sstevel@tonic-gate 	}
11170Sstevel@tonic-gate 	return (0);
11180Sstevel@tonic-gate }
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate 
/*
 * Read exactly len bytes from the beginning of file path into buf.
 * A read interrupted by a signal (EINTR) is retried and short reads are
 * continued.  Returns 0 when len bytes were read, -1 if the file cannot
 * be opened, a read fails, or end of file is reached first.
 */
static int
filediff_read(const char *path, char *buf, size_t len)
{
	size_t	got = 0;
	int	fd;

	if ((fd = open(path, O_RDONLY)) == -1)
		return (-1);

	while (got < len) {
		ssize_t	n = read(fd, buf + got, len - got);

		if (n > 0) {
			got += (size_t)n;
			continue;
		}
		if (n == -1 && errno == EINTR)
			continue;
		break;		/* premature EOF or hard error */
	}
	(void) close(fd);

	return ((got == len) ? 0 : -1);
}

/*
 * Return 1 if files are different, else return 0
 *
 * Two files are "the same" only if both can be stat()ed, have equal
 * sizes and hold identical bytes.  Any failure along the way (stat,
 * allocation, open, read) is reported as "different" so that the caller
 * errs on the side of rewriting the file.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		sz;
	struct stat	tsbuf;
	struct stat	sbuf;
	char		*tbuf;
	char		*buf;

	if (stat(tsname, &tsbuf) != 0 || stat(sname, &sbuf) != 0)
		return (1);
	/* compare as off_t, before any narrowing to size_t */
	if (tsbuf.st_size != sbuf.st_size)
		return (1);

	/*
	 * Two empty files are identical.  Decide that here rather than
	 * depending on malloc(0), which is allowed to return NULL.
	 */
	if (tsbuf.st_size == 0)
		return (0);
	sz = (size_t)tsbuf.st_size;

	/* allocate memory and read both files into buffer */
	tbuf = malloc(sz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_read(tsname, tbuf, sz) != 0 ||
	    filediff_read(sname, buf, sz) != 0)
		goto out;

	/* compare content; normalize to the documented 0/1 result */
	ret = (memcmp(tbuf, buf, sz) != 0) ? 1 : 0;
out:
	free(tbuf);
	free(buf);
	return (ret);
}
11720Sstevel@tonic-gate 
11730Sstevel@tonic-gate /*
11740Sstevel@tonic-gate  * patch md.conf file with mddb locations
11750Sstevel@tonic-gate  */
11760Sstevel@tonic-gate int
11770Sstevel@tonic-gate meta_db_patch(
11780Sstevel@tonic-gate 	char		*sname,		/* system file name */
11790Sstevel@tonic-gate 	char		*cname,		/* mddb.cf file name */
11800Sstevel@tonic-gate 	int		patch,		/* patching locally */
11810Sstevel@tonic-gate 	md_error_t	*ep
11820Sstevel@tonic-gate )
11830Sstevel@tonic-gate {
11840Sstevel@tonic-gate 	char		*tsname = NULL;
11850Sstevel@tonic-gate 	char		line[MDDB_BOOTLIST_MAX_LEN];
11860Sstevel@tonic-gate 	FILE		*tsfp = NULL;
11870Sstevel@tonic-gate 	FILE		*mfp = NULL;
11880Sstevel@tonic-gate 	int		rval = -1;
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 	/* check names */
11910Sstevel@tonic-gate 	if (sname == NULL) {
11920Sstevel@tonic-gate 		if (patch)
11930Sstevel@tonic-gate 			sname = "md.conf";
11940Sstevel@tonic-gate 		else
11950Sstevel@tonic-gate 			sname = "/kernel/drv/md.conf";
11960Sstevel@tonic-gate 	}
11970Sstevel@tonic-gate 	if (cname == NULL)
11980Sstevel@tonic-gate 		cname = META_DBCONF;
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate 	/*
12010Sstevel@tonic-gate 	 * edit file
12020Sstevel@tonic-gate 	 */
12030Sstevel@tonic-gate 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
12040Sstevel@tonic-gate 		if (mdissyserror(ep, EROFS)) {
12050Sstevel@tonic-gate 			/*
12060Sstevel@tonic-gate 			 * If we are booted on a read-only root because
12070Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
12080Sstevel@tonic-gate 			 * any scary error messages.
12090Sstevel@tonic-gate 			 */
12100Sstevel@tonic-gate 			mdclrerror(ep);
12110Sstevel@tonic-gate 			rval = 0;
12120Sstevel@tonic-gate 		}
12130Sstevel@tonic-gate 		goto out;
12140Sstevel@tonic-gate 	}
12150Sstevel@tonic-gate 
12162063Shshaw 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
12170Sstevel@tonic-gate 	    ep) != 0)
12180Sstevel@tonic-gate 		goto out;
12190Sstevel@tonic-gate 
12200Sstevel@tonic-gate 	/* if file content is identical, skip rename */
12210Sstevel@tonic-gate 	if (filediff(tsname, sname) == 0) {
12220Sstevel@tonic-gate 		rval = 0;
12230Sstevel@tonic-gate 		goto out;
12240Sstevel@tonic-gate 	}
12250Sstevel@tonic-gate 
12260Sstevel@tonic-gate 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
1227*5109Spetede 	    (fclose(tsfp) != 0)) {
12280Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, tsname);
12290Sstevel@tonic-gate 		goto out;
12300Sstevel@tonic-gate 	}
12310Sstevel@tonic-gate 
12320Sstevel@tonic-gate 	tsfp = NULL;
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	/*
12350Sstevel@tonic-gate 	 * rename file. If we get a Cross Device error then it
12360Sstevel@tonic-gate 	 * is because we are in the miniroot.
12370Sstevel@tonic-gate 	 */
12380Sstevel@tonic-gate 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
12390Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, sname);
12400Sstevel@tonic-gate 		goto out;
12410Sstevel@tonic-gate 	}
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate 	if (errno == EXDEV) {
12440Sstevel@tonic-gate 		if ((tsfp = fopen(tsname, "r")) == NULL)
12450Sstevel@tonic-gate 			goto out;
12460Sstevel@tonic-gate 		if ((mfp = fopen(sname, "w+")) == NULL)
12470Sstevel@tonic-gate 			goto out;
12480Sstevel@tonic-gate 		while (fgets(line, sizeof (line), tsfp) != NULL) {
12490Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
12500Sstevel@tonic-gate 				goto out;
12510Sstevel@tonic-gate 		}
12520Sstevel@tonic-gate 		(void) fclose(tsfp);
12530Sstevel@tonic-gate 		tsfp = NULL;
12540Sstevel@tonic-gate 		if (fflush(mfp) != 0)
12550Sstevel@tonic-gate 			goto out;
12560Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
12570Sstevel@tonic-gate 			goto out;
12580Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
12590Sstevel@tonic-gate 			mfp = NULL;
12600Sstevel@tonic-gate 			goto out;
12610Sstevel@tonic-gate 		}
12620Sstevel@tonic-gate 	}
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 	Free(tsname);
12650Sstevel@tonic-gate 	tsname = NULL;
12660Sstevel@tonic-gate 	rval = 0;
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 	/* cleanup, return error */
12690Sstevel@tonic-gate out:
12700Sstevel@tonic-gate 	if (tsfp != NULL)
12710Sstevel@tonic-gate 		(void) fclose(tsfp);
12720Sstevel@tonic-gate 	if (tsname != NULL) {
12730Sstevel@tonic-gate 		(void) unlink(tsname);
12740Sstevel@tonic-gate 		Free(tsname);
12750Sstevel@tonic-gate 	}
12760Sstevel@tonic-gate 	return (rval);
12770Sstevel@tonic-gate }
12780Sstevel@tonic-gate 
12790Sstevel@tonic-gate /*
12800Sstevel@tonic-gate  * Add replicas to set.  This happens as a result of:
12810Sstevel@tonic-gate  *	- metadb [-s set_name] -a
12820Sstevel@tonic-gate  *	- metaset -s set_name -a disk
12830Sstevel@tonic-gate  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
12840Sstevel@tonic-gate  *	- metaset -s set_name -b
12850Sstevel@tonic-gate  *
12860Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
12870Sstevel@tonic-gate  *
12880Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
12890Sstevel@tonic-gate  * is running the metaset command.
12900Sstevel@tonic-gate  *
12910Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
12920Sstevel@tonic-gate  * running the metaset command.  If this is the first mddb added to
12930Sstevel@tonic-gate  * the MN diskset, then no communication is made to other nodes via commd
12940Sstevel@tonic-gate  * since the other nodes will be in-sync with respect to the mddbs when
12950Sstevel@tonic-gate  * those other nodes join the set and snarf in the newly created mddb.
12960Sstevel@tonic-gate  * If this is not the first mddb added to the MN diskset, then this
12970Sstevel@tonic-gate  * attach command is sent to all of the nodes using commd.  This keeps
12980Sstevel@tonic-gate  * the nodes in-sync.
12990Sstevel@tonic-gate  */
13000Sstevel@tonic-gate int
13010Sstevel@tonic-gate meta_db_attach(
13020Sstevel@tonic-gate 	mdsetname_t		*sp,
13030Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
13040Sstevel@tonic-gate 	mdchkopts_t		options,
13050Sstevel@tonic-gate 	md_timeval32_t		*timeval,
13060Sstevel@tonic-gate 	int			dbcnt,
13070Sstevel@tonic-gate 	int			dbsize,
13080Sstevel@tonic-gate 	char			*sysfilename,
13090Sstevel@tonic-gate 	md_error_t		*ep
13100Sstevel@tonic-gate )
13110Sstevel@tonic-gate {
13120Sstevel@tonic-gate 	struct mddb_config	c;
13130Sstevel@tonic-gate 	mdnamelist_t		*nlp;
13140Sstevel@tonic-gate 	mdname_t		*np;
13150Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
13160Sstevel@tonic-gate 	md_drive_desc		*p;
13170Sstevel@tonic-gate 	int			i;
13180Sstevel@tonic-gate 	int			fd;
13190Sstevel@tonic-gate 	side_t			sideno;
13200Sstevel@tonic-gate 	daddr_t			blkno;
13210Sstevel@tonic-gate 	int			replicacount = 0;
13222614Spetede 	int			start_svmdaemons = 0;
13230Sstevel@tonic-gate 	int			rval = 0;
13240Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
13250Sstevel@tonic-gate 	md_set_desc		*sd;
13260Sstevel@tonic-gate 	int			stale_bool = FALSE;
13270Sstevel@tonic-gate 	int			flags;
13280Sstevel@tonic-gate 	int			firstmddb = 1;
13290Sstevel@tonic-gate 	md_timeval32_t		inittime = {0, 0};
13300Sstevel@tonic-gate 
13310Sstevel@tonic-gate 	/*
13320Sstevel@tonic-gate 	 * Error if we don't get some work to do.
13330Sstevel@tonic-gate 	 */
13340Sstevel@tonic-gate 	if (db_nlp == NULL)
13350Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
13380Sstevel@tonic-gate 		return (-1);
13390Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
13400Sstevel@tonic-gate 	c.c_id = 0;
13410Sstevel@tonic-gate 	c.c_setno = sp->setno;
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
13440Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
13450Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
13460Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
13470Sstevel@tonic-gate 		if (metaislocalset(sp)) {
13480Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
13490Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
13500Sstevel@tonic-gate 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
13510Sstevel@tonic-gate 			    (! (options & MDCHK_ALLOW_NODBS)))
13520Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
13530Sstevel@tonic-gate 		} else {
13540Sstevel@tonic-gate 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
13550Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
13560Sstevel@tonic-gate 		}
13570Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
13580Sstevel@tonic-gate 	}
13590Sstevel@tonic-gate 	/*
13600Sstevel@tonic-gate 	 * Is current set STALE?
13610Sstevel@tonic-gate 	 */
13620Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
13630Sstevel@tonic-gate 		stale_bool = TRUE;
13640Sstevel@tonic-gate 	}
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate 	assert(db_nlp != NULL);
13670Sstevel@tonic-gate 
13682614Spetede 	/* if these are the first replicas then the SVM daemons need to run */
13690Sstevel@tonic-gate 	if (c.c_dbcnt == 0)
13702614Spetede 		start_svmdaemons = 1;
13710Sstevel@tonic-gate 
13720Sstevel@tonic-gate 	/*
13730Sstevel@tonic-gate 	 * check to see if we will go over the total possible number
13740Sstevel@tonic-gate 	 * of data bases
13750Sstevel@tonic-gate 	 */
13760Sstevel@tonic-gate 	nlp = db_nlp;
13770Sstevel@tonic-gate 	while (nlp) {
13780Sstevel@tonic-gate 		replicacount += dbcnt;
13790Sstevel@tonic-gate 		nlp = nlp->next;
13800Sstevel@tonic-gate 	}
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
13830Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
13840Sstevel@tonic-gate 		    sp->setno, c.c_dbcnt + replicacount, NULL));
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	/*
13870Sstevel@tonic-gate 	 * go through and check to make sure all locations specified
13880Sstevel@tonic-gate 	 * are legal also pick out driver name;
13890Sstevel@tonic-gate 	 */
13900Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13910Sstevel@tonic-gate 		diskaddr_t devsize;
13920Sstevel@tonic-gate 
13930Sstevel@tonic-gate 		np = nlp->namep;
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
13960Sstevel@tonic-gate 			uint_t	partno;
13970Sstevel@tonic-gate 			uint_t	rep_partno;
13980Sstevel@tonic-gate 			mddrivename_t	*dnp = np->drivenamep;
13990Sstevel@tonic-gate 
14000Sstevel@tonic-gate 			/*
14010Sstevel@tonic-gate 			 * make sure that non-local database replicas
14020Sstevel@tonic-gate 			 * are always on the replica slice.
14030Sstevel@tonic-gate 			 */
14040Sstevel@tonic-gate 			if (meta_replicaslice(dnp,
14050Sstevel@tonic-gate 			    &rep_partno, ep) != 0)
14060Sstevel@tonic-gate 				return (-1);
14070Sstevel@tonic-gate 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
14080Sstevel@tonic-gate 				return (-1);
14090Sstevel@tonic-gate 			if (partno != rep_partno)
14100Sstevel@tonic-gate 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
14110Sstevel@tonic-gate 				    np->dev, sp->setname));
14120Sstevel@tonic-gate 		}
14130Sstevel@tonic-gate 
14140Sstevel@tonic-gate 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
14150Sstevel@tonic-gate 		    ep)) {
14160Sstevel@tonic-gate 			return (-1);
14170Sstevel@tonic-gate 		}
14180Sstevel@tonic-gate 
14190Sstevel@tonic-gate 		if ((devsize = metagetsize(np, ep)) == -1)
14200Sstevel@tonic-gate 			return (-1);
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
14230Sstevel@tonic-gate 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
14240Sstevel@tonic-gate 			    meta_getminor(np->dev), sp->setno, devsize,
14250Sstevel@tonic-gate 			    np->cname));
14260Sstevel@tonic-gate 	}
14270Sstevel@tonic-gate 
14280Sstevel@tonic-gate 	/*
14290Sstevel@tonic-gate 	 * If first disk in set we don't have lb_inittime yet for use as
14300Sstevel@tonic-gate 	 * mb_setcreatetime so don't go looking for it. We'll come back
14310Sstevel@tonic-gate 	 * later and update after the locator block has been created.
14320Sstevel@tonic-gate 	 * If this isn't the first disk in the set, we have a locator
14330Sstevel@tonic-gate 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
14340Sstevel@tonic-gate 	 * lb_inittime.
14350Sstevel@tonic-gate 	 */
14360Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
14370Sstevel@tonic-gate 		if (c.c_dbcnt != 0) {
14380Sstevel@tonic-gate 			firstmddb = 0;
14390Sstevel@tonic-gate 			inittime = meta_get_lb_inittime(sp, ep);
14400Sstevel@tonic-gate 		}
14410Sstevel@tonic-gate 	}
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 	/*
14440Sstevel@tonic-gate 	 * go through and write all master blocks
14450Sstevel@tonic-gate 	 */
14460Sstevel@tonic-gate 
14470Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14480Sstevel@tonic-gate 		np = nlp->namep;
14490Sstevel@tonic-gate 
14500Sstevel@tonic-gate 		if ((fd = open(np->rname, O_RDWR)) < 0)
14510Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
14520Sstevel@tonic-gate 
14530Sstevel@tonic-gate 		for (i = 0; i < dbcnt; i++) {
14540Sstevel@tonic-gate 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
14550Sstevel@tonic-gate 			    inittime, ep)) {
14560Sstevel@tonic-gate 				(void) close(fd);
14570Sstevel@tonic-gate 				return (-1);
14580Sstevel@tonic-gate 			}
14590Sstevel@tonic-gate 		}
14600Sstevel@tonic-gate 		(void) close(fd);
14610Sstevel@tonic-gate 	}
14620Sstevel@tonic-gate 
14630Sstevel@tonic-gate 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
14640Sstevel@tonic-gate 		return (-1);
14650Sstevel@tonic-gate 
14660Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
14670Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
14680Sstevel@tonic-gate 		if (! mdisok(ep))
14690Sstevel@tonic-gate 			return (-1);
14700Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
14710Sstevel@tonic-gate 			return (-1);
14720Sstevel@tonic-gate 
14730Sstevel@tonic-gate 	}
14740Sstevel@tonic-gate 
14750Sstevel@tonic-gate 	/*
14760Sstevel@tonic-gate 	 * go through and tell kernel to add them
14770Sstevel@tonic-gate 	 */
14780Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
14790Sstevel@tonic-gate 		mdcinfo_t	*cinfo;
14800Sstevel@tonic-gate 
14810Sstevel@tonic-gate 		np = nlp->namep;
14820Sstevel@tonic-gate 
14830Sstevel@tonic-gate 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
14840Sstevel@tonic-gate 			rval = -1;
14850Sstevel@tonic-gate 			goto out;
14860Sstevel@tonic-gate 		}
14870Sstevel@tonic-gate 
14880Sstevel@tonic-gate 		/*
14890Sstevel@tonic-gate 		 * If mddb is being added to MN diskset and there already
14900Sstevel@tonic-gate 		 * exists a valid mddb in the set (which equates to this
14910Sstevel@tonic-gate 		 * node being an owner of the set) then use rpc.mdcommd
14920Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
14930Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
14940Sstevel@tonic-gate 		 * can't write the message to the mddb.
14950Sstevel@tonic-gate 		 *
14960Sstevel@tonic-gate 		 * Otherwise, just add mddb to this node.
14970Sstevel@tonic-gate 		 */
14980Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
14990Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
15000Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
15010Sstevel@tonic-gate 			md_mn_msg_meta_db_attach_t	attach;
15020Sstevel@tonic-gate 			int 				send_rval;
15030Sstevel@tonic-gate 
15040Sstevel@tonic-gate 			/*
15050Sstevel@tonic-gate 			 * In a scenario where new replicas had been added on
15060Sstevel@tonic-gate 			 * the master, and then all of the old replicas failed
15070Sstevel@tonic-gate 			 * before the slaves had knowledge of the new replicas,
15080Sstevel@tonic-gate 			 * the slaves are unable to re-parse in the mddb
15090Sstevel@tonic-gate 			 * from the new replicas since the slaves have no
15100Sstevel@tonic-gate 			 * knowledge of the new replicas.  The following
15110Sstevel@tonic-gate 			 * algorithm solves this problem:
15120Sstevel@tonic-gate 			 * 	- META_DB_ATTACH message generates submsgs
15130Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
15140Sstevel@tonic-gate 			 * 		- MDDB_ATTACH new replicas
15150Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
15160Sstevel@tonic-gate 			 *		information to be sent from master
15170Sstevel@tonic-gate 			 *		to slaves at a higher class than the
15180Sstevel@tonic-gate 			 *		unblock so the parse message will
15190Sstevel@tonic-gate 			 *		reach slaves before unblock message.
15200Sstevel@tonic-gate 			 */
15210Sstevel@tonic-gate 			attach.msg_l_dev = np->dev;
15220Sstevel@tonic-gate 			attach.msg_cnt = dbcnt;
15230Sstevel@tonic-gate 			attach.msg_dbsize = dbsize;
15240Sstevel@tonic-gate 			(void) strncpy(attach.msg_dname, cinfo->dname,
15250Sstevel@tonic-gate 			    sizeof (attach.msg_dname));
15260Sstevel@tonic-gate 			(void) splitname(np->bname, &attach.msg_splitname);
15270Sstevel@tonic-gate 			attach.msg_options = options;
15280Sstevel@tonic-gate 
15290Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
15300Sstevel@tonic-gate 			attach.msg_devid[0] = NULL;
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate 			/*
15330Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
15340Sstevel@tonic-gate 			 * stuck in the return step until this command has
15350Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
15360Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
15370Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
15380Sstevel@tonic-gate 			 * cycle to proceed.
15390Sstevel@tonic-gate 			 */
15400Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
15410Sstevel@tonic-gate 			if (stale_bool == TRUE)
15420Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
15430Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
1544*5109Spetede 			    MD_MN_MSG_META_DB_ATTACH,
1545*5109Spetede 			    flags, (char *)&attach,
1546*5109Spetede 			    sizeof (md_mn_msg_meta_db_attach_t),
1547*5109Spetede 			    &resultp, ep);
15480Sstevel@tonic-gate 			if (send_rval != 0) {
15490Sstevel@tonic-gate 				rval = -1;
15500Sstevel@tonic-gate 				if (resultp == NULL)
15510Sstevel@tonic-gate 					(void) mddserror(ep,
15520Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
15530Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
15540Sstevel@tonic-gate 					    sp->setname);
15550Sstevel@tonic-gate 				else {
15560Sstevel@tonic-gate 					(void) mdstealerror(ep,
15570Sstevel@tonic-gate 					    &(resultp->mmr_ep));
15580Sstevel@tonic-gate 					if (mdisok(ep)) {
15590Sstevel@tonic-gate 						(void) mddserror(ep,
15600Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
15610Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
15620Sstevel@tonic-gate 						    sp->setname);
15630Sstevel@tonic-gate 					}
15640Sstevel@tonic-gate 					free_result(resultp);
15650Sstevel@tonic-gate 				}
15660Sstevel@tonic-gate 				goto out;
15670Sstevel@tonic-gate 			}
15680Sstevel@tonic-gate 			if (resultp)
15690Sstevel@tonic-gate 				free_result(resultp);
15700Sstevel@tonic-gate 		} else {
1571*5109Spetede 			/* Adding mddb(s) to just this node */
1572*5109Spetede 			for (i = 0; i < dbcnt; i++) {
1573*5109Spetede 				(void) memset(&c, 0, sizeof (c));
1574*5109Spetede 				/* Fill in device/replica info */
1575*5109Spetede 				c.c_locator.l_dev = meta_cmpldev(np->dev);
1576*5109Spetede 				c.c_locator.l_blkno = i * dbsize + 16;
1577*5109Spetede 				blkno = c.c_locator.l_blkno;
1578*5109Spetede 				(void) strncpy(c.c_locator.l_driver,
1579*5109Spetede 				    cinfo->dname,
1580*5109Spetede 				    sizeof (c.c_locator.l_driver));
15810Sstevel@tonic-gate 
1582*5109Spetede 				if (splitname(np->bname, &c.c_devname) ==
1583*5109Spetede 				    METASPLIT_LONGDISKNAME && devid_in_use ==
1584*5109Spetede 				    FALSE) {
1585*5109Spetede 					rval = mddeverror(ep,
1586*5109Spetede 					    MDE_DISKNAMETOOLONG,
1587*5109Spetede 					    NODEV64, np->rname);
1588*5109Spetede 					goto out;
15890Sstevel@tonic-gate 				}
1590*5109Spetede 
1591*5109Spetede 				c.c_locator.l_mnum = meta_getminor(np->dev);
1592*5109Spetede 
1593*5109Spetede 				/* Fill in setno, setname, and sideno */
1594*5109Spetede 				c.c_setno = sp->setno;
1595*5109Spetede 				if (! metaislocalset(sp)) {
1596*5109Spetede 					if (MD_MNSET_DESC(sd)) {
1597*5109Spetede 						c.c_multi_node = 1;
1598*5109Spetede 					}
1599*5109Spetede 				}
1600*5109Spetede 				(void) strcpy(c.c_setname, sp->setname);
1601*5109Spetede 				c.c_sideno = sideno;
16020Sstevel@tonic-gate 
1603*5109Spetede 				/*
1604*5109Spetede 				 * Don't need device id information from this
1605*5109Spetede 				 * ioctl Kernel determines device id from
1606*5109Spetede 				 * dev_t, which is just what this code would do.
1607*5109Spetede 				 */
1608*5109Spetede 				c.c_locator.l_devid = (uint64_t)0;
1609*5109Spetede 				c.c_locator.l_devid_flags = 0;
16100Sstevel@tonic-gate 
1611*5109Spetede 				if (timeval != NULL)
1612*5109Spetede 					c.c_timestamp = *timeval;
16130Sstevel@tonic-gate 
1614*5109Spetede 				if (setup_med_cfg(sp, &c,
1615*5109Spetede 				    (options & MDCHK_SET_FORCE), ep)) {
1616*5109Spetede 					rval = -1;
1617*5109Spetede 					goto out;
1618*5109Spetede 				}
16190Sstevel@tonic-gate 
1620*5109Spetede 				if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
1621*5109Spetede 				    NULL) != 0) {
1622*5109Spetede 					rval = mdstealerror(ep, &c.c_mde);
1623*5109Spetede 					goto out;
1624*5109Spetede 				}
1625*5109Spetede 				/*
1626*5109Spetede 				 * This is either a traditional diskset OR this
1627*5109Spetede 				 * is the first replica added to a MN diskset.
1628*5109Spetede 				 * In either case, set broadcast to NO_BCAST so
1629*5109Spetede 				 * that message won't go through rpc.mdcommd.
1630*5109Spetede 				 * If this is a traditional diskset, the bcast
1631*5109Spetede 				 * flag is ignored since traditional disksets
1632*5109Spetede 				 * don't use the rpc.mdcommd.
1633*5109Spetede 				 */
1634*5109Spetede 				if (meta_db_addsidenms(sp, np, blkno,
1635*5109Spetede 				    DB_ADDSIDENMS_NO_BCAST, ep))
1636*5109Spetede 					goto out;
16370Sstevel@tonic-gate 			}
16380Sstevel@tonic-gate 		}
16390Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
16400Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
16410Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next)
16420Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
16430Sstevel@tonic-gate 					p->dd_dbcnt = dbcnt;
16440Sstevel@tonic-gate 					p->dd_dbsize  = dbsize;
16450Sstevel@tonic-gate 					break;
16460Sstevel@tonic-gate 				}
16470Sstevel@tonic-gate 		}
16480Sstevel@tonic-gate 
16490Sstevel@tonic-gate 		/*
16500Sstevel@tonic-gate 		 * If this was the first addition of disks to the
16510Sstevel@tonic-gate 		 * diskset you now need to update the mb_setcreatetime
16520Sstevel@tonic-gate 		 * which needed lb_inittime which wasn't there until now.
16530Sstevel@tonic-gate 		 */
16540Sstevel@tonic-gate 		if (firstmddb) {
16550Sstevel@tonic-gate 			if (meta_update_mb(sp, dd, ep) != 0) {
16560Sstevel@tonic-gate 				return (-1);
16570Sstevel@tonic-gate 			}
16580Sstevel@tonic-gate 		}
16590Sstevel@tonic-gate 		(void) close(fd);
16600Sstevel@tonic-gate 	}
16610Sstevel@tonic-gate 
16620Sstevel@tonic-gate out:
16630Sstevel@tonic-gate 	if (metaislocalset(sp)) {
16640Sstevel@tonic-gate 
16650Sstevel@tonic-gate 		/* everything looks fine. Start mdmonitord */
16662614Spetede 		if (rval == 0 && start_svmdaemons == 1) {
16670Sstevel@tonic-gate 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
16680Sstevel@tonic-gate 				mde_perror(&status, "");
16690Sstevel@tonic-gate 				mdclrerror(&status);
16700Sstevel@tonic-gate 			}
16710Sstevel@tonic-gate 		}
16720Sstevel@tonic-gate 
16730Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
16740Sstevel@tonic-gate 			/* Don't mask any previous errors */
16750Sstevel@tonic-gate 			if (rval == 0)
16760Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16770Sstevel@tonic-gate 			return (rval);
16780Sstevel@tonic-gate 		}
16790Sstevel@tonic-gate 
16800Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
16810Sstevel@tonic-gate 			/* Don't mask any previous errors */
16820Sstevel@tonic-gate 			if (rval == 0)
16830Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16840Sstevel@tonic-gate 		}
16850Sstevel@tonic-gate 	} else {
16860Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
16870Sstevel@tonic-gate 		    (options & MDCHK_SET_LOCKED),
16880Sstevel@tonic-gate 		    (options & MDCHK_SET_FORCE),
16890Sstevel@tonic-gate 		    &status)) {
16900Sstevel@tonic-gate 			/* Don't mask any previous errors */
16910Sstevel@tonic-gate 			if (rval == 0)
16920Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16930Sstevel@tonic-gate 			else
16940Sstevel@tonic-gate 				mdclrerror(&status);
16950Sstevel@tonic-gate 		}
16960Sstevel@tonic-gate 		metafreedrivedesc(&dd);
16970Sstevel@tonic-gate 	}
16980Sstevel@tonic-gate 	/*
16990Sstevel@tonic-gate 	 * For MN disksets that already had nodes joined
17000Sstevel@tonic-gate 	 * before the attach of this mddb(s), the name invalidation is
17010Sstevel@tonic-gate 	 * done by the commd handler routine.  Otherwise, if this
17020Sstevel@tonic-gate 	 * is the first attach of a MN diskset mddb, the invalidation
17030Sstevel@tonic-gate 	 * must be done here since the first attach cannot be sent
17040Sstevel@tonic-gate 	 * via the commd since there are no nodes joined to the set yet.
17050Sstevel@tonic-gate 	 */
17060Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
17070Sstevel@tonic-gate 	    (MD_MNSET_DESC(sd) &&
17080Sstevel@tonic-gate 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
17090Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
17100Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
17110Sstevel@tonic-gate 		}
17120Sstevel@tonic-gate 	}
17130Sstevel@tonic-gate 	return (rval);
17140Sstevel@tonic-gate }
17150Sstevel@tonic-gate 
17160Sstevel@tonic-gate /*
17170Sstevel@tonic-gate  * deletelist_length
17180Sstevel@tonic-gate  *
17190Sstevel@tonic-gate  *	return the number of slices that have been specified for deletion
17200Sstevel@tonic-gate  *	on the metadb command line.  This does not calculate the number
17210Sstevel@tonic-gate  *	of replicas because there may be multiple replicas per slice.
17220Sstevel@tonic-gate  */
17230Sstevel@tonic-gate static int
17240Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
17250Sstevel@tonic-gate {
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17280Sstevel@tonic-gate 	int			list_length = 0;
17290Sstevel@tonic-gate 
17300Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17310Sstevel@tonic-gate 		list_length++;
17320Sstevel@tonic-gate 	}
17330Sstevel@tonic-gate 
17340Sstevel@tonic-gate 	return (list_length);
17350Sstevel@tonic-gate }
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate static int
17380Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
17390Sstevel@tonic-gate {
17400Sstevel@tonic-gate 
17410Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17420Sstevel@tonic-gate 	mdname_t		*np;
17430Sstevel@tonic-gate 	int			index = 0;
17440Sstevel@tonic-gate 
17450Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
17460Sstevel@tonic-gate 		np = nlp->namep;
17470Sstevel@tonic-gate 
17480Sstevel@tonic-gate 		if (strcmp(devname, np->bname) == 0)
17490Sstevel@tonic-gate 			return (index);
17500Sstevel@tonic-gate 		index++;
17510Sstevel@tonic-gate 	}
17520Sstevel@tonic-gate 
17530Sstevel@tonic-gate 	return (-1);
17540Sstevel@tonic-gate }
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate /*
17570Sstevel@tonic-gate  * Delete replicas from set.  This happens as a result of:
17580Sstevel@tonic-gate  *	- metadb [-s set_name] -d
17590Sstevel@tonic-gate  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
17600Sstevel@tonic-gate  *	- metaset -s set_name -d disk
17610Sstevel@tonic-gate  *	- metaset -s set_name -b
17620Sstevel@tonic-gate  *
17630Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
17640Sstevel@tonic-gate  *
17650Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
17660Sstevel@tonic-gate  * is running the metaset command.
17670Sstevel@tonic-gate  *
17680Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
17690Sstevel@tonic-gate  * running the metaset command.  This detach routine is sent to all
17700Sstevel@tonic-gate  * of the joined nodes in the diskset using commd.  This keeps
17710Sstevel@tonic-gate  * the nodes in-sync.
17720Sstevel@tonic-gate  */
17730Sstevel@tonic-gate int
17740Sstevel@tonic-gate meta_db_detach(
17750Sstevel@tonic-gate 	mdsetname_t		*sp,
17760Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
17770Sstevel@tonic-gate 	mdforceopts_t		force_option,
17780Sstevel@tonic-gate 	char			*sysfilename,
17790Sstevel@tonic-gate 	md_error_t		*ep
17800Sstevel@tonic-gate )
17810Sstevel@tonic-gate {
17820Sstevel@tonic-gate 	struct mddb_config	c;
17830Sstevel@tonic-gate 	mdnamelist_t		*nlp;
17840Sstevel@tonic-gate 	mdname_t		*np;
17850Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
17860Sstevel@tonic-gate 	md_drive_desc		*p;
17870Sstevel@tonic-gate 	int			replicacount;
17880Sstevel@tonic-gate 	int			replica_delete_count;
17890Sstevel@tonic-gate 	int			nr_replica_slices;
17900Sstevel@tonic-gate 	int			i;
17910Sstevel@tonic-gate 	int			stop_svmdaemons = 0;
17920Sstevel@tonic-gate 	int			rval = 0;
17930Sstevel@tonic-gate 	int			index;
17940Sstevel@tonic-gate 	int			valid_replicas_nottodelete = 0;
17950Sstevel@tonic-gate 	int			invalid_replicas_nottodelete = 0;
17960Sstevel@tonic-gate 	int			invalid_replicas_todelete = 0;
17970Sstevel@tonic-gate 	int			errored = 0;
17980Sstevel@tonic-gate 	int			*tag_array;
17990Sstevel@tonic-gate 	int			fd = -1;
18000Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
18010Sstevel@tonic-gate 	md_set_desc		*sd;
18020Sstevel@tonic-gate 	int			stale_bool = FALSE;
18030Sstevel@tonic-gate 	int			flags;
18040Sstevel@tonic-gate 
18050Sstevel@tonic-gate 	/*
18060Sstevel@tonic-gate 	 * Error if we don't get some work to do.
18070Sstevel@tonic-gate 	 */
18080Sstevel@tonic-gate 	if (db_nlp == NULL)
18090Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
18100Sstevel@tonic-gate 
18110Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
18120Sstevel@tonic-gate 		return (-1);
18130Sstevel@tonic-gate 
18140Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
18150Sstevel@tonic-gate 	c.c_id = 0;
18160Sstevel@tonic-gate 	c.c_setno = sp->setno;
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
18190Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
18200Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
18210Sstevel@tonic-gate 
18220Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18230Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
18240Sstevel@tonic-gate 
18250Sstevel@tonic-gate 	/*
18260Sstevel@tonic-gate 	 * Is current set STALE?
18270Sstevel@tonic-gate 	 */
18280Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
18290Sstevel@tonic-gate 		stale_bool = TRUE;
18300Sstevel@tonic-gate 	}
18310Sstevel@tonic-gate 
18320Sstevel@tonic-gate 	replicacount = c.c_dbcnt;
18330Sstevel@tonic-gate 
18340Sstevel@tonic-gate 	assert(db_nlp != NULL);
18350Sstevel@tonic-gate 
18360Sstevel@tonic-gate 	/*
18370Sstevel@tonic-gate 	 * go through and gather how many data bases are on each
18380Sstevel@tonic-gate 	 * device specified.
18390Sstevel@tonic-gate 	 */
18400Sstevel@tonic-gate 
18410Sstevel@tonic-gate 	nr_replica_slices = deletelist_length(db_nlp);
18420Sstevel@tonic-gate 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
18430Sstevel@tonic-gate 
18440Sstevel@tonic-gate 	replica_delete_count = 0;
18450Sstevel@tonic-gate 	for (i = 0; i < replicacount; i++) {
18460Sstevel@tonic-gate 		char	*devname;
18470Sstevel@tonic-gate 		int	found = 0;
18480Sstevel@tonic-gate 
18490Sstevel@tonic-gate 		c.c_id = i;
18500Sstevel@tonic-gate 
18510Sstevel@tonic-gate 		/* Don't need device id information from this ioctl */
18520Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
18530Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
18540Sstevel@tonic-gate 
18550Sstevel@tonic-gate 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18560Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 		devname = splicename(&c.c_devname);
18590Sstevel@tonic-gate 
1860*5109Spetede 		if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
1861*5109Spetede 			Free(devname);
1862*5109Spetede 			devname = getlongname(&c, ep);
1863*5109Spetede 			if (devname == NULL) {
1864*5109Spetede 				return (-1);
1865*5109Spetede 			}
1866*5109Spetede 		}
1867*5109Spetede 
18680Sstevel@tonic-gate 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
18690Sstevel@tonic-gate 			found = 1;
18700Sstevel@tonic-gate 			tag_array[index] = 1;
18710Sstevel@tonic-gate 			replica_delete_count++;
18720Sstevel@tonic-gate 		}
18730Sstevel@tonic-gate 
18740Sstevel@tonic-gate 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
1875*5109Spetede 		    MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
1876*5109Spetede 		    MDDB_F_EDATA | MDDB_F_EMASTER);
18770Sstevel@tonic-gate 
18780Sstevel@tonic-gate 		/*
18790Sstevel@tonic-gate 		 * There are four combinations of "errored" and "found"
18800Sstevel@tonic-gate 		 * and they are used to find the number of
18810Sstevel@tonic-gate 		 * (a) valid/invalid replicas that are not in the delete
18820Sstevel@tonic-gate 		 * list and are available in the system.
18830Sstevel@tonic-gate 		 * (b) valid/invalid replicas that are to be deleted.
18840Sstevel@tonic-gate 		 */
18850Sstevel@tonic-gate 
18860Sstevel@tonic-gate 		if (errored && !found)		/* errored and !found */
18870Sstevel@tonic-gate 			invalid_replicas_nottodelete++;
18880Sstevel@tonic-gate 		else if (!found)		/* !errored and !found */
18890Sstevel@tonic-gate 			valid_replicas_nottodelete++;
18900Sstevel@tonic-gate 		else if (errored)		/* errored and found */
18910Sstevel@tonic-gate 			invalid_replicas_todelete++;
18920Sstevel@tonic-gate 		/*
18930Sstevel@tonic-gate 		 * else it is !errored and found. This means
18940Sstevel@tonic-gate 		 * valid_replicas_todelete++; But this variable will not
18950Sstevel@tonic-gate 		 * be used anywhere
18960Sstevel@tonic-gate 		 */
18970Sstevel@tonic-gate 
18980Sstevel@tonic-gate 		Free(devname);
18990Sstevel@tonic-gate 	}
19000Sstevel@tonic-gate 
19010Sstevel@tonic-gate 	index = 0;
19020Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19030Sstevel@tonic-gate 		np = nlp->namep;
19040Sstevel@tonic-gate 		if (tag_array[index++] != 1) {
19050Sstevel@tonic-gate 			Free(tag_array);
19060Sstevel@tonic-gate 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
19070Sstevel@tonic-gate 		}
19080Sstevel@tonic-gate 	}
19090Sstevel@tonic-gate 
19100Sstevel@tonic-gate 	Free(tag_array);
19110Sstevel@tonic-gate 
19120Sstevel@tonic-gate 
19130Sstevel@tonic-gate 	/* if all replicas are deleted stop mdmonitord */
19140Sstevel@tonic-gate 	if ((replicacount - replica_delete_count) == 0)
19150Sstevel@tonic-gate 		stop_svmdaemons = 1;
19160Sstevel@tonic-gate 
19170Sstevel@tonic-gate 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
19180Sstevel@tonic-gate 		if (force_option & MDFORCE_NONE)
19190Sstevel@tonic-gate 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
19200Sstevel@tonic-gate 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
19210Sstevel@tonic-gate 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
19220Sstevel@tonic-gate 	}
19230Sstevel@tonic-gate 
19240Sstevel@tonic-gate 	/*
19250Sstevel@tonic-gate 	 * The following algorithms are followed to check for deletion:
19260Sstevel@tonic-gate 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
19270Sstevel@tonic-gate 	 * replicas, then deletion should be allowed.
19280Sstevel@tonic-gate 	 * (b) Deletion should be allowed only if valid replicas that are "not"
19290Sstevel@tonic-gate 	 * to be deleted is always greater than the invalid replicas that
19300Sstevel@tonic-gate 	 * are "not" to be deleted.
19310Sstevel@tonic-gate 	 * (c) If the user uses -f option, then deletion should be allowed.
19320Sstevel@tonic-gate 	 */
19330Sstevel@tonic-gate 
19340Sstevel@tonic-gate 	if ((invalid_replicas_todelete != replica_delete_count) &&
1935*5109Spetede 	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
1936*5109Spetede 	    (force_option != MDFORCE_LOCAL))
19370Sstevel@tonic-gate 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
19380Sstevel@tonic-gate 
19390Sstevel@tonic-gate 	/*
19400Sstevel@tonic-gate 	 * go through and tell kernel to delete them
19410Sstevel@tonic-gate 	 */
19420Sstevel@tonic-gate 
19430Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
19440Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
19450Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
19480Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
19510Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
19520Sstevel@tonic-gate 		if (! mdisok(ep))
19530Sstevel@tonic-gate 			return (-1);
19540Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
19550Sstevel@tonic-gate 			return (-1);
19560Sstevel@tonic-gate 	}
19570Sstevel@tonic-gate 
19580Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
19590Sstevel@tonic-gate 		np = nlp->namep;
19600Sstevel@tonic-gate 
19610Sstevel@tonic-gate 		/*
19620Sstevel@tonic-gate 		 * If mddb is being deleted from MN diskset and node is
19630Sstevel@tonic-gate 		 * an owner of the diskset then use rpc.mdcommd
19640Sstevel@tonic-gate 		 * mechanism to delete mddb(s) so that all nodes stay in sync.
19650Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
19660Sstevel@tonic-gate 		 * can't write the message to the mddb.
19670Sstevel@tonic-gate 		 *
19680Sstevel@tonic-gate 		 * When mddbs are first being added to set, a detach can
19690Sstevel@tonic-gate 		 * be called before any node has joined the diskset, so
19700Sstevel@tonic-gate 		 * must check to see if node is an owner of the diskset.
19710Sstevel@tonic-gate 		 *
19720Sstevel@tonic-gate 		 * Otherwise, just delete mddb from this node.
19730Sstevel@tonic-gate 		 */
19740Sstevel@tonic-gate 
19750Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
19760Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
19770Sstevel@tonic-gate 			md_mn_result_t			*resultp;
19780Sstevel@tonic-gate 			md_mn_msg_meta_db_detach_t	detach;
19790Sstevel@tonic-gate 			int				send_rval;
19800Sstevel@tonic-gate 
19810Sstevel@tonic-gate 			/*
19820Sstevel@tonic-gate 			 * The following algorithm is used to detach replicas.
19830Sstevel@tonic-gate 			 * 	- META_DB_DETACH message generates submsgs
19840Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
19850Sstevel@tonic-gate 			 * 		- MDDB_DETACH replicas
19860Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
19870Sstevel@tonic-gate 			 *		information to be sent from master
19880Sstevel@tonic-gate 			 *		to slaves at a higher class than the
19890Sstevel@tonic-gate 			 *		unblock so the parse message will
19900Sstevel@tonic-gate 			 *		reach slaves before unblock message.
19910Sstevel@tonic-gate 			 */
19920Sstevel@tonic-gate 			(void) splitname(np->bname, &detach.msg_splitname);
19930Sstevel@tonic-gate 
19940Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
19950Sstevel@tonic-gate 			detach.msg_devid[0] = NULL;
19960Sstevel@tonic-gate 
19970Sstevel@tonic-gate 			/*
19980Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
19990Sstevel@tonic-gate 			 * stuck in the return step until this command has
20000Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
20010Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
20020Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
20030Sstevel@tonic-gate 			 * cycle to proceed.
20040Sstevel@tonic-gate 			 */
20050Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
20060Sstevel@tonic-gate 			if (stale_bool == TRUE)
20070Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
20080Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
2009*5109Spetede 			    MD_MN_MSG_META_DB_DETACH,
2010*5109Spetede 			    flags, (char *)&detach,
2011*5109Spetede 			    sizeof (md_mn_msg_meta_db_detach_t),
2012*5109Spetede 			    &resultp, ep);
20130Sstevel@tonic-gate 			if (send_rval != 0) {
20140Sstevel@tonic-gate 				rval = -1;
20150Sstevel@tonic-gate 				if (resultp == NULL)
20160Sstevel@tonic-gate 					(void) mddserror(ep,
20170Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
20180Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
20190Sstevel@tonic-gate 					    sp->setname);
20200Sstevel@tonic-gate 				else {
20210Sstevel@tonic-gate 					(void) mdstealerror(ep,
20220Sstevel@tonic-gate 					    &(resultp->mmr_ep));
20230Sstevel@tonic-gate 					if (mdisok(ep)) {
20240Sstevel@tonic-gate 						(void) mddserror(ep,
20250Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
20260Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
20270Sstevel@tonic-gate 						    sp->setname);
20280Sstevel@tonic-gate 					}
20290Sstevel@tonic-gate 					free_result(resultp);
20300Sstevel@tonic-gate 				}
20310Sstevel@tonic-gate 				goto out;
20320Sstevel@tonic-gate 			}
20330Sstevel@tonic-gate 			if (resultp)
20340Sstevel@tonic-gate 				free_result(resultp);
20350Sstevel@tonic-gate 		} else {
20360Sstevel@tonic-gate 			i = 0;
20370Sstevel@tonic-gate 			while (i < c.c_dbcnt) {
20380Sstevel@tonic-gate 				char	*devname;
20390Sstevel@tonic-gate 
20400Sstevel@tonic-gate 				c.c_id = i;
20410Sstevel@tonic-gate 
20420Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
20430Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
20440Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
20450Sstevel@tonic-gate 
20460Sstevel@tonic-gate 				if (metaioctl(MD_DB_GETDEV, &c,
20470Sstevel@tonic-gate 				    &c.c_mde, NULL)) {
20480Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
20490Sstevel@tonic-gate 					goto out;
20500Sstevel@tonic-gate 				}
20510Sstevel@tonic-gate 
20520Sstevel@tonic-gate 				devname = splicename(&c.c_devname);
2053*5109Spetede 
2054*5109Spetede 				if (strstr(devname, META_LONGDISKNAME_STR)
2055*5109Spetede 				    != NULL) {
2056*5109Spetede 					Free(devname);
2057*5109Spetede 					devname = getlongname(&c, ep);
2058*5109Spetede 					if (devname == NULL) {
2059*5109Spetede 						return (-1);
2060*5109Spetede 					}
2061*5109Spetede 				}
2062*5109Spetede 
20630Sstevel@tonic-gate 				if (strcmp(devname, np->bname) != 0) {
20640Sstevel@tonic-gate 					Free(devname);
20650Sstevel@tonic-gate 					i++;
20660Sstevel@tonic-gate 					continue;
20670Sstevel@tonic-gate 				}
20680Sstevel@tonic-gate 				Free(devname);
20690Sstevel@tonic-gate 
20700Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
20710Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
20720Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
20730Sstevel@tonic-gate 
20740Sstevel@tonic-gate 				if (metaioctl(MD_DB_DELDEV, &c,
20750Sstevel@tonic-gate 				    &c.c_mde, NULL) != 0) {
20760Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
20770Sstevel@tonic-gate 					goto out;
20780Sstevel@tonic-gate 				}
20790Sstevel@tonic-gate 
20800Sstevel@tonic-gate 				/* Not incrementing "i" intentionally */
20810Sstevel@tonic-gate 			}
20820Sstevel@tonic-gate 		}
20830Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
20840Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
20850Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next) {
20860Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
20870Sstevel@tonic-gate 					p->dd_dbcnt = 0;
20880Sstevel@tonic-gate 					p->dd_dbsize  = 0;
20890Sstevel@tonic-gate 					break;
20900Sstevel@tonic-gate 				}
20910Sstevel@tonic-gate 			}
20920Sstevel@tonic-gate 
20930Sstevel@tonic-gate 			/*
20940Sstevel@tonic-gate 			 * Slam a dummy master block and make it self
20950Sstevel@tonic-gate 			 * identifying
20960Sstevel@tonic-gate 			 */
20970Sstevel@tonic-gate 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
20980Sstevel@tonic-gate 				meta_mkdummymaster(sp, fd, 16);
20990Sstevel@tonic-gate 				(void) close(fd);
21000Sstevel@tonic-gate 			}
21010Sstevel@tonic-gate 		}
21020Sstevel@tonic-gate 	}
21030Sstevel@tonic-gate out:
21040Sstevel@tonic-gate 	if (metaislocalset(sp)) {
21050Sstevel@tonic-gate 		/*
21060Sstevel@tonic-gate 		 * Stop all the daemons if there are
21070Sstevel@tonic-gate 		 * no more replicas so that the module can be
21080Sstevel@tonic-gate 		 * unloaded.
21090Sstevel@tonic-gate 		 */
21100Sstevel@tonic-gate 		if (rval == 0 && stop_svmdaemons == 1) {
21110Sstevel@tonic-gate 			char buf[MAXPATHLEN];
21120Sstevel@tonic-gate 			int i;
21130Sstevel@tonic-gate 
21140Sstevel@tonic-gate 			for (i = 0; i < DAEMON_COUNT; i++) {
21150Sstevel@tonic-gate 				(void) snprintf(buf, MAXPATHLEN,
2116*5109Spetede 				    "/usr/bin/pkill -%s -x %s",
2117*5109Spetede 				    svmd_kill_list[i].svmd_kill_val,
2118*5109Spetede 				    svmd_kill_list[i].svmd_name);
21190Sstevel@tonic-gate 				if (pclose(popen(buf, "w")) == -1)
21200Sstevel@tonic-gate 					md_perror(buf);
21210Sstevel@tonic-gate 			}
21220Sstevel@tonic-gate 
21230Sstevel@tonic-gate 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
21240Sstevel@tonic-gate 				mde_perror(&status, "");
21250Sstevel@tonic-gate 				mdclrerror(&status);
21260Sstevel@tonic-gate 			}
21270Sstevel@tonic-gate 		}
21280Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
21290Sstevel@tonic-gate 			/* Don't mask any previous errors */
21300Sstevel@tonic-gate 			if (rval == 0)
21310Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21320Sstevel@tonic-gate 			else
21330Sstevel@tonic-gate 				mdclrerror(&status);
21340Sstevel@tonic-gate 			return (rval);
21350Sstevel@tonic-gate 		}
21360Sstevel@tonic-gate 
21370Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
21380Sstevel@tonic-gate 			/* Don't mask any previous errors */
21390Sstevel@tonic-gate 			if (rval == 0)
21400Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21410Sstevel@tonic-gate 			else
21420Sstevel@tonic-gate 				mdclrerror(&status);
21430Sstevel@tonic-gate 		}
21440Sstevel@tonic-gate 	} else {
21450Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
21460Sstevel@tonic-gate 		    (force_option & MDFORCE_SET_LOCKED),
21470Sstevel@tonic-gate 		    ((force_option & MDFORCE_LOCAL) |
21480Sstevel@tonic-gate 		    (force_option & MDFORCE_DS)), &status)) {
21490Sstevel@tonic-gate 			/* Don't mask any previous errors */
21500Sstevel@tonic-gate 			if (rval == 0)
21510Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
21520Sstevel@tonic-gate 			else
21530Sstevel@tonic-gate 				mdclrerror(&status);
21540Sstevel@tonic-gate 		}
21550Sstevel@tonic-gate 		metafreedrivedesc(&dd);
21560Sstevel@tonic-gate 	}
21570Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
21580Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
21590Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
21600Sstevel@tonic-gate 		}
21610Sstevel@tonic-gate 	}
21620Sstevel@tonic-gate 	return (rval);
21630Sstevel@tonic-gate }
21640Sstevel@tonic-gate 
21650Sstevel@tonic-gate static md_replica_t *
21660Sstevel@tonic-gate metareplicaname(
21670Sstevel@tonic-gate 	mdsetname_t		*sp,
21680Sstevel@tonic-gate 	int			flags,
21690Sstevel@tonic-gate 	struct mddb_config	*c,
21700Sstevel@tonic-gate 	md_error_t		*ep
21710Sstevel@tonic-gate )
21720Sstevel@tonic-gate {
21730Sstevel@tonic-gate 	md_replica_t	*rp;
21740Sstevel@tonic-gate 	char		*devname;
21750Sstevel@tonic-gate 	size_t		sz;
2176*5109Spetede 	devid_nmlist_t	*disklist = NULL;
2177*5109Spetede 	char		*devid_str;
21780Sstevel@tonic-gate 
21790Sstevel@tonic-gate 	/* allocate replicaname */
21800Sstevel@tonic-gate 	rp = Zalloc(sizeof (*rp));
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 	/* get device name */
21830Sstevel@tonic-gate 	devname = splicename(&c->c_devname);
2184*5109Spetede 
2185*5109Spetede 	/*
2186*5109Spetede 	 * Check if the device has a long name (>40 characters) and
2187*5109Spetede 	 * if so then we have to use devids to get the device name.
2188*5109Spetede 	 * If this cannot be done then we have to fail the request.
2189*5109Spetede 	 */
2190*5109Spetede 	if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
2191*5109Spetede 		if (c->c_locator.l_devid != NULL) {
2192*5109Spetede 			if (meta_deviceid_to_nmlist("/dev/dsk",
2193*5109Spetede 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
2194*5109Spetede 			    c->c_locator.l_minor_name, &disklist) != 0) {
2195*5109Spetede 				devid_str = devid_str_encode(
2196*5109Spetede 				    (ddi_devid_t)(uintptr_t)
2197*5109Spetede 				    c->c_locator.l_devid, NULL);
2198*5109Spetede 				(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
2199*5109Spetede 				mderrorextra(ep, devid_str);
2200*5109Spetede 				if (devid_str != NULL)
2201*5109Spetede 					devid_str_free(devid_str);
2202*5109Spetede 				Free(rp);
2203*5109Spetede 				Free(devname);
2204*5109Spetede 				return (NULL);
2205*5109Spetede 			}
2206*5109Spetede 		} else {
2207*5109Spetede 			(void) mderror(ep, MDE_NODEVID, "");
2208*5109Spetede 			Free(rp);
2209*5109Spetede 			Free(devname);
2210*5109Spetede 			return (NULL);
2211*5109Spetede 		}
2212*5109Spetede 		Free(devname);
2213*5109Spetede 		devname = disklist[0].devname;
2214*5109Spetede 	}
2215*5109Spetede 
22160Sstevel@tonic-gate 	if (flags & PRINT_FAST) {
22171623Stw21770 		if ((rp->r_namep = metaname_fast(&sp, devname,
22181623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
22190Sstevel@tonic-gate 			Free(devname);
22200Sstevel@tonic-gate 			Free(rp);
22210Sstevel@tonic-gate 			return (NULL);
22220Sstevel@tonic-gate 		}
22230Sstevel@tonic-gate 	} else {
22241623Stw21770 		if ((rp->r_namep = metaname(&sp, devname,
22251623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
22260Sstevel@tonic-gate 			Free(devname);
22270Sstevel@tonic-gate 			Free(rp);
22280Sstevel@tonic-gate 			return (NULL);
22290Sstevel@tonic-gate 		}
22300Sstevel@tonic-gate 	}
22310Sstevel@tonic-gate 	Free(devname);
22320Sstevel@tonic-gate 
22330Sstevel@tonic-gate 	/* make sure it's OK */
22340Sstevel@tonic-gate 	if ((! (flags & MD_BASICNAME_OK)) &&
22350Sstevel@tonic-gate 	    (metachkcomp(rp->r_namep, ep) != 0)) {
22360Sstevel@tonic-gate 		Free(rp);
22370Sstevel@tonic-gate 		return (NULL);
22380Sstevel@tonic-gate 	}
22390Sstevel@tonic-gate 
224062Sjeanm 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
224162Sjeanm 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
22420Sstevel@tonic-gate 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
22430Sstevel@tonic-gate 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
224462Sjeanm 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
224562Sjeanm 		    (c->c_locator.l_devid));
22460Sstevel@tonic-gate 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
22470Sstevel@tonic-gate 		    (ddi_devid_t)NULL) {
22480Sstevel@tonic-gate 			Free(rp);
22490Sstevel@tonic-gate 			return (NULL);
22500Sstevel@tonic-gate 		}
22510Sstevel@tonic-gate 		(void) memcpy((void *)rp->r_devid,
225262Sjeanm 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
22530Sstevel@tonic-gate 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
22540Sstevel@tonic-gate 		rp->r_flags &= ~MDDB_F_NODEVID;
22550Sstevel@tonic-gate 		/* Overwrite dev derived from name with dev from devid */
22560Sstevel@tonic-gate 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
22570Sstevel@tonic-gate 	}
22580Sstevel@tonic-gate 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
22590Sstevel@tonic-gate 
22600Sstevel@tonic-gate 	rp->r_blkno = c->c_locator.l_blkno;
22610Sstevel@tonic-gate 	if (c->c_dbend != 0)
22620Sstevel@tonic-gate 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
22630Sstevel@tonic-gate 
22640Sstevel@tonic-gate 	/* return replica */
22650Sstevel@tonic-gate 	return (rp);
22660Sstevel@tonic-gate }
22670Sstevel@tonic-gate 
22680Sstevel@tonic-gate /*
22690Sstevel@tonic-gate  * free replica list
22700Sstevel@tonic-gate  */
22710Sstevel@tonic-gate void
22720Sstevel@tonic-gate metafreereplicalist(
22730Sstevel@tonic-gate 	md_replicalist_t	*rlp
22740Sstevel@tonic-gate )
22750Sstevel@tonic-gate {
22760Sstevel@tonic-gate 	md_replicalist_t	*rl = NULL;
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 	for (/* void */; (rlp != NULL); rlp = rl) {
22790Sstevel@tonic-gate 		rl = rlp->rl_next;
22800Sstevel@tonic-gate 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
22810Sstevel@tonic-gate 			free(rlp->rl_repp->r_devid);
22820Sstevel@tonic-gate 		}
22830Sstevel@tonic-gate 		Free(rlp->rl_repp);
22840Sstevel@tonic-gate 		Free(rlp);
22850Sstevel@tonic-gate 	}
22860Sstevel@tonic-gate }
22870Sstevel@tonic-gate 
22880Sstevel@tonic-gate /*
22890Sstevel@tonic-gate  * return list of all replicas in set
22900Sstevel@tonic-gate  */
22910Sstevel@tonic-gate int
22920Sstevel@tonic-gate metareplicalist(
22930Sstevel@tonic-gate 	mdsetname_t		*sp,
22940Sstevel@tonic-gate 	int			flags,
22950Sstevel@tonic-gate 	md_replicalist_t	**rlpp,
22960Sstevel@tonic-gate 	md_error_t		*ep
22970Sstevel@tonic-gate )
22980Sstevel@tonic-gate {
22990Sstevel@tonic-gate 	md_replicalist_t	**tail = rlpp;
23000Sstevel@tonic-gate 	int			count = 0;
23010Sstevel@tonic-gate 	struct mddb_config	c;
23020Sstevel@tonic-gate 	int			i;
23030Sstevel@tonic-gate 	char			*devid;
23040Sstevel@tonic-gate 
23050Sstevel@tonic-gate 	/* for each replica */
23060Sstevel@tonic-gate 	i = 0;
23070Sstevel@tonic-gate 	do {
23080Sstevel@tonic-gate 		md_replica_t	*rp;
23090Sstevel@tonic-gate 
23100Sstevel@tonic-gate 		/* get next replica */
23110Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
23120Sstevel@tonic-gate 		c.c_id = i;
23130Sstevel@tonic-gate 		c.c_setno = sp->setno;
23140Sstevel@tonic-gate 
23150Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
23160Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23170Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23180Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
23190Sstevel@tonic-gate 				break;	/* handle none at all */
23200Sstevel@tonic-gate 			}
23210Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
23220Sstevel@tonic-gate 			goto out;
23230Sstevel@tonic-gate 		}
23240Sstevel@tonic-gate 
23250Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
23260Sstevel@tonic-gate 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
23270Sstevel@tonic-gate 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
23280Sstevel@tonic-gate 				goto out;
23290Sstevel@tonic-gate 			}
23300Sstevel@tonic-gate 			c.c_locator.l_devid = (uintptr_t)devid;
23310Sstevel@tonic-gate 			/*
23320Sstevel@tonic-gate 			 * Turn on space and sz flags since 'sz' amount of
23330Sstevel@tonic-gate 			 * space has been alloc'd.
23340Sstevel@tonic-gate 			 */
23350Sstevel@tonic-gate 			c.c_locator.l_devid_flags =
2336*5109Spetede 			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
23370Sstevel@tonic-gate 		}
23380Sstevel@tonic-gate 
23390Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
23400Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
23410Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
23420Sstevel@tonic-gate 				break;	/* handle none at all */
23430Sstevel@tonic-gate 			}
23440Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
23450Sstevel@tonic-gate 			goto out;
23460Sstevel@tonic-gate 		}
23470Sstevel@tonic-gate 
23480Sstevel@tonic-gate 		/*
23490Sstevel@tonic-gate 		 * Paranoid check - shouldn't happen, but is left as
23500Sstevel@tonic-gate 		 * a place holder for changes that will be needed after
23510Sstevel@tonic-gate 		 * dynamic reconfiguration changes are added to SVM (to
23520Sstevel@tonic-gate 		 * support movement of disks at any point in time).
23530Sstevel@tonic-gate 		 */
23540Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
23550Sstevel@tonic-gate 			(void) fprintf(stderr,
23560Sstevel@tonic-gate 			    dgettext(TEXT_DOMAIN,
2357*5109Spetede 			    "Error: Relocation Information "
2358*5109Spetede 			    "(drvnm=%s, mnum=0x%lx) \n"
2359*5109Spetede 			    "relocation information size changed - \n"
2360*5109Spetede 			    "rerun command\n"),
23610Sstevel@tonic-gate 			    c.c_locator.l_driver, c.c_locator.l_mnum);
23620Sstevel@tonic-gate 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
23630Sstevel@tonic-gate 			goto out;
23640Sstevel@tonic-gate 		}
23650Sstevel@tonic-gate 
23660Sstevel@tonic-gate 		if (c.c_dbcnt == 0)
23670Sstevel@tonic-gate 			break;		/* handle none at all */
23680Sstevel@tonic-gate 
23690Sstevel@tonic-gate 		/* get info */
23700Sstevel@tonic-gate 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
23710Sstevel@tonic-gate 			goto out;
23720Sstevel@tonic-gate 
23730Sstevel@tonic-gate 		/* append to list */
23740Sstevel@tonic-gate 		*tail = Zalloc(sizeof (**tail));
23750Sstevel@tonic-gate 		(*tail)->rl_repp = rp;
23760Sstevel@tonic-gate 		tail = &(*tail)->rl_next;
23770Sstevel@tonic-gate 		++count;
23780Sstevel@tonic-gate 
23790Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23800Sstevel@tonic-gate 			free(devid);
23810Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
23820Sstevel@tonic-gate 		}
23830Sstevel@tonic-gate 
23840Sstevel@tonic-gate 	} while (++i < c.c_dbcnt);
23850Sstevel@tonic-gate 
23860Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23870Sstevel@tonic-gate 		free(devid);
23880Sstevel@tonic-gate 	}
23890Sstevel@tonic-gate 
23900Sstevel@tonic-gate 	/* return count */
23910Sstevel@tonic-gate 	return (count);
23920Sstevel@tonic-gate 
23930Sstevel@tonic-gate 	/* cleanup, return error */
23940Sstevel@tonic-gate out:
23950Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
23960Sstevel@tonic-gate 		free(devid);
23970Sstevel@tonic-gate 	}
23980Sstevel@tonic-gate 	metafreereplicalist(*rlpp);
23990Sstevel@tonic-gate 	*rlpp = NULL;
24000Sstevel@tonic-gate 	return (-1);
24010Sstevel@tonic-gate }
24020Sstevel@tonic-gate 
24030Sstevel@tonic-gate /*
24040Sstevel@tonic-gate  * meta_sync_db_locations - get list of replicas from kernel and write
24050Sstevel@tonic-gate  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
24060Sstevel@tonic-gate  * 	the kernel with the replica list in the conf files.
24070Sstevel@tonic-gate  *
24080Sstevel@tonic-gate  */
24090Sstevel@tonic-gate void
24100Sstevel@tonic-gate meta_sync_db_locations(
24110Sstevel@tonic-gate 	mdsetname_t	*sp,
24120Sstevel@tonic-gate 	md_error_t	*ep
24130Sstevel@tonic-gate )
24140Sstevel@tonic-gate {
24150Sstevel@tonic-gate 	char		*sname = 0;		/* system file name */
24160Sstevel@tonic-gate 	char 		*cname = 0;		/* config file name */
24170Sstevel@tonic-gate 
24180Sstevel@tonic-gate 	if (!metaislocalset(sp))
24190Sstevel@tonic-gate 		return;
24200Sstevel@tonic-gate 
24210Sstevel@tonic-gate 	/* Updates backup of configuration file (aka mddb.cf) */
24220Sstevel@tonic-gate 	if (buildconf(sp, ep) != 0)
24230Sstevel@tonic-gate 		return;
24240Sstevel@tonic-gate 
24250Sstevel@tonic-gate 	/* Updates system configuration file (aka md.conf) */
24260Sstevel@tonic-gate 	(void) meta_db_patch(sname, cname, 0, ep);
24270Sstevel@tonic-gate }
24280Sstevel@tonic-gate 
24290Sstevel@tonic-gate /*
24300Sstevel@tonic-gate  * setup_db_locations - parse the mddb.cf file and
24310Sstevel@tonic-gate  *			tells the driver which db locations to use.
24320Sstevel@tonic-gate  */
24330Sstevel@tonic-gate int
24340Sstevel@tonic-gate meta_setup_db_locations(
24350Sstevel@tonic-gate 	md_error_t	*ep
24360Sstevel@tonic-gate )
24370Sstevel@tonic-gate {
24380Sstevel@tonic-gate 	mddb_config_t	c;
24390Sstevel@tonic-gate 	FILE		*fp;
24400Sstevel@tonic-gate 	char		inbuff[1024];
24410Sstevel@tonic-gate 	char		*buff;
24420Sstevel@tonic-gate 	uint_t		i;
24430Sstevel@tonic-gate 	size_t		sz;
24440Sstevel@tonic-gate 	int		rval = 0;
24450Sstevel@tonic-gate 	char		*devidp;
24460Sstevel@tonic-gate 	uint_t		devid_size;
24470Sstevel@tonic-gate 	char		*minor_name = NULL;
24480Sstevel@tonic-gate 	ddi_devid_t	devid_decode;
24490Sstevel@tonic-gate 	int		checksum;
24500Sstevel@tonic-gate 
24510Sstevel@tonic-gate 	/* do mddb.cf file */
24520Sstevel@tonic-gate 	(void) memset(&c, '\0', sizeof (c));
24530Sstevel@tonic-gate 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
24540Sstevel@tonic-gate 		if (errno != ENOENT)
24550Sstevel@tonic-gate 			return (mdsyserror(ep, errno, META_DBCONF));
24560Sstevel@tonic-gate 	}
24570Sstevel@tonic-gate 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
24580Sstevel@tonic-gate 	    fp)) != NULL)) {
24590Sstevel@tonic-gate 
24600Sstevel@tonic-gate 		/* ignore comments */
24610Sstevel@tonic-gate 		if (*buff == '#')
24620Sstevel@tonic-gate 			continue;
24630Sstevel@tonic-gate 
24640Sstevel@tonic-gate 		/* parse locator */
24650Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
24660Sstevel@tonic-gate 		c.c_setno = MD_LOCAL_SET;
24670Sstevel@tonic-gate 		i = strcspn(buff, " \t");
24680Sstevel@tonic-gate 		if (i > sizeof (c.c_locator.l_driver))
24690Sstevel@tonic-gate 			i = sizeof (c.c_locator.l_driver);
24700Sstevel@tonic-gate 		(void) strncpy(c.c_locator.l_driver, buff, i);
24710Sstevel@tonic-gate 		buff += i;
24720Sstevel@tonic-gate 		c.c_locator.l_dev =
24730Sstevel@tonic-gate 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
24740Sstevel@tonic-gate 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
24750Sstevel@tonic-gate 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
24760Sstevel@tonic-gate 
24770Sstevel@tonic-gate 		/* parse out devid */
24780Sstevel@tonic-gate 		while (isspace((int)(*buff)))
24790Sstevel@tonic-gate 			buff += 1;
24800Sstevel@tonic-gate 		i = strcspn(buff, " \t");
24810Sstevel@tonic-gate 		if ((devidp = (char *)malloc(i+1)) == NULL)
24820Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
24830Sstevel@tonic-gate 
24840Sstevel@tonic-gate 		(void) strncpy(devidp, buff, i);
24850Sstevel@tonic-gate 		devidp[i] = '\0';
24860Sstevel@tonic-gate 		if (devid_str_decode(devidp, &devid_decode,
24870Sstevel@tonic-gate 		    &minor_name) == -1) {
24880Sstevel@tonic-gate 			free(devidp);
24890Sstevel@tonic-gate 			continue;
24900Sstevel@tonic-gate 		}
24910Sstevel@tonic-gate 
24920Sstevel@tonic-gate 		/* Conf file must have minor name associated with devid */
24930Sstevel@tonic-gate 		if (minor_name == NULL) {
24940Sstevel@tonic-gate 			free(devidp);
24950Sstevel@tonic-gate 			devid_free(devid_decode);
24960Sstevel@tonic-gate 			continue;
24970Sstevel@tonic-gate 		}
24980Sstevel@tonic-gate 
24990Sstevel@tonic-gate 		sz = devid_sizeof(devid_decode);
25000Sstevel@tonic-gate 		/* Copy to devid size buffer that ioctl expects */
25010Sstevel@tonic-gate 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
25020Sstevel@tonic-gate 			devid_free(devid_decode);
25030Sstevel@tonic-gate 			free(minor_name);
25040Sstevel@tonic-gate 			free(devidp);
25050Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
25060Sstevel@tonic-gate 		}
25070Sstevel@tonic-gate 
250862Sjeanm 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
25090Sstevel@tonic-gate 		    (void *)devid_decode, sz);
25100Sstevel@tonic-gate 
25110Sstevel@tonic-gate 		devid_free(devid_decode);
25120Sstevel@tonic-gate 
25130Sstevel@tonic-gate 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
25140Sstevel@tonic-gate 			free(minor_name);
25150Sstevel@tonic-gate 			free(devidp);
251662Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25170Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
25180Sstevel@tonic-gate 		}
25190Sstevel@tonic-gate 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
25200Sstevel@tonic-gate 		free(minor_name);
25210Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
2522*5109Spetede 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
25230Sstevel@tonic-gate 		c.c_locator.l_devid_sz = sz;
25240Sstevel@tonic-gate 
25250Sstevel@tonic-gate 		devid_size = strlen(devidp);
25260Sstevel@tonic-gate 		buff += devid_size;
25270Sstevel@tonic-gate 
25280Sstevel@tonic-gate 		checksum = strtol(buff, &buff, 10);
25290Sstevel@tonic-gate 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
25300Sstevel@tonic-gate 			checksum += c.c_locator.l_driver[i];
25310Sstevel@tonic-gate 		for (i = 0; i < devid_size; i++) {
25320Sstevel@tonic-gate 			checksum += devidp[i];
25330Sstevel@tonic-gate 		}
25340Sstevel@tonic-gate 		free(devidp);
25350Sstevel@tonic-gate 
25360Sstevel@tonic-gate 		checksum += minor(c.c_locator.l_dev);
25370Sstevel@tonic-gate 		checksum += c.c_locator.l_blkno;
25380Sstevel@tonic-gate 		if (checksum != 42) {
25390Sstevel@tonic-gate 			/* overwritten later for more serious problems */
25400Sstevel@tonic-gate 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
254162Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25420Sstevel@tonic-gate 			continue;
25430Sstevel@tonic-gate 		}
25440Sstevel@tonic-gate 		c.c_locator.l_flags = 0;
25450Sstevel@tonic-gate 
25460Sstevel@tonic-gate 		/* use db location */
25470Sstevel@tonic-gate 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
254862Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
25490Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
25500Sstevel@tonic-gate 		}
25510Sstevel@tonic-gate 
25520Sstevel@tonic-gate 		/* free up devid if in use */
255362Sjeanm 		free((void *)(uintptr_t)c.c_locator.l_devid);
25540Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
25550Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
25560Sstevel@tonic-gate 	}
25570Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
25580Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_DBCONF));
25590Sstevel@tonic-gate 
25600Sstevel@tonic-gate 	/* check for stale database */
25610Sstevel@tonic-gate 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
25620Sstevel@tonic-gate 	c.c_id = 0;
25630Sstevel@tonic-gate 	c.c_setno = MD_LOCAL_SET;
25640Sstevel@tonic-gate 
2565*5109Spetede 	/*
2566*5109Spetede 	 * While we do not need the devid here we may need to
2567*5109Spetede 	 * know if devid's are being used by the kernel for
2568*5109Spetede 	 * the replicas. This is because under some circumstances
2569*5109Spetede 	 * we can only manipulate the SVM configuration if the
2570*5109Spetede 	 * kernel is using devid's.
2571*5109Spetede 	 */
25720Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
2573*5109Spetede 	c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
2574*5109Spetede 	c.c_locator.l_devid_sz = 0;
25750Sstevel@tonic-gate 
25760Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
25770Sstevel@tonic-gate 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
25780Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
25790Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
25800Sstevel@tonic-gate 	}
25810Sstevel@tonic-gate 
25820Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE)
25830Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
25840Sstevel@tonic-gate 		    0, NULL));
25850Sstevel@tonic-gate 
2586*5109Spetede 	if (c.c_locator.l_devid_sz != 0) {
2587*5109Spetede 		/*
2588*5109Spetede 		 * Devid's are being used to track the replicas because
2589*5109Spetede 		 * there is space for a devid.
2590*5109Spetede 		 */
2591*5109Spetede 		devid_in_use = TRUE;
2592*5109Spetede 	}
2593*5109Spetede 
25940Sstevel@tonic-gate 	/* success */
25950Sstevel@tonic-gate 	return (rval);
25960Sstevel@tonic-gate }
25970Sstevel@tonic-gate 
25980Sstevel@tonic-gate /*
25990Sstevel@tonic-gate  * meta_db_minreplica - returns the minimum size replica currently in use.
26000Sstevel@tonic-gate  */
26010Sstevel@tonic-gate daddr_t
26020Sstevel@tonic-gate meta_db_minreplica(
26030Sstevel@tonic-gate 	mdsetname_t	*sp,
26040Sstevel@tonic-gate 	md_error_t	*ep
26050Sstevel@tonic-gate )
26060Sstevel@tonic-gate {
26070Sstevel@tonic-gate 	md_replica_t		*r;
26080Sstevel@tonic-gate 	md_replicalist_t	*rl, *rlp = NULL;
26090Sstevel@tonic-gate 	daddr_t			nblks = 0;
26100Sstevel@tonic-gate 
26110Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
26120Sstevel@tonic-gate 		return (-1);
26130Sstevel@tonic-gate 
26140Sstevel@tonic-gate 	if (rlp == NULL)
26150Sstevel@tonic-gate 		return (-1);
26160Sstevel@tonic-gate 
26170Sstevel@tonic-gate 	/* find the smallest existing replica */
26180Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
26190Sstevel@tonic-gate 		r = rl->rl_repp;
26200Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
26210Sstevel@tonic-gate 	}
26220Sstevel@tonic-gate 
26230Sstevel@tonic-gate 	metafreereplicalist(rlp);
26240Sstevel@tonic-gate 	return (nblks);
26250Sstevel@tonic-gate }
26260Sstevel@tonic-gate 
26270Sstevel@tonic-gate /*
26280Sstevel@tonic-gate  * meta_get_replica_names
26290Sstevel@tonic-gate  *  returns an mdnamelist_t of replica slices
26300Sstevel@tonic-gate  */
26310Sstevel@tonic-gate /*ARGSUSED*/
26320Sstevel@tonic-gate int
26330Sstevel@tonic-gate meta_get_replica_names(
26340Sstevel@tonic-gate 	mdsetname_t	*sp,
26350Sstevel@tonic-gate 	mdnamelist_t	**nlpp,
26360Sstevel@tonic-gate 	int		options,
26370Sstevel@tonic-gate 	md_error_t	*ep
26380Sstevel@tonic-gate )
26390Sstevel@tonic-gate {
26400Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
26410Sstevel@tonic-gate 	md_replicalist_t	*rl;
26420Sstevel@tonic-gate 	mdnamelist_t		**tailpp = nlpp;
26430Sstevel@tonic-gate 	int			cnt = 0;
26440Sstevel@tonic-gate 
26450Sstevel@tonic-gate 	assert(nlpp != NULL);
26460Sstevel@tonic-gate 
26470Sstevel@tonic-gate 	if (!metaislocalset(sp))
26480Sstevel@tonic-gate 		goto out;
26490Sstevel@tonic-gate 
26500Sstevel@tonic-gate 	/* get replicas */
26510Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
26520Sstevel@tonic-gate 		cnt = -1;
26530Sstevel@tonic-gate 		goto out;
26540Sstevel@tonic-gate 	}
26550Sstevel@tonic-gate 
26560Sstevel@tonic-gate 	/* build name list */
26570Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
26580Sstevel@tonic-gate 		/*
26590Sstevel@tonic-gate 		 * Add the name struct to the end of the
26600Sstevel@tonic-gate 		 * namelist but keep a pointer to the last
26610Sstevel@tonic-gate 		 * element so that we don't incur the overhead
26620Sstevel@tonic-gate 		 * of traversing the list each time
26630Sstevel@tonic-gate 		 */
26640Sstevel@tonic-gate 		tailpp = meta_namelist_append_wrapper(
2665*5109Spetede 		    tailpp, rl->rl_repp->r_namep);
26660Sstevel@tonic-gate 		++cnt;
26670Sstevel@tonic-gate 	}
26680Sstevel@tonic-gate 
26690Sstevel@tonic-gate 	/* cleanup, return count or error */
26700Sstevel@tonic-gate out:
26710Sstevel@tonic-gate 	metafreereplicalist(rlp);
26720Sstevel@tonic-gate 	return (cnt);
26730Sstevel@tonic-gate }
2674