10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1623Stw21770  * Common Development and Distribution License (the "License").
6*1623Stw21770  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*1623Stw21770  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
300Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
330Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
340Sstevel@tonic-gate #endif
350Sstevel@tonic-gate 
360Sstevel@tonic-gate /*
370Sstevel@tonic-gate  * Metadevice database interfaces.
380Sstevel@tonic-gate  */
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #define	MDDB
410Sstevel@tonic-gate 
420Sstevel@tonic-gate #include <meta.h>
430Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
440Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
450Sstevel@tonic-gate #include <sys/lvm/mdio.h>
460Sstevel@tonic-gate #include <string.h>
470Sstevel@tonic-gate #include <strings.h>
480Sstevel@tonic-gate #include <ctype.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate struct svm_daemon {
510Sstevel@tonic-gate 	char *svmd_name;
520Sstevel@tonic-gate 	char *svmd_kill_val;
530Sstevel@tonic-gate };
540Sstevel@tonic-gate 
550Sstevel@tonic-gate struct svm_daemon svmd_kill_list[] = {
560Sstevel@tonic-gate 		{"mdmonitord", "HUP"},
570Sstevel@tonic-gate 		{"mddoors", "KILL"},
580Sstevel@tonic-gate 	};
590Sstevel@tonic-gate 
600Sstevel@tonic-gate #define	DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon))
610Sstevel@tonic-gate #define	MDMONITORD	"/usr/sbin/mdmonitord"
620Sstevel@tonic-gate 
630Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
640Sstevel@tonic-gate 
650Sstevel@tonic-gate /*
660Sstevel@tonic-gate  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
670Sstevel@tonic-gate  */
680Sstevel@tonic-gate md_timeval32_t
690Sstevel@tonic-gate meta_get_lb_inittime(
700Sstevel@tonic-gate 	mdsetname_t	*sp,
710Sstevel@tonic-gate 	md_error_t	*ep
720Sstevel@tonic-gate )
730Sstevel@tonic-gate {
740Sstevel@tonic-gate 	mddb_config_t	c;
750Sstevel@tonic-gate 
760Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
770Sstevel@tonic-gate 
780Sstevel@tonic-gate 	/* Fill in setno, setname, and sideno */
790Sstevel@tonic-gate 	c.c_setno = sp->setno;
800Sstevel@tonic-gate 
810Sstevel@tonic-gate 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
820Sstevel@tonic-gate 		(void) mdstealerror(ep, &c.c_mde);
830Sstevel@tonic-gate 	}
840Sstevel@tonic-gate 
850Sstevel@tonic-gate 	return (c.c_timestamp);
860Sstevel@tonic-gate }
870Sstevel@tonic-gate 
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate  * mkmasterblks writes out the master blocks of the mddb to the replica.
900Sstevel@tonic-gate  *
910Sstevel@tonic-gate  * In a MN diskset, this is called by the node that is adding this replica
920Sstevel@tonic-gate  * to the diskset.
930Sstevel@tonic-gate  */
940Sstevel@tonic-gate 
950Sstevel@tonic-gate #define	MDDB_VERIFY_SIZE	8192
960Sstevel@tonic-gate 
970Sstevel@tonic-gate static int
980Sstevel@tonic-gate mkmasterblks(
990Sstevel@tonic-gate 	mdsetname_t	*sp,
1000Sstevel@tonic-gate 	mdname_t	*np,
1010Sstevel@tonic-gate 	int		fd,
1020Sstevel@tonic-gate 	daddr_t		firstblk,
1030Sstevel@tonic-gate 	int		dbsize,
1040Sstevel@tonic-gate 	md_timeval32_t	inittime,
1050Sstevel@tonic-gate 	md_error_t	*ep
1060Sstevel@tonic-gate )
1070Sstevel@tonic-gate {
1080Sstevel@tonic-gate 	int		consecutive;
1090Sstevel@tonic-gate 	md_timeval32_t	tp;
1100Sstevel@tonic-gate 	struct mddb_mb	*mb;
1110Sstevel@tonic-gate 	char		*buffer;
1120Sstevel@tonic-gate 	int		iosize;
1130Sstevel@tonic-gate 	md_set_desc	*sd;
1140Sstevel@tonic-gate 	int		mn_set = 0;
1150Sstevel@tonic-gate 	daddr_t		startblk;
1160Sstevel@tonic-gate 	int		cnt;
1170Sstevel@tonic-gate 	ddi_devid_t	devid;
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1200Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1210Sstevel@tonic-gate 			return (-1);
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
1240Sstevel@tonic-gate 			mn_set = 1;		/* Used later */
1250Sstevel@tonic-gate 		}
1260Sstevel@tonic-gate 	}
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate 	/*
1290Sstevel@tonic-gate 	 * Loop to verify the entire mddb region on disk is read/writable.
1300Sstevel@tonic-gate 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
1310Sstevel@tonic-gate 	 * chunks.
1320Sstevel@tonic-gate 	 *
1330Sstevel@tonic-gate 	 * A side-effect of this loop is to zero out the entire mddb region
1340Sstevel@tonic-gate 	 */
1350Sstevel@tonic-gate 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
1360Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 	startblk = firstblk;
1390Sstevel@tonic-gate 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate 		if (cnt > MDDB_VERIFY_SIZE)
1420Sstevel@tonic-gate 			consecutive = MDDB_VERIFY_SIZE;
1430Sstevel@tonic-gate 		else
1440Sstevel@tonic-gate 			consecutive = cnt;
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
1470Sstevel@tonic-gate 			Free(buffer);
1480Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1490Sstevel@tonic-gate 		}
1500Sstevel@tonic-gate 
1510Sstevel@tonic-gate 		iosize = DEV_BSIZE * consecutive;
1520Sstevel@tonic-gate 		if (write(fd, buffer, iosize) != iosize) {
1530Sstevel@tonic-gate 			Free(buffer);
1540Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1550Sstevel@tonic-gate 		}
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
1580Sstevel@tonic-gate 			Free(buffer);
1590Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1600Sstevel@tonic-gate 		}
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate 		if (read(fd, buffer, iosize) != iosize) {
1630Sstevel@tonic-gate 			Free(buffer);
1640Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1650Sstevel@tonic-gate 		}
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate 		startblk += consecutive;
1680Sstevel@tonic-gate 	}
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	Free(buffer);
1710Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
1720Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) == -1) {
1750Sstevel@tonic-gate 		Free(mb);
1760Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
1770Sstevel@tonic-gate 	}
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_MB;
1800Sstevel@tonic-gate 	/*
1810Sstevel@tonic-gate 	 * If a MN diskset, set master block revision for a MN set.
1820Sstevel@tonic-gate 	 * Even though the master block structure is no different
1830Sstevel@tonic-gate 	 * for a MN set, setting the revision field to a different
1840Sstevel@tonic-gate 	 * number keeps any pre-MN_diskset code from accessing
1850Sstevel@tonic-gate 	 * this diskset.  It also allows for an early determination
1860Sstevel@tonic-gate 	 * of a MN diskset when reading in from disk so that the
1870Sstevel@tonic-gate 	 * proper size locator block and locator names structure
1880Sstevel@tonic-gate 	 * can be read in thus saving time on diskset startup.
1890Sstevel@tonic-gate 	 */
1900Sstevel@tonic-gate 	if (mn_set)
1910Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MNMB;
1920Sstevel@tonic-gate 	else
1930Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MB;
1940Sstevel@tonic-gate 	mb->mb_timestamp = tp;
1950Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
1960Sstevel@tonic-gate 	mb->mb_blkcnt = dbsize - 1;
1970Sstevel@tonic-gate 	mb->mb_blkno = firstblk;
1980Sstevel@tonic-gate 	mb->mb_nextblk = 0;
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 	mb->mb_blkmap.m_firstblk = firstblk + 1;
2010Sstevel@tonic-gate 	mb->mb_blkmap.m_consecutive = dbsize - 1;
2020Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2030Sstevel@tonic-gate 		mb->mb_setcreatetime = inittime;
2040Sstevel@tonic-gate 	}
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate 	/*
2070Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
2080Sstevel@tonic-gate 	 * the master block. The saved devid is used to provide a mapping
2090Sstevel@tonic-gate 	 * between this disk's devid and the devid stored into the master
2100Sstevel@tonic-gate 	 * block. This allows the disk image to be self-identifying
2110Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
2120Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
2130Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
2140Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
2150Sstevel@tonic-gate 	 * in the remote copy scenario.
2160Sstevel@tonic-gate 	 */
2170Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
2180Sstevel@tonic-gate 		size_t len;
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 		len = devid_sizeof(devid);
2210Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
2220Sstevel@tonic-gate 			/* there is enough space to store the devid */
2230Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
2240Sstevel@tonic-gate 			mb->mb_devid_len = len;
2250Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, devid, len);
2260Sstevel@tonic-gate 		}
2270Sstevel@tonic-gate 		devid_free(devid);
2280Sstevel@tonic-gate 	}
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
2310Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
2340Sstevel@tonic-gate 		Free(mb);
2350Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2360Sstevel@tonic-gate 	}
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
2390Sstevel@tonic-gate 		Free(mb);
2400Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2410Sstevel@tonic-gate 	}
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
2440Sstevel@tonic-gate 		Free(mb);
2450Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2460Sstevel@tonic-gate 	}
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
2490Sstevel@tonic-gate 		Free(mb);
2500Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2510Sstevel@tonic-gate 	}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
2540Sstevel@tonic-gate 		(uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
2550Sstevel@tonic-gate 		Free(mb);
2560Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_NOTVERIFIED,
2570Sstevel@tonic-gate 			meta_getminor(np->dev), sp->setno, 0, np->rname));
2580Sstevel@tonic-gate 	}
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate 	Free(mb);
2610Sstevel@tonic-gate 	return (0);
2620Sstevel@tonic-gate }
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate void
2650Sstevel@tonic-gate meta_mkdummymaster(
2660Sstevel@tonic-gate 	mdsetname_t	*sp,
2670Sstevel@tonic-gate 	int		fd,
2680Sstevel@tonic-gate 	daddr_t		firstblk
2690Sstevel@tonic-gate )
2700Sstevel@tonic-gate {
2710Sstevel@tonic-gate 	md_timeval32_t	tp;
2720Sstevel@tonic-gate 	struct mddb_mb	*mb;
2730Sstevel@tonic-gate 	ddi_devid_t	devid;
2740Sstevel@tonic-gate 	md_set_desc	*sd;
2750Sstevel@tonic-gate 	md_error_t	ep = mdnullerror;
2760Sstevel@tonic-gate 	md_timeval32_t	inittime;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	/*
2790Sstevel@tonic-gate 	 * No dummy master blocks are written for a MN diskset since devids
2800Sstevel@tonic-gate 	 * are not supported in MN disksets.
2810Sstevel@tonic-gate 	 */
2820Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2830Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
2840Sstevel@tonic-gate 			return;
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
2870Sstevel@tonic-gate 			return;
2880Sstevel@tonic-gate 	}
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
2910Sstevel@tonic-gate 		return;
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_DU;
2940Sstevel@tonic-gate 	mb->mb_revision = MDDB_REV_MB;
2950Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
2960Sstevel@tonic-gate 	inittime = meta_get_lb_inittime(sp, &ep);
2970Sstevel@tonic-gate 	mb->mb_setcreatetime = inittime;
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) != -1)
3000Sstevel@tonic-gate 		mb->mb_timestamp = tp;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	/*
3030Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
3040Sstevel@tonic-gate 	 * the master block.  This allows the disk image to be self-identifying
3050Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
3060Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
3070Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
3080Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
3090Sstevel@tonic-gate 	 * in the remote copy scenario.
3100Sstevel@tonic-gate 	 */
3110Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
3120Sstevel@tonic-gate 		int len;
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 		len = devid_sizeof(devid);
3150Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
3160Sstevel@tonic-gate 			/* there is enough space to store the devid */
3170Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
3180Sstevel@tonic-gate 			mb->mb_devid_len = len;
3190Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, (char *)devid, len);
3200Sstevel@tonic-gate 		}
3210Sstevel@tonic-gate 		devid_free(devid);
3220Sstevel@tonic-gate 	}
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3250Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	/*
3280Sstevel@tonic-gate 	 * If any of these operations fail, we need to inform the
3290Sstevel@tonic-gate 	 * user that the disk won't be self identifying. When support
3300Sstevel@tonic-gate 	 * for importing remotely replicated disksets is added, we
3310Sstevel@tonic-gate 	 * want to add the error messages here.
3320Sstevel@tonic-gate 	 */
3330Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
3340Sstevel@tonic-gate 		goto out;
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
3370Sstevel@tonic-gate 		goto out;
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
3400Sstevel@tonic-gate 		goto out;
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
3430Sstevel@tonic-gate 		goto out;
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
3460Sstevel@tonic-gate 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
3470Sstevel@tonic-gate 		goto out;
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate out:
3500Sstevel@tonic-gate 	Free(mb);
3510Sstevel@tonic-gate }
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate static int
3540Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
3550Sstevel@tonic-gate {
3560Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
3570Sstevel@tonic-gate 	md_replicalist_t	*rl;
3580Sstevel@tonic-gate 	FILE			*cfp = NULL;
3590Sstevel@tonic-gate 	FILE			*mfp = NULL;
3600Sstevel@tonic-gate 	struct stat		sbuf;
3610Sstevel@tonic-gate 	int			rval = 0;
3620Sstevel@tonic-gate 	int			in_miniroot = 0;
3630Sstevel@tonic-gate 	char			line[MDDB_BOOTLIST_MAX_LEN];
3640Sstevel@tonic-gate 	char			*tname = NULL;
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate 	/* get list of local replicas */
3670Sstevel@tonic-gate 	if (! metaislocalset(sp))
3680Sstevel@tonic-gate 		return (0);
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
3710Sstevel@tonic-gate 		return (-1);
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate 	/* open tempfile, copy permissions of original file */
3740Sstevel@tonic-gate 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
3750Sstevel@tonic-gate 		/*
3760Sstevel@tonic-gate 		 * On the miniroot tmp files must be created in /var/tmp.
3770Sstevel@tonic-gate 		 * If we get a EROFS error, we assume that we are in the
3780Sstevel@tonic-gate 		 * miniroot.
3790Sstevel@tonic-gate 		 */
3800Sstevel@tonic-gate 		if (errno != EROFS)
3810Sstevel@tonic-gate 			goto error;
3820Sstevel@tonic-gate 		in_miniroot = 1;
3830Sstevel@tonic-gate 		errno = 0;
3840Sstevel@tonic-gate 		tname = tempnam("/var/tmp", "slvm_");
3850Sstevel@tonic-gate 		if (tname == NULL && errno == EROFS) {
3860Sstevel@tonic-gate 			/*
3870Sstevel@tonic-gate 			 * If we are booted on a read-only root because
3880Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
3890Sstevel@tonic-gate 			 * any scary error messages.
3900Sstevel@tonic-gate 			 */
3910Sstevel@tonic-gate 			errno = 0;
3920Sstevel@tonic-gate 			goto out;
3930Sstevel@tonic-gate 		}
3940Sstevel@tonic-gate 
3950Sstevel@tonic-gate 		/* open tempfile, copy permissions of original file */
3960Sstevel@tonic-gate 		if ((cfp = fopen(tname, "w+")) == NULL)
3970Sstevel@tonic-gate 			goto error;
3980Sstevel@tonic-gate 	}
3990Sstevel@tonic-gate 	if (stat(META_DBCONF, &sbuf) == 0) {
4000Sstevel@tonic-gate 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
4010Sstevel@tonic-gate 			goto error;
4020Sstevel@tonic-gate 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
4030Sstevel@tonic-gate 			goto error;
4040Sstevel@tonic-gate 	}
4050Sstevel@tonic-gate 
4060Sstevel@tonic-gate 	/* print header */
4070Sstevel@tonic-gate 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
4080Sstevel@tonic-gate 		goto error;
4090Sstevel@tonic-gate 	if (fprintf(cfp, "do not hand edit\n") < 0)
4100Sstevel@tonic-gate 		goto error;
4110Sstevel@tonic-gate 	if (fprintf(cfp,
4120Sstevel@tonic-gate 		"#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
4130Sstevel@tonic-gate 		goto error;
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	/* dump replicas */
4160Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
4170Sstevel@tonic-gate 		md_replica_t	*r = rl->rl_repp;
4180Sstevel@tonic-gate 		int		checksum = 42;
4190Sstevel@tonic-gate 		int		i;
4200Sstevel@tonic-gate 		char		*devidp;
4210Sstevel@tonic-gate 		minor_t		min;
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
4240Sstevel@tonic-gate 		/* If devid code can't encode devidp - skip entry */
4250Sstevel@tonic-gate 		if (devidp == NULL) {
4260Sstevel@tonic-gate 			continue;
4270Sstevel@tonic-gate 		}
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 		/* compute checksum */
4300Sstevel@tonic-gate 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
4310Sstevel@tonic-gate 		    (i < sizeof (r->r_driver_name))); i++) {
4320Sstevel@tonic-gate 			checksum -= r->r_driver_name[i];
4330Sstevel@tonic-gate 		}
4340Sstevel@tonic-gate 		min = meta_getminor(r->r_namep->dev);
4350Sstevel@tonic-gate 		checksum -= min;
4360Sstevel@tonic-gate 		checksum -= r->r_blkno;
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 		for (i = 0; i < strlen(devidp); i++) {
4390Sstevel@tonic-gate 			checksum -= devidp[i];
4400Sstevel@tonic-gate 		}
4410Sstevel@tonic-gate 		/* print info */
4420Sstevel@tonic-gate 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
4430Sstevel@tonic-gate 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
4440Sstevel@tonic-gate 			goto error;
4450Sstevel@tonic-gate 		}
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate 		devid_str_free(devidp);
4480Sstevel@tonic-gate 	}
4490Sstevel@tonic-gate 
4500Sstevel@tonic-gate 	/* close and rename to real file */
4510Sstevel@tonic-gate 	if (fflush(cfp) != 0)
4520Sstevel@tonic-gate 		goto error;
4530Sstevel@tonic-gate 	if (fsync(fileno(cfp)) != 0)
4540Sstevel@tonic-gate 		goto error;
4550Sstevel@tonic-gate 	if (fclose(cfp) != 0) {
4560Sstevel@tonic-gate 		cfp = NULL;
4570Sstevel@tonic-gate 		goto error;
4580Sstevel@tonic-gate 	}
4590Sstevel@tonic-gate 	cfp = NULL;
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate 	/*
4620Sstevel@tonic-gate 	 * Renames don't work in the miniroot since tmpfiles are
4630Sstevel@tonic-gate 	 * created in /var/tmp. Hence we copy the data out.
4640Sstevel@tonic-gate 	 */
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate 	if (! in_miniroot) {
4670Sstevel@tonic-gate 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
4680Sstevel@tonic-gate 			goto error;
4690Sstevel@tonic-gate 	} else {
4700Sstevel@tonic-gate 		if ((cfp = fopen(tname, "r")) == NULL)
4710Sstevel@tonic-gate 			goto error;
4720Sstevel@tonic-gate 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
4730Sstevel@tonic-gate 			goto error;
4740Sstevel@tonic-gate 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
4750Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
4760Sstevel@tonic-gate 				goto error;
4770Sstevel@tonic-gate 		}
4780Sstevel@tonic-gate 		(void) fclose(cfp);
4790Sstevel@tonic-gate 		cfp = NULL;
4800Sstevel@tonic-gate 		if (fflush(mfp) != 0)
4810Sstevel@tonic-gate 			goto error;
4820Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
4830Sstevel@tonic-gate 			goto error;
4840Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
4850Sstevel@tonic-gate 			mfp = NULL;
4860Sstevel@tonic-gate 			goto error;
4870Sstevel@tonic-gate 		}
4880Sstevel@tonic-gate 		/* delete the tempfile */
4890Sstevel@tonic-gate 		(void) unlink(tname);
4900Sstevel@tonic-gate 	}
4910Sstevel@tonic-gate 	/* success */
4920Sstevel@tonic-gate 	rval = 0;
4930Sstevel@tonic-gate 	goto out;
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 	/* tempfile error */
4960Sstevel@tonic-gate error:
4970Sstevel@tonic-gate 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
4980Sstevel@tonic-gate 				mdsyserror(ep, errno, META_DBCONFTMP);
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	/* cleanup, return success */
5020Sstevel@tonic-gate out:
5030Sstevel@tonic-gate 	if (rlp != NULL)
5040Sstevel@tonic-gate 		metafreereplicalist(rlp);
5050Sstevel@tonic-gate 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
5060Sstevel@tonic-gate 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5070Sstevel@tonic-gate 					mdsyserror(ep, errno, META_DBCONFTMP);
5080Sstevel@tonic-gate 	}
5090Sstevel@tonic-gate 	free(tname);
5100Sstevel@tonic-gate 	return (rval);
5110Sstevel@tonic-gate }
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate /*
5140Sstevel@tonic-gate  * check replica for dev
5150Sstevel@tonic-gate  */
5160Sstevel@tonic-gate static int
5170Sstevel@tonic-gate in_replica(
5180Sstevel@tonic-gate 	mdsetname_t	*sp,
5190Sstevel@tonic-gate 	md_replica_t	*rp,
5200Sstevel@tonic-gate 	mdname_t	*np,
5210Sstevel@tonic-gate 	diskaddr_t	slblk,
5220Sstevel@tonic-gate 	diskaddr_t	nblks,
5230Sstevel@tonic-gate 	md_error_t	*ep
5240Sstevel@tonic-gate )
5250Sstevel@tonic-gate {
5260Sstevel@tonic-gate 	mdname_t	*repnp = rp->r_namep;
5270Sstevel@tonic-gate 	diskaddr_t	rep_sblk = rp->r_blkno;
5280Sstevel@tonic-gate 	diskaddr_t	rep_nblks = rp->r_nblk;
5290Sstevel@tonic-gate 
5300Sstevel@tonic-gate 	/* should be in the same set */
5310Sstevel@tonic-gate 	assert(sp != NULL);
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate 	/* if error in master block, assume whole partition */
5340Sstevel@tonic-gate 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
5350Sstevel@tonic-gate 	    (rep_nblks == MD_DISKADDR_ERROR)) {
5360Sstevel@tonic-gate 		rep_sblk = 0;
5370Sstevel@tonic-gate 		rep_nblks = MD_DISKADDR_ERROR;
5380Sstevel@tonic-gate 	}
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	/* check overlap */
5410Sstevel@tonic-gate 	if (meta_check_overlap(
5420Sstevel@tonic-gate 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
5430Sstevel@tonic-gate 		return (-1);
5440Sstevel@tonic-gate 	}
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	/* return success */
5470Sstevel@tonic-gate 	return (0);
5480Sstevel@tonic-gate }
5490Sstevel@tonic-gate 
5500Sstevel@tonic-gate /*
5510Sstevel@tonic-gate  * check to see if we're in a replica
5520Sstevel@tonic-gate  */
5530Sstevel@tonic-gate int
5540Sstevel@tonic-gate meta_check_inreplica(
5550Sstevel@tonic-gate 	mdsetname_t		*sp,
5560Sstevel@tonic-gate 	mdname_t		*np,
5570Sstevel@tonic-gate 	diskaddr_t		slblk,
5580Sstevel@tonic-gate 	diskaddr_t		nblks,
5590Sstevel@tonic-gate 	md_error_t		*ep
5600Sstevel@tonic-gate )
5610Sstevel@tonic-gate {
5620Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
5630Sstevel@tonic-gate 	md_replicalist_t	*rl;
5640Sstevel@tonic-gate 	int			rval = 0;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	/* should have a set */
5670Sstevel@tonic-gate 	assert(sp != NULL);
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate 	/* for each replica */
5700Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
5710Sstevel@tonic-gate 		return (-1);
5720Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
5730Sstevel@tonic-gate 		md_replica_t	*rp = rl->rl_repp;
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 		/* check replica */
5760Sstevel@tonic-gate 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
5770Sstevel@tonic-gate 			rval = -1;
5780Sstevel@tonic-gate 			break;
5790Sstevel@tonic-gate 		}
5800Sstevel@tonic-gate 	}
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate 	/* cleanup, return success */
5830Sstevel@tonic-gate 	metafreereplicalist(rlp);
5840Sstevel@tonic-gate 	return (rval);
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate  * check replica
5890Sstevel@tonic-gate  */
5900Sstevel@tonic-gate int
5910Sstevel@tonic-gate meta_check_replica(
5920Sstevel@tonic-gate 	mdsetname_t	*sp,		/* set to check against */
5930Sstevel@tonic-gate 	mdname_t	*np,		/* component to check against */
5940Sstevel@tonic-gate 	mdchkopts_t	options,	/* option flags */
5950Sstevel@tonic-gate 	diskaddr_t	slblk,		/* start logical block */
5960Sstevel@tonic-gate 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
5970Sstevel@tonic-gate 	md_error_t	*ep		/* error packet */
5980Sstevel@tonic-gate )
5990Sstevel@tonic-gate {
6000Sstevel@tonic-gate 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate 	/* make sure we have a disk */
6030Sstevel@tonic-gate 	if (metachkcomp(np, ep) != 0)
6040Sstevel@tonic-gate 		return (-1);
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate 	/* check to ensure that it is not already in use */
6070Sstevel@tonic-gate 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
6080Sstevel@tonic-gate 		return (-1);
6090Sstevel@tonic-gate 	}
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	if (options & MDCHK_ALLOW_NODBS)
6120Sstevel@tonic-gate 		return (0);
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate 	if (options & MDCHK_DRVINSET)
6150Sstevel@tonic-gate 		return (0);
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	/* make sure it is in the set */
6180Sstevel@tonic-gate 	if (meta_check_inset(sp, np, ep) != 0)
6190Sstevel@tonic-gate 		return (-1);
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	/* make sure its not in a metadevice */
6220Sstevel@tonic-gate 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
6230Sstevel@tonic-gate 		return (-1);
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 	/* return success */
6260Sstevel@tonic-gate 	return (0);
6270Sstevel@tonic-gate }
6280Sstevel@tonic-gate 
6290Sstevel@tonic-gate static int
6300Sstevel@tonic-gate update_dbinfo_on_drives(
6310Sstevel@tonic-gate 	mdsetname_t	*sp,
6320Sstevel@tonic-gate 	md_drive_desc	*dd,
6330Sstevel@tonic-gate 	int		set_locked,
6340Sstevel@tonic-gate 	int		force,
6350Sstevel@tonic-gate 	md_error_t	*ep
6360Sstevel@tonic-gate )
6370Sstevel@tonic-gate {
6380Sstevel@tonic-gate 	md_set_desc		*sd;
6390Sstevel@tonic-gate 	int			i;
6400Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
6410Sstevel@tonic-gate 	int			rval = 0;
6420Sstevel@tonic-gate 	md_mnnode_desc		*nd;
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
6450Sstevel@tonic-gate 		return (-1);
6460Sstevel@tonic-gate 
6470Sstevel@tonic-gate 	if (! set_locked) {
6480Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
6490Sstevel@tonic-gate 			md_error_t xep = mdnullerror;
6500Sstevel@tonic-gate 			sigset_t sigs;
6510Sstevel@tonic-gate 			/* Make sure we are blocking all signals */
6520Sstevel@tonic-gate 			if (procsigs(TRUE, &sigs, &xep) < 0)
6530Sstevel@tonic-gate 				mdclrerror(&xep);
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate 			nd = sd->sd_nodelist;
6560Sstevel@tonic-gate 			while (nd) {
6570Sstevel@tonic-gate 				if (force && strcmp(nd->nd_nodename,
6580Sstevel@tonic-gate 				    mynode()) != 0) {
6590Sstevel@tonic-gate 					nd = nd->nd_next;
6600Sstevel@tonic-gate 					continue;
6610Sstevel@tonic-gate 				}
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
6640Sstevel@tonic-gate 					nd = nd->nd_next;
6650Sstevel@tonic-gate 					continue;
6660Sstevel@tonic-gate 				}
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
6690Sstevel@tonic-gate 					return (-1);
6700Sstevel@tonic-gate 				nd = nd->nd_next;
6710Sstevel@tonic-gate 			}
6720Sstevel@tonic-gate 		} else {
6730Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
6740Sstevel@tonic-gate 				/* Skip empty slots */
6750Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
6760Sstevel@tonic-gate 					continue;
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 				if (force && strcmp(sd->sd_nodes[i],
6790Sstevel@tonic-gate 				    mynode()) != 0)
6800Sstevel@tonic-gate 					continue;
6810Sstevel@tonic-gate 
6820Sstevel@tonic-gate 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
6830Sstevel@tonic-gate 					return (-1);
6840Sstevel@tonic-gate 			}
6850Sstevel@tonic-gate 		}
6860Sstevel@tonic-gate 	}
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
6890Sstevel@tonic-gate 		nd = sd->sd_nodelist;
6900Sstevel@tonic-gate 		while (nd) {
6910Sstevel@tonic-gate 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
6920Sstevel@tonic-gate 				nd = nd->nd_next;
6930Sstevel@tonic-gate 				continue;
6940Sstevel@tonic-gate 			}
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
6970Sstevel@tonic-gate 				nd = nd->nd_next;
6980Sstevel@tonic-gate 				continue;
6990Sstevel@tonic-gate 			}
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
7020Sstevel@tonic-gate 			    == -1) {
7030Sstevel@tonic-gate 				rval = -1;
7040Sstevel@tonic-gate 				break;
7050Sstevel@tonic-gate 			}
7060Sstevel@tonic-gate 			nd = nd->nd_next;
7070Sstevel@tonic-gate 		}
7080Sstevel@tonic-gate 	} else {
7090Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
7100Sstevel@tonic-gate 			/* Skip empty slots */
7110Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
7120Sstevel@tonic-gate 				continue;
7130Sstevel@tonic-gate 
7140Sstevel@tonic-gate 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
7150Sstevel@tonic-gate 				continue;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
7180Sstevel@tonic-gate 			    == -1) {
7190Sstevel@tonic-gate 				rval = -1;
7200Sstevel@tonic-gate 				break;
7210Sstevel@tonic-gate 			}
7220Sstevel@tonic-gate 		}
7230Sstevel@tonic-gate 	}
7240Sstevel@tonic-gate 
7250Sstevel@tonic-gate 	if (! set_locked) {
7260Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
7270Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
7280Sstevel@tonic-gate 			nd = sd->sd_nodelist;
7290Sstevel@tonic-gate 			while (nd) {
7300Sstevel@tonic-gate 				if (force &&
7310Sstevel@tonic-gate 				    strcmp(nd->nd_nodename, mynode()) != 0) {
7320Sstevel@tonic-gate 					nd = nd->nd_next;
7330Sstevel@tonic-gate 					continue;
7340Sstevel@tonic-gate 				}
7350Sstevel@tonic-gate 
7360Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7370Sstevel@tonic-gate 					nd = nd->nd_next;
7380Sstevel@tonic-gate 					continue;
7390Sstevel@tonic-gate 				}
7400Sstevel@tonic-gate 
7410Sstevel@tonic-gate 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
7420Sstevel@tonic-gate 				    ep)) {
7430Sstevel@tonic-gate 					rval = -1;
7440Sstevel@tonic-gate 					break;
7450Sstevel@tonic-gate 				}
7460Sstevel@tonic-gate 				nd = nd->nd_next;
7470Sstevel@tonic-gate 			}
7480Sstevel@tonic-gate 		} else {
7490Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
7500Sstevel@tonic-gate 				/* Skip empty slots */
7510Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
7520Sstevel@tonic-gate 					continue;
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate 				if (force &&
7550Sstevel@tonic-gate 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
7560Sstevel@tonic-gate 					continue;
7570Sstevel@tonic-gate 
7580Sstevel@tonic-gate 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
7590Sstevel@tonic-gate 				    ep)) {
7600Sstevel@tonic-gate 					rval = -1;
7610Sstevel@tonic-gate 					break;
7620Sstevel@tonic-gate 				}
7630Sstevel@tonic-gate 			}
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate 		}
7660Sstevel@tonic-gate 		cl_set_setkey(NULL);
7670Sstevel@tonic-gate 	}
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate 	return (rval);
7700Sstevel@tonic-gate }
7710Sstevel@tonic-gate 
7720Sstevel@tonic-gate int
7730Sstevel@tonic-gate meta_db_addsidenms(
7740Sstevel@tonic-gate 	mdsetname_t	*sp,
7750Sstevel@tonic-gate 	mdname_t	*np,
7760Sstevel@tonic-gate 	daddr_t		blkno,
7770Sstevel@tonic-gate 	int		bcast,
7780Sstevel@tonic-gate 	md_error_t	*ep
7790Sstevel@tonic-gate )
7800Sstevel@tonic-gate {
7810Sstevel@tonic-gate 	side_t		sideno;
7820Sstevel@tonic-gate 	char		*bname = NULL;
7830Sstevel@tonic-gate 	char		*dname = NULL;
7840Sstevel@tonic-gate 	minor_t		mnum;
7850Sstevel@tonic-gate 	mddb_config_t	c;
7860Sstevel@tonic-gate 	int		done;
7870Sstevel@tonic-gate 	int		rval = 0;
7880Sstevel@tonic-gate 	md_set_desc	*sd;
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	sideno = MD_SIDEWILD;
7910Sstevel@tonic-gate 	/*CONSTCOND*/
7920Sstevel@tonic-gate 	while (1) {
7930Sstevel@tonic-gate 		if (bname != NULL) {
7940Sstevel@tonic-gate 			Free(bname);
7950Sstevel@tonic-gate 			bname = NULL;
7960Sstevel@tonic-gate 		}
7970Sstevel@tonic-gate 		if (dname != NULL) {
7980Sstevel@tonic-gate 			Free(dname);
7990Sstevel@tonic-gate 			dname = NULL;
8000Sstevel@tonic-gate 		}
8010Sstevel@tonic-gate 		if ((done = meta_getnextside_devinfo(sp, np->bname,
8020Sstevel@tonic-gate 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
8030Sstevel@tonic-gate 			rval = -1;
8040Sstevel@tonic-gate 			break;
8050Sstevel@tonic-gate 		}
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 		if (done == 0)
8080Sstevel@tonic-gate 			break;
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
8110Sstevel@tonic-gate 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
8120Sstevel@tonic-gate 				rval = -1;
8130Sstevel@tonic-gate 				break;
8140Sstevel@tonic-gate 			}
8150Sstevel@tonic-gate 		}
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate 		/*
8180Sstevel@tonic-gate 		 * Send addsidenms to all nodes using rpc.mdcommd if
8190Sstevel@tonic-gate 		 * sidename is being added to MN diskset.
8200Sstevel@tonic-gate 		 *
8210Sstevel@tonic-gate 		 *   It's ok to broadcast this call to other nodes.
8220Sstevel@tonic-gate 		 *
8230Sstevel@tonic-gate 		 *   Note: The broadcast to other nodes isn't needed during
8240Sstevel@tonic-gate 		 *   the addition of the first mddbs to the set since the
8250Sstevel@tonic-gate 		 *   other nodes haven't been joined to the set yet.  All
8260Sstevel@tonic-gate 		 *   nodes in a MN diskset are (implicitly) joined to the set
8270Sstevel@tonic-gate 		 *   on the addition of the first mddb.
8280Sstevel@tonic-gate 		 */
8290Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
8300Sstevel@tonic-gate 		    (bcast == DB_ADDSIDENMS_BCAST)) {
8310Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
8320Sstevel@tonic-gate 			md_mn_msg_meta_db_newside_t	db_ns;
8330Sstevel@tonic-gate 			int				send_rval;
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate 			db_ns.msg_l_dev = np->dev;
8360Sstevel@tonic-gate 			db_ns.msg_sideno = sideno;
8370Sstevel@tonic-gate 			db_ns.msg_blkno = blkno;
8380Sstevel@tonic-gate 			(void) strncpy(db_ns.msg_dname, dname,
8390Sstevel@tonic-gate 			    sizeof (db_ns.msg_dname));
8400Sstevel@tonic-gate 			(void) splitname(np->bname, &db_ns.msg_splitname);
8410Sstevel@tonic-gate 			db_ns.msg_mnum = mnum;
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
8440Sstevel@tonic-gate 			db_ns.msg_devid[0] = NULL;
8450Sstevel@tonic-gate 
8460Sstevel@tonic-gate 			/*
8470Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
8480Sstevel@tonic-gate 			 * stuck in in the return step until this command has
8490Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
8500Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
8510Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
8520Sstevel@tonic-gate 			 * cycle to proceed.
8530Sstevel@tonic-gate 			 */
8540Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
8550Sstevel@tonic-gate 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
8560Sstevel@tonic-gate 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
8570Sstevel@tonic-gate 			    sizeof (md_mn_msg_meta_db_newside_t),
8580Sstevel@tonic-gate 			    &resultp, ep);
8590Sstevel@tonic-gate 			if (send_rval != 0) {
8600Sstevel@tonic-gate 				rval = -1;
8610Sstevel@tonic-gate 				if (resultp == NULL)
8620Sstevel@tonic-gate 					(void) mddserror(ep,
8630Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
8640Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
8650Sstevel@tonic-gate 					    sp->setname);
8660Sstevel@tonic-gate 				else {
8670Sstevel@tonic-gate 					(void) mdstealerror(ep,
8680Sstevel@tonic-gate 					    &(resultp->mmr_ep));
8690Sstevel@tonic-gate 					if (mdisok(ep)) {
8700Sstevel@tonic-gate 						(void) mddserror(ep,
8710Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
8720Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
8730Sstevel@tonic-gate 						    sp->setname);
8740Sstevel@tonic-gate 					}
8750Sstevel@tonic-gate 					free_result(resultp);
8760Sstevel@tonic-gate 				}
8770Sstevel@tonic-gate 				break;
8780Sstevel@tonic-gate 			}
8790Sstevel@tonic-gate 			if (resultp)
8800Sstevel@tonic-gate 				free_result(resultp);
8810Sstevel@tonic-gate 		} else {
8820Sstevel@tonic-gate 			/*
8830Sstevel@tonic-gate 			 * Let this side's  device name, minor # and driver name
8840Sstevel@tonic-gate 			 * be known to the database replica.
8850Sstevel@tonic-gate 			 */
8860Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 			/* Fill in device/replica info */
8890Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
8900Sstevel@tonic-gate 			c.c_locator.l_blkno = blkno;
8910Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, dname,
8920Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
8930Sstevel@tonic-gate 			(void) splitname(bname, &c.c_devname);
8940Sstevel@tonic-gate 			c.c_locator.l_mnum = mnum;
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
8970Sstevel@tonic-gate 			c.c_setno = sp->setno;
8980Sstevel@tonic-gate 			(void) strncpy(c.c_setname, sp->setname,
8990Sstevel@tonic-gate 				sizeof (c.c_setname));
9000Sstevel@tonic-gate 			c.c_sideno = sideno;
9010Sstevel@tonic-gate 
9020Sstevel@tonic-gate 			/*
9030Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
9040Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
9050Sstevel@tonic-gate 			 * is just what this code would do.
9060Sstevel@tonic-gate 			 */
9070Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
9080Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
9090Sstevel@tonic-gate 
9100Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
9110Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
9120Sstevel@tonic-gate 				break;
9130Sstevel@tonic-gate 			}
9140Sstevel@tonic-gate 		}
9150Sstevel@tonic-gate 	}
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate 	/* cleanup, return success */
9180Sstevel@tonic-gate 	if (bname != NULL) {
9190Sstevel@tonic-gate 		Free(bname);
9200Sstevel@tonic-gate 		bname = NULL;
9210Sstevel@tonic-gate 	}
9220Sstevel@tonic-gate 	if (dname != NULL) {
9230Sstevel@tonic-gate 		Free(dname);
9240Sstevel@tonic-gate 		dname = NULL;
9250Sstevel@tonic-gate 	}
9260Sstevel@tonic-gate 	return (rval);
9270Sstevel@tonic-gate }
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate 
9300Sstevel@tonic-gate int
9310Sstevel@tonic-gate meta_db_delsidenm(
9320Sstevel@tonic-gate 	mdsetname_t	*sp,
9330Sstevel@tonic-gate 	side_t		sideno,
9340Sstevel@tonic-gate 	mdname_t	*np,
9350Sstevel@tonic-gate 	daddr_t		blkno,
9360Sstevel@tonic-gate 	md_error_t	*ep
9370Sstevel@tonic-gate )
9380Sstevel@tonic-gate {
9390Sstevel@tonic-gate 	mddb_config_t	c;
9400Sstevel@tonic-gate 	md_set_desc	*sd;
9410Sstevel@tonic-gate 
9420Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
9430Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
9440Sstevel@tonic-gate 			return (-1);
9450Sstevel@tonic-gate 	}
9460Sstevel@tonic-gate 	/* Use rpc.mdcommd to delete mddb side from all nodes */
9470Sstevel@tonic-gate 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
9480Sstevel@tonic-gate 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
9490Sstevel@tonic-gate 		md_mn_result_t			*resultp = NULL;
9500Sstevel@tonic-gate 		md_mn_msg_meta_db_delside_t	db_ds;
9510Sstevel@tonic-gate 		int				send_rval;
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 		db_ds.msg_l_dev = np->dev;
9540Sstevel@tonic-gate 		db_ds.msg_blkno = blkno;
9550Sstevel@tonic-gate 		db_ds.msg_sideno = sideno;
9560Sstevel@tonic-gate 
9570Sstevel@tonic-gate 		/* Set devid to NULL until devids are supported */
9580Sstevel@tonic-gate 		db_ds.msg_devid[0] = NULL;
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 		/*
9610Sstevel@tonic-gate 		 * If reconfig cycle has been started, this node is
9620Sstevel@tonic-gate 		 * stuck in in the return step until this command has
9630Sstevel@tonic-gate 		 * completed.  If mdcommd is suspended, ask
9640Sstevel@tonic-gate 		 * send_message to fail (instead of retrying)
9650Sstevel@tonic-gate 		 * so that metaset can finish allowing the reconfig
9660Sstevel@tonic-gate 		 * cycle to proceed.
9670Sstevel@tonic-gate 		 */
9680Sstevel@tonic-gate 		send_rval = mdmn_send_message(sp->setno,
9690Sstevel@tonic-gate 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
9700Sstevel@tonic-gate 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
9710Sstevel@tonic-gate 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
9720Sstevel@tonic-gate 		if (send_rval != 0) {
9730Sstevel@tonic-gate 			if (resultp == NULL)
9740Sstevel@tonic-gate 				(void) mddserror(ep,
9750Sstevel@tonic-gate 				    MDE_DS_COMMD_SEND_FAIL,
9760Sstevel@tonic-gate 				    sp->setno, NULL, NULL,
9770Sstevel@tonic-gate 				    sp->setname);
9780Sstevel@tonic-gate 			else {
9790Sstevel@tonic-gate 				(void) mdstealerror(ep, &(resultp->mmr_ep));
9800Sstevel@tonic-gate 				if (mdisok(ep)) {
9810Sstevel@tonic-gate 					(void) mddserror(ep,
9820Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
9830Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
9840Sstevel@tonic-gate 					    sp->setname);
9850Sstevel@tonic-gate 				}
9860Sstevel@tonic-gate 				free_result(resultp);
9870Sstevel@tonic-gate 			}
9880Sstevel@tonic-gate 			return (-1);
9890Sstevel@tonic-gate 		}
9900Sstevel@tonic-gate 		if (resultp)
9910Sstevel@tonic-gate 			free_result(resultp);
9920Sstevel@tonic-gate 
9930Sstevel@tonic-gate 	} else {
9940Sstevel@tonic-gate 		/*
9950Sstevel@tonic-gate 		 * Let this side's  device name, minor # and driver name
9960Sstevel@tonic-gate 		 * be known to the database replica.
9970Sstevel@tonic-gate 		 */
9980Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
9990Sstevel@tonic-gate 
10000Sstevel@tonic-gate 		/* Fill in device/replica info */
10010Sstevel@tonic-gate 		c.c_locator.l_dev = meta_cmpldev(np->dev);
10020Sstevel@tonic-gate 		c.c_locator.l_blkno = blkno;
10030Sstevel@tonic-gate 
10040Sstevel@tonic-gate 		/* Fill in setno, setname, and sideno */
10050Sstevel@tonic-gate 		c.c_setno = sp->setno;
10060Sstevel@tonic-gate 		(void) strcpy(c.c_setname, sp->setname);
10070Sstevel@tonic-gate 		c.c_sideno = sideno;
10080Sstevel@tonic-gate 
10090Sstevel@tonic-gate 		/*
10100Sstevel@tonic-gate 		 * Don't need device id information from this ioctl
10110Sstevel@tonic-gate 		 * Kernel determines device id from dev_t, which
10120Sstevel@tonic-gate 		 * is just what this code would do.
10130Sstevel@tonic-gate 		 */
10140Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
10150Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
10160Sstevel@tonic-gate 
10170Sstevel@tonic-gate 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
10180Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
10190Sstevel@tonic-gate 	}
10200Sstevel@tonic-gate 	return (0);
10210Sstevel@tonic-gate }
10220Sstevel@tonic-gate 
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate static int
10250Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
10260Sstevel@tonic-gate {
10270Sstevel@tonic-gate 	mdnamelist_t		*dnp1, *dnp2;
10280Sstevel@tonic-gate 
10290Sstevel@tonic-gate 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
10300Sstevel@tonic-gate 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
10310Sstevel@tonic-gate 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
10320Sstevel@tonic-gate 				return (mderror(ep, MDE_DUPDRIVE,
10330Sstevel@tonic-gate 				    dnp1->namep->cname));
10340Sstevel@tonic-gate 		}
10350Sstevel@tonic-gate 	}
10360Sstevel@tonic-gate 	return (0);
10370Sstevel@tonic-gate }
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 
/*
 * Compare the contents of two files.
 *
 * Returns 0 only when both files could be read in full and hold
 * byte-identical data.  Returns 1 when the contents differ, and also when
 * the comparison could not be completed (stat, open, read or allocation
 * failure, or a short read), so 1 means "treat as different".
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	int		fd;
	size_t		len;
	ssize_t		nread;
	struct stat	tsbuf;
	struct stat	sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	if (stat(tsname, &tsbuf) != 0)
		return (1);
	if (stat(sname, &sbuf) != 0)
		return (1);
	/* Files of different size cannot have the same content */
	if (tsbuf.st_size != sbuf.st_size)
		return (1);
	len = (size_t)tsbuf.st_size;

	/*
	 * Two empty files are identical.  Decide this here rather than
	 * depending on whether malloc(0) returns NULL.
	 */
	if (len == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(len);
	buf = malloc(len);
	if (tbuf == NULL || buf == NULL)
		goto out;

	fd = open(tsname, O_RDONLY);
	if (fd == -1)
		goto out;
	nread = read(fd, tbuf, len);
	(void) close(fd);
	/* A read error or short read counts as "different" */
	if (nread < 0 || (size_t)nread != len)
		goto out;

	fd = open(sname, O_RDONLY);
	if (fd == -1)
		goto out;
	nread = read(fd, buf, len);
	(void) close(fd);
	if (nread < 0 || (size_t)nread != len)
		goto out;

	/* compare content; normalize the result to exactly 0 or 1 */
	ret = (memcmp(tbuf, buf, len) != 0);
out:
	free(tbuf);
	free(buf);
	return (ret);
}
10910Sstevel@tonic-gate 
10920Sstevel@tonic-gate /*
10930Sstevel@tonic-gate  * patch md.conf file with mddb locations
10940Sstevel@tonic-gate  */
10950Sstevel@tonic-gate int
10960Sstevel@tonic-gate meta_db_patch(
10970Sstevel@tonic-gate 	char		*sname,		/* system file name */
10980Sstevel@tonic-gate 	char		*cname,		/* mddb.cf file name */
10990Sstevel@tonic-gate 	int		patch,		/* patching locally */
11000Sstevel@tonic-gate 	md_error_t	*ep
11010Sstevel@tonic-gate )
11020Sstevel@tonic-gate {
11030Sstevel@tonic-gate 	char		*tsname = NULL;
11040Sstevel@tonic-gate 	char		line[MDDB_BOOTLIST_MAX_LEN];
11050Sstevel@tonic-gate 	FILE		*tsfp = NULL;
11060Sstevel@tonic-gate 	FILE		*mfp = NULL;
11070Sstevel@tonic-gate 	int		rval = -1;
11080Sstevel@tonic-gate 
11090Sstevel@tonic-gate 	/* check names */
11100Sstevel@tonic-gate 	if (sname == NULL) {
11110Sstevel@tonic-gate 		if (patch)
11120Sstevel@tonic-gate 			sname = "md.conf";
11130Sstevel@tonic-gate 		else
11140Sstevel@tonic-gate 			sname = "/kernel/drv/md.conf";
11150Sstevel@tonic-gate 	}
11160Sstevel@tonic-gate 	if (cname == NULL)
11170Sstevel@tonic-gate 		cname = META_DBCONF;
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 	/*
11200Sstevel@tonic-gate 	 * edit file
11210Sstevel@tonic-gate 	 */
11220Sstevel@tonic-gate 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
11230Sstevel@tonic-gate 		if (mdissyserror(ep, EROFS)) {
11240Sstevel@tonic-gate 			/*
11250Sstevel@tonic-gate 			 * If we are booted on a read-only root because
11260Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
11270Sstevel@tonic-gate 			 * any scary error messages.
11280Sstevel@tonic-gate 			 */
11290Sstevel@tonic-gate 			mdclrerror(ep);
11300Sstevel@tonic-gate 			rval = 0;
11310Sstevel@tonic-gate 		}
11320Sstevel@tonic-gate 		goto out;
11330Sstevel@tonic-gate 	}
11340Sstevel@tonic-gate 
11350Sstevel@tonic-gate 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
11360Sstevel@tonic-gate 	    ep) != 0)
11370Sstevel@tonic-gate 		goto out;
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate 	/* if file content is identical, skip rename */
11400Sstevel@tonic-gate 	if (filediff(tsname, sname) == 0) {
11410Sstevel@tonic-gate 		rval = 0;
11420Sstevel@tonic-gate 		goto out;
11430Sstevel@tonic-gate 	}
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
11460Sstevel@tonic-gate 					    (fclose(tsfp) != 0)) {
11470Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, tsname);
11480Sstevel@tonic-gate 		goto out;
11490Sstevel@tonic-gate 	}
11500Sstevel@tonic-gate 
11510Sstevel@tonic-gate 	tsfp = NULL;
11520Sstevel@tonic-gate 
11530Sstevel@tonic-gate 	/*
11540Sstevel@tonic-gate 	 * rename file. If we get a Cross Device error then it
11550Sstevel@tonic-gate 	 * is because we are in the miniroot.
11560Sstevel@tonic-gate 	 */
11570Sstevel@tonic-gate 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
11580Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, sname);
11590Sstevel@tonic-gate 		goto out;
11600Sstevel@tonic-gate 	}
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate 	if (errno == EXDEV) {
11630Sstevel@tonic-gate 		if ((tsfp = fopen(tsname, "r")) == NULL)
11640Sstevel@tonic-gate 			goto out;
11650Sstevel@tonic-gate 		if ((mfp = fopen(sname, "w+")) == NULL)
11660Sstevel@tonic-gate 			goto out;
11670Sstevel@tonic-gate 		while (fgets(line, sizeof (line), tsfp) != NULL) {
11680Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
11690Sstevel@tonic-gate 				goto out;
11700Sstevel@tonic-gate 		}
11710Sstevel@tonic-gate 		(void) fclose(tsfp);
11720Sstevel@tonic-gate 		tsfp = NULL;
11730Sstevel@tonic-gate 		if (fflush(mfp) != 0)
11740Sstevel@tonic-gate 			goto out;
11750Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
11760Sstevel@tonic-gate 			goto out;
11770Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
11780Sstevel@tonic-gate 			mfp = NULL;
11790Sstevel@tonic-gate 			goto out;
11800Sstevel@tonic-gate 		}
11810Sstevel@tonic-gate 	}
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate 	Free(tsname);
11840Sstevel@tonic-gate 	tsname = NULL;
11850Sstevel@tonic-gate 	rval = 0;
11860Sstevel@tonic-gate 
11870Sstevel@tonic-gate 	/* cleanup, return error */
11880Sstevel@tonic-gate out:
11890Sstevel@tonic-gate 	if (tsfp != NULL)
11900Sstevel@tonic-gate 		(void) fclose(tsfp);
11910Sstevel@tonic-gate 	if (tsname != NULL) {
11920Sstevel@tonic-gate 		(void) unlink(tsname);
11930Sstevel@tonic-gate 		Free(tsname);
11940Sstevel@tonic-gate 	}
11950Sstevel@tonic-gate 	return (rval);
11960Sstevel@tonic-gate }
11970Sstevel@tonic-gate 
11980Sstevel@tonic-gate /*
11990Sstevel@tonic-gate  * Add replicas to set.  This happens as a result of:
12000Sstevel@tonic-gate  *	- metadb [-s set_name] -a
12010Sstevel@tonic-gate  *	- metaset -s set_name -a disk
12020Sstevel@tonic-gate  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
12030Sstevel@tonic-gate  *	- metaset -s set_name -b
12040Sstevel@tonic-gate  *
12050Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
12060Sstevel@tonic-gate  *
12070Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
12080Sstevel@tonic-gate  * is running the metaset command.
12090Sstevel@tonic-gate  *
12100Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
12110Sstevel@tonic-gate  * running the metaset command.  If this is the first mddb added to
12120Sstevel@tonic-gate  * the MN diskset, then no communication is made to other nodes via commd
12130Sstevel@tonic-gate  * since the other nodes will be in-sync with respect to the mddbs when
12140Sstevel@tonic-gate  * those other nodes join the set and snarf in the newly created mddb.
12150Sstevel@tonic-gate  * If this is not the first mddb added to the MN diskset, then this
12160Sstevel@tonic-gate  * attach command is sent to all of the nodes using commd.  This keeps
12170Sstevel@tonic-gate  * the nodes in-sync.
12180Sstevel@tonic-gate  */
12190Sstevel@tonic-gate int
12200Sstevel@tonic-gate meta_db_attach(
12210Sstevel@tonic-gate 	mdsetname_t		*sp,
12220Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
12230Sstevel@tonic-gate 	mdchkopts_t		options,
12240Sstevel@tonic-gate 	md_timeval32_t		*timeval,
12250Sstevel@tonic-gate 	int			dbcnt,
12260Sstevel@tonic-gate 	int			dbsize,
12270Sstevel@tonic-gate 	char			*sysfilename,
12280Sstevel@tonic-gate 	md_error_t		*ep
12290Sstevel@tonic-gate )
12300Sstevel@tonic-gate {
12310Sstevel@tonic-gate 	struct mddb_config	c;
12320Sstevel@tonic-gate 	mdnamelist_t		*nlp;
12330Sstevel@tonic-gate 	mdname_t		*np;
12340Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
12350Sstevel@tonic-gate 	md_drive_desc		*p;
12360Sstevel@tonic-gate 	int			i;
12370Sstevel@tonic-gate 	int			fd;
12380Sstevel@tonic-gate 	side_t			sideno;
12390Sstevel@tonic-gate 	daddr_t			blkno;
12400Sstevel@tonic-gate 	int			replicacount = 0;
12410Sstevel@tonic-gate 	int			start_mdmonitord = 0;
12420Sstevel@tonic-gate 	int			rval = 0;
12430Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
12440Sstevel@tonic-gate 	md_set_desc		*sd;
12450Sstevel@tonic-gate 	int			stale_bool = FALSE;
12460Sstevel@tonic-gate 	int			flags;
12470Sstevel@tonic-gate 	int			firstmddb = 1;
12480Sstevel@tonic-gate 	md_timeval32_t		inittime = {0, 0};
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate 	/*
12510Sstevel@tonic-gate 	 * Error if we don't get some work to do.
12520Sstevel@tonic-gate 	 */
12530Sstevel@tonic-gate 	if (db_nlp == NULL)
12540Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
12570Sstevel@tonic-gate 		return (-1);
12580Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
12590Sstevel@tonic-gate 	c.c_id = 0;
12600Sstevel@tonic-gate 	c.c_setno = sp->setno;
12610Sstevel@tonic-gate 
12620Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
12630Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
12640Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
12650Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
12660Sstevel@tonic-gate 		if (metaislocalset(sp)) {
12670Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
12680Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
12690Sstevel@tonic-gate 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
12700Sstevel@tonic-gate 			    (! (options & MDCHK_ALLOW_NODBS)))
12710Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
12720Sstevel@tonic-gate 		} else {
12730Sstevel@tonic-gate 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
12740Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
12750Sstevel@tonic-gate 		}
12760Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
12770Sstevel@tonic-gate 	}
12780Sstevel@tonic-gate 	/*
12790Sstevel@tonic-gate 	 * Is current set STALE?
12800Sstevel@tonic-gate 	 */
12810Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
12820Sstevel@tonic-gate 		stale_bool = TRUE;
12830Sstevel@tonic-gate 	}
12840Sstevel@tonic-gate 
12850Sstevel@tonic-gate 	assert(db_nlp != NULL);
12860Sstevel@tonic-gate 
12870Sstevel@tonic-gate 	/* if creating the metadbs for the first time start mdmonitord */
12880Sstevel@tonic-gate 	if (c.c_dbcnt == 0)
12890Sstevel@tonic-gate 		start_mdmonitord = 1;
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	/*
12920Sstevel@tonic-gate 	 * check to see if we will go over the total possible number
12930Sstevel@tonic-gate 	 * of data bases
12940Sstevel@tonic-gate 	 */
12950Sstevel@tonic-gate 	nlp = db_nlp;
12960Sstevel@tonic-gate 	while (nlp) {
12970Sstevel@tonic-gate 		replicacount += dbcnt;
12980Sstevel@tonic-gate 		nlp = nlp->next;
12990Sstevel@tonic-gate 	}
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
13020Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
13030Sstevel@tonic-gate 		    sp->setno, c.c_dbcnt + replicacount, NULL));
13040Sstevel@tonic-gate 
13050Sstevel@tonic-gate 	/*
13060Sstevel@tonic-gate 	 * go through and check to make sure all locations specified
13070Sstevel@tonic-gate 	 * are legal also pick out driver name;
13080Sstevel@tonic-gate 	 */
13090Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13100Sstevel@tonic-gate 		diskaddr_t devsize;
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 		np = nlp->namep;
13130Sstevel@tonic-gate 
13140Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
13150Sstevel@tonic-gate 			uint_t	partno;
13160Sstevel@tonic-gate 			uint_t	rep_partno;
13170Sstevel@tonic-gate 			mddrivename_t	*dnp = np->drivenamep;
13180Sstevel@tonic-gate 
13190Sstevel@tonic-gate 			/*
13200Sstevel@tonic-gate 			 * make sure that non-local database replicas
13210Sstevel@tonic-gate 			 * are always on the replica slice.
13220Sstevel@tonic-gate 			 */
13230Sstevel@tonic-gate 			if (meta_replicaslice(dnp,
13240Sstevel@tonic-gate 			    &rep_partno, ep) != 0)
13250Sstevel@tonic-gate 				return (-1);
13260Sstevel@tonic-gate 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
13270Sstevel@tonic-gate 				return (-1);
13280Sstevel@tonic-gate 			if (partno != rep_partno)
13290Sstevel@tonic-gate 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
13300Sstevel@tonic-gate 				    np->dev, sp->setname));
13310Sstevel@tonic-gate 		}
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
13340Sstevel@tonic-gate 		    ep)) {
13350Sstevel@tonic-gate 			return (-1);
13360Sstevel@tonic-gate 		}
13370Sstevel@tonic-gate 
13380Sstevel@tonic-gate 		if ((devsize = metagetsize(np, ep)) == -1)
13390Sstevel@tonic-gate 			return (-1);
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
13420Sstevel@tonic-gate 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
13430Sstevel@tonic-gate 			    meta_getminor(np->dev), sp->setno, devsize,
13440Sstevel@tonic-gate 			    np->cname));
13450Sstevel@tonic-gate 	}
13460Sstevel@tonic-gate 
13470Sstevel@tonic-gate 	/*
13480Sstevel@tonic-gate 	 * If first disk in set we don't have lb_inittime yet for use as
13490Sstevel@tonic-gate 	 * mb_setcreatetime so don't go looking for it. WE'll come back
13500Sstevel@tonic-gate 	 * later and update after the locator block has been created.
13510Sstevel@tonic-gate 	 * If this isn't the first disk in the set, we have a locator
13520Sstevel@tonic-gate 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
13530Sstevel@tonic-gate 	 * lb_inittime.
13540Sstevel@tonic-gate 	 */
13550Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
13560Sstevel@tonic-gate 		if (c.c_dbcnt != 0) {
13570Sstevel@tonic-gate 			firstmddb = 0;
13580Sstevel@tonic-gate 			inittime = meta_get_lb_inittime(sp, ep);
13590Sstevel@tonic-gate 		}
13600Sstevel@tonic-gate 	}
13610Sstevel@tonic-gate 
13620Sstevel@tonic-gate 	/*
13630Sstevel@tonic-gate 	 * go through and write all master blocks
13640Sstevel@tonic-gate 	 */
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13670Sstevel@tonic-gate 		np = nlp->namep;
13680Sstevel@tonic-gate 
13690Sstevel@tonic-gate 		if ((fd = open(np->rname, O_RDWR)) < 0)
13700Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
13710Sstevel@tonic-gate 
13720Sstevel@tonic-gate 		for (i = 0; i < dbcnt; i++) {
13730Sstevel@tonic-gate 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
13740Sstevel@tonic-gate 			    inittime, ep)) {
13750Sstevel@tonic-gate 				(void) close(fd);
13760Sstevel@tonic-gate 				return (-1);
13770Sstevel@tonic-gate 			}
13780Sstevel@tonic-gate 		}
13790Sstevel@tonic-gate 		(void) close(fd);
13800Sstevel@tonic-gate 	}
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
13830Sstevel@tonic-gate 		return (-1);
13840Sstevel@tonic-gate 
13850Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
13860Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
13870Sstevel@tonic-gate 		if (! mdisok(ep))
13880Sstevel@tonic-gate 			return (-1);
13890Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
13900Sstevel@tonic-gate 			return (-1);
13910Sstevel@tonic-gate 
13920Sstevel@tonic-gate 	}
13930Sstevel@tonic-gate 
13940Sstevel@tonic-gate 	/*
13950Sstevel@tonic-gate 	 * go through and tell kernel to add them
13960Sstevel@tonic-gate 	 */
13970Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13980Sstevel@tonic-gate 		mdcinfo_t	*cinfo;
13990Sstevel@tonic-gate 
14000Sstevel@tonic-gate 		np = nlp->namep;
14010Sstevel@tonic-gate 
14020Sstevel@tonic-gate 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
14030Sstevel@tonic-gate 			rval = -1;
14040Sstevel@tonic-gate 			goto out;
14050Sstevel@tonic-gate 		}
14060Sstevel@tonic-gate 
14070Sstevel@tonic-gate 		/*
14080Sstevel@tonic-gate 		 * If mddb is being added to MN diskset and there already
14090Sstevel@tonic-gate 		 * exists a valid mddb in the set (which equates to this
14100Sstevel@tonic-gate 		 * node being an owner of the set) then use rpc.mdcommd
14110Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
14120Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
14130Sstevel@tonic-gate 		 * can't write the message to the mddb.
14140Sstevel@tonic-gate 		 *
14150Sstevel@tonic-gate 		 * Otherwise, just add mddb to this node.
14160Sstevel@tonic-gate 		 */
14170Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
14180Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
14190Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
14200Sstevel@tonic-gate 			md_mn_msg_meta_db_attach_t	attach;
14210Sstevel@tonic-gate 			int 				send_rval;
14220Sstevel@tonic-gate 
14230Sstevel@tonic-gate 			/*
14240Sstevel@tonic-gate 			 * In a scenario where new replicas had been added on
14250Sstevel@tonic-gate 			 * the master, and then all of the old replicas failed
14260Sstevel@tonic-gate 			 * before the slaves had knowledge of the new replicas,
14270Sstevel@tonic-gate 			 * the slaves are unable to re-parse in the mddb
14280Sstevel@tonic-gate 			 * from the new replicas since the slaves have no
14290Sstevel@tonic-gate 			 * knowledge of the new replicas.  The following
14300Sstevel@tonic-gate 			 * algorithm solves this problem:
14310Sstevel@tonic-gate 			 * 	- META_DB_ATTACH message generates submsgs
14320Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
14330Sstevel@tonic-gate 			 * 		- MDDB_ATTACH new replicas
14340Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
14350Sstevel@tonic-gate 			 *		information to be sent from master
14360Sstevel@tonic-gate 			 *		to slaves at a higher class than the
14370Sstevel@tonic-gate 			 *		unblock so the parse message will
14380Sstevel@tonic-gate 			 *		reach slaves before unblock message.
14390Sstevel@tonic-gate 			 */
14400Sstevel@tonic-gate 			attach.msg_l_dev = np->dev;
14410Sstevel@tonic-gate 			attach.msg_cnt = dbcnt;
14420Sstevel@tonic-gate 			attach.msg_dbsize = dbsize;
14430Sstevel@tonic-gate 			(void) strncpy(attach.msg_dname, cinfo->dname,
14440Sstevel@tonic-gate 			    sizeof (attach.msg_dname));
14450Sstevel@tonic-gate 			(void) splitname(np->bname, &attach.msg_splitname);
14460Sstevel@tonic-gate 			attach.msg_options = options;
14470Sstevel@tonic-gate 
14480Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
14490Sstevel@tonic-gate 			attach.msg_devid[0] = NULL;
14500Sstevel@tonic-gate 
14510Sstevel@tonic-gate 			/*
14520Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
14530Sstevel@tonic-gate 			 * stuck in in the return step until this command has
14540Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
14550Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
14560Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
14570Sstevel@tonic-gate 			 * cycle to proceed.
14580Sstevel@tonic-gate 			 */
14590Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
14600Sstevel@tonic-gate 			if (stale_bool == TRUE)
14610Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
14620Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
14630Sstevel@tonic-gate 				MD_MN_MSG_META_DB_ATTACH,
14640Sstevel@tonic-gate 				flags, (char *)&attach,
14650Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_attach_t),
14660Sstevel@tonic-gate 				&resultp, ep);
14670Sstevel@tonic-gate 			if (send_rval != 0) {
14680Sstevel@tonic-gate 				rval = -1;
14690Sstevel@tonic-gate 				if (resultp == NULL)
14700Sstevel@tonic-gate 					(void) mddserror(ep,
14710Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
14720Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
14730Sstevel@tonic-gate 					    sp->setname);
14740Sstevel@tonic-gate 				else {
14750Sstevel@tonic-gate 					(void) mdstealerror(ep,
14760Sstevel@tonic-gate 					    &(resultp->mmr_ep));
14770Sstevel@tonic-gate 					if (mdisok(ep)) {
14780Sstevel@tonic-gate 						(void) mddserror(ep,
14790Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
14800Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
14810Sstevel@tonic-gate 						    sp->setname);
14820Sstevel@tonic-gate 					}
14830Sstevel@tonic-gate 					free_result(resultp);
14840Sstevel@tonic-gate 				}
14850Sstevel@tonic-gate 				goto out;
14860Sstevel@tonic-gate 			}
14870Sstevel@tonic-gate 			if (resultp)
14880Sstevel@tonic-gate 				free_result(resultp);
14890Sstevel@tonic-gate 		} else {
14900Sstevel@tonic-gate 		    /* Adding mddb(s) to just this node */
14910Sstevel@tonic-gate 		    for (i = 0; i < dbcnt; i++) {
14920Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
14930Sstevel@tonic-gate 			/* Fill in device/replica info */
14940Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
14950Sstevel@tonic-gate 			c.c_locator.l_blkno = i * dbsize + 16;
14960Sstevel@tonic-gate 			blkno = c.c_locator.l_blkno;
14970Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
14980Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
14990Sstevel@tonic-gate 			(void) splitname(np->bname, &c.c_devname);
15000Sstevel@tonic-gate 			c.c_locator.l_mnum = meta_getminor(np->dev);
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
15030Sstevel@tonic-gate 			c.c_setno = sp->setno;
15040Sstevel@tonic-gate 			if (! metaislocalset(sp)) {
15050Sstevel@tonic-gate 				if (MD_MNSET_DESC(sd)) {
15060Sstevel@tonic-gate 					c.c_multi_node = 1;
15070Sstevel@tonic-gate 				}
15080Sstevel@tonic-gate 			}
15090Sstevel@tonic-gate 			(void) strcpy(c.c_setname, sp->setname);
15100Sstevel@tonic-gate 			c.c_sideno = sideno;
15110Sstevel@tonic-gate 
15120Sstevel@tonic-gate 			/*
15130Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
15140Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
15150Sstevel@tonic-gate 			 * is just what this code would do.
15160Sstevel@tonic-gate 			 */
15170Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
15180Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
15190Sstevel@tonic-gate 
15200Sstevel@tonic-gate 			if (timeval != NULL)
15210Sstevel@tonic-gate 				c.c_timestamp = *timeval;
15220Sstevel@tonic-gate 
15230Sstevel@tonic-gate 			if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE),
15240Sstevel@tonic-gate 			    ep)) {
15250Sstevel@tonic-gate 				rval = -1;
15260Sstevel@tonic-gate 				goto out;
15270Sstevel@tonic-gate 			}
15280Sstevel@tonic-gate 
15290Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) {
15300Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
15310Sstevel@tonic-gate 				goto out;
15320Sstevel@tonic-gate 			}
15330Sstevel@tonic-gate 			/*
15340Sstevel@tonic-gate 			 * This is either a traditional diskset OR this
15350Sstevel@tonic-gate 			 * is the first replica added to a MN diskset.
15360Sstevel@tonic-gate 			 * In either case, set broadcast to NO_BCAST so
15370Sstevel@tonic-gate 			 * that message won't go through rpc.mdcommd.
15380Sstevel@tonic-gate 			 * If this is a traditional diskset, the bcast
15390Sstevel@tonic-gate 			 * flag is ignored since traditional disksets
15400Sstevel@tonic-gate 			 * don't use the rpc.mdcommd.
15410Sstevel@tonic-gate 			 */
15420Sstevel@tonic-gate 			if (meta_db_addsidenms(sp, np, blkno,
15430Sstevel@tonic-gate 			    DB_ADDSIDENMS_NO_BCAST, ep))
15440Sstevel@tonic-gate 				goto out;
15450Sstevel@tonic-gate 		    }
15460Sstevel@tonic-gate 		}
15470Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
15480Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
15490Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next)
15500Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
15510Sstevel@tonic-gate 					p->dd_dbcnt = dbcnt;
15520Sstevel@tonic-gate 					p->dd_dbsize  = dbsize;
15530Sstevel@tonic-gate 					break;
15540Sstevel@tonic-gate 				}
15550Sstevel@tonic-gate 		}
15560Sstevel@tonic-gate 
15570Sstevel@tonic-gate 		/*
15580Sstevel@tonic-gate 		 * If this was the first addition of disks to the
15590Sstevel@tonic-gate 		 * diskset you now need to update the mb_setcreatetime
15600Sstevel@tonic-gate 		 * which needed lb_inittime which wasn't there until now.
15610Sstevel@tonic-gate 		 */
15620Sstevel@tonic-gate 		if (firstmddb) {
15630Sstevel@tonic-gate 			if (meta_update_mb(sp, dd, ep) != 0) {
15640Sstevel@tonic-gate 				return (-1);
15650Sstevel@tonic-gate 			}
15660Sstevel@tonic-gate 		}
15670Sstevel@tonic-gate 		(void) close(fd);
15680Sstevel@tonic-gate 	}
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate out:
15710Sstevel@tonic-gate 	if (metaislocalset(sp)) {
15720Sstevel@tonic-gate 
15730Sstevel@tonic-gate 		/* everything looks fine. Start mdmonitord */
15740Sstevel@tonic-gate 		/* Note: popen/pclose is the MT-safe replacement for system */
15750Sstevel@tonic-gate 		if (rval == 0 && start_mdmonitord  == 1) {
15760Sstevel@tonic-gate 			if (pclose(popen(MDMONITORD, "w")) == -1)
15770Sstevel@tonic-gate 				md_perror(MDMONITORD);
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
15800Sstevel@tonic-gate 				mde_perror(&status, "");
15810Sstevel@tonic-gate 				mdclrerror(&status);
15820Sstevel@tonic-gate 			}
15830Sstevel@tonic-gate 		}
15840Sstevel@tonic-gate 
15850Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
15860Sstevel@tonic-gate 			/* Don't mask any previous errors */
15870Sstevel@tonic-gate 			if (rval == 0)
15880Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
15890Sstevel@tonic-gate 			return (rval);
15900Sstevel@tonic-gate 		}
15910Sstevel@tonic-gate 
15920Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
15930Sstevel@tonic-gate 			/* Don't mask any previous errors */
15940Sstevel@tonic-gate 			if (rval == 0)
15950Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
15960Sstevel@tonic-gate 		}
15970Sstevel@tonic-gate 	} else {
15980Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
15990Sstevel@tonic-gate 		    (options & MDCHK_SET_LOCKED),
16000Sstevel@tonic-gate 		    (options & MDCHK_SET_FORCE),
16010Sstevel@tonic-gate 		    &status)) {
16020Sstevel@tonic-gate 			/* Don't mask any previous errors */
16030Sstevel@tonic-gate 			if (rval == 0)
16040Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16050Sstevel@tonic-gate 			else
16060Sstevel@tonic-gate 				mdclrerror(&status);
16070Sstevel@tonic-gate 		}
16080Sstevel@tonic-gate 		metafreedrivedesc(&dd);
16090Sstevel@tonic-gate 	}
16100Sstevel@tonic-gate 	/*
16110Sstevel@tonic-gate 	 * For MN disksets that already had already had nodes joined
16120Sstevel@tonic-gate 	 * before the attach of this mddb(s), the name invalidation is
16130Sstevel@tonic-gate 	 * done by the commd handler routine.  Otherwise, if this
16140Sstevel@tonic-gate 	 * is the first attach of a MN diskset mddb, the invalidation
16150Sstevel@tonic-gate 	 * must be done here since the first attach cannot be sent
16160Sstevel@tonic-gate 	 * via the commd since there are no nodes joined to the set yet.
16170Sstevel@tonic-gate 	 */
16180Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
16190Sstevel@tonic-gate 	    (MD_MNSET_DESC(sd) &&
16200Sstevel@tonic-gate 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
16210Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
16220Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
16230Sstevel@tonic-gate 		}
16240Sstevel@tonic-gate 	}
16250Sstevel@tonic-gate 	return (rval);
16260Sstevel@tonic-gate }
16270Sstevel@tonic-gate 
16280Sstevel@tonic-gate /*
16290Sstevel@tonic-gate  * deletelist_length
16300Sstevel@tonic-gate  *
16310Sstevel@tonic-gate  *	return the number of slices that have been specified for deletion
16320Sstevel@tonic-gate  *	on the metadb command line.  This does not calculate the number
16330Sstevel@tonic-gate  *	of replicas because there may be multiple replicas per slice.
16340Sstevel@tonic-gate  */
16350Sstevel@tonic-gate static int
16360Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
16370Sstevel@tonic-gate {
16380Sstevel@tonic-gate 
16390Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16400Sstevel@tonic-gate 	int			list_length = 0;
16410Sstevel@tonic-gate 
16420Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
16430Sstevel@tonic-gate 		list_length++;
16440Sstevel@tonic-gate 	}
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	return (list_length);
16470Sstevel@tonic-gate }
16480Sstevel@tonic-gate 
16490Sstevel@tonic-gate static int
16500Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
16510Sstevel@tonic-gate {
16520Sstevel@tonic-gate 
16530Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16540Sstevel@tonic-gate 	mdname_t		*np;
16550Sstevel@tonic-gate 	int			index = 0;
16560Sstevel@tonic-gate 
16570Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
16580Sstevel@tonic-gate 		np = nlp->namep;
16590Sstevel@tonic-gate 
16600Sstevel@tonic-gate 		if (strcmp(devname, np->bname) == 0)
16610Sstevel@tonic-gate 			return (index);
16620Sstevel@tonic-gate 		index++;
16630Sstevel@tonic-gate 	}
16640Sstevel@tonic-gate 
16650Sstevel@tonic-gate 	return (-1);
16660Sstevel@tonic-gate }
16670Sstevel@tonic-gate 
16680Sstevel@tonic-gate /*
16690Sstevel@tonic-gate  * Delete replicas from set.  This happens as a result of:
16700Sstevel@tonic-gate  *	- metadb [-s set_name] -d
16710Sstevel@tonic-gate  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
16720Sstevel@tonic-gate  *	- metaset -s set_name -d disk
16730Sstevel@tonic-gate  *	- metaset -s set_name -b
16740Sstevel@tonic-gate  *
16750Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
16760Sstevel@tonic-gate  *
16770Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
16780Sstevel@tonic-gate  * is running the metaset command.
16790Sstevel@tonic-gate  *
16800Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
16810Sstevel@tonic-gate  * running the metaset command.  This detach routine is sent to all
16820Sstevel@tonic-gate  * of the joined nodes in the diskset using commd.  This keeps
16830Sstevel@tonic-gate  * the nodes in-sync.
16840Sstevel@tonic-gate  */
16850Sstevel@tonic-gate int
16860Sstevel@tonic-gate meta_db_detach(
16870Sstevel@tonic-gate 	mdsetname_t		*sp,
16880Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
16890Sstevel@tonic-gate 	mdforceopts_t		force_option,
16900Sstevel@tonic-gate 	char			*sysfilename,
16910Sstevel@tonic-gate 	md_error_t		*ep
16920Sstevel@tonic-gate )
16930Sstevel@tonic-gate {
16940Sstevel@tonic-gate 	struct mddb_config	c;
16950Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16960Sstevel@tonic-gate 	mdname_t		*np;
16970Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
16980Sstevel@tonic-gate 	md_drive_desc		*p;
16990Sstevel@tonic-gate 	int			replicacount;
17000Sstevel@tonic-gate 	int			replica_delete_count;
17010Sstevel@tonic-gate 	int			nr_replica_slices;
17020Sstevel@tonic-gate 	int			i;
17030Sstevel@tonic-gate 	int			stop_svmdaemons = 0;
17040Sstevel@tonic-gate 	int			rval = 0;
17050Sstevel@tonic-gate 	int			index;
17060Sstevel@tonic-gate 	int			valid_replicas_nottodelete = 0;
17070Sstevel@tonic-gate 	int			invalid_replicas_nottodelete = 0;
17080Sstevel@tonic-gate 	int			invalid_replicas_todelete = 0;
17090Sstevel@tonic-gate 	int			errored = 0;
17100Sstevel@tonic-gate 	int			*tag_array;
17110Sstevel@tonic-gate 	int			fd = -1;
17120Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
17130Sstevel@tonic-gate 	md_set_desc		*sd;
17140Sstevel@tonic-gate 	int			stale_bool = FALSE;
17150Sstevel@tonic-gate 	int			flags;
17160Sstevel@tonic-gate 
17170Sstevel@tonic-gate 	/*
17180Sstevel@tonic-gate 	 * Error if we don't get some work to do.
17190Sstevel@tonic-gate 	 */
17200Sstevel@tonic-gate 	if (db_nlp == NULL)
17210Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
17220Sstevel@tonic-gate 
17230Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
17240Sstevel@tonic-gate 		return (-1);
17250Sstevel@tonic-gate 
17260Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
17270Sstevel@tonic-gate 	c.c_id = 0;
17280Sstevel@tonic-gate 	c.c_setno = sp->setno;
17290Sstevel@tonic-gate 
17300Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
17310Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
17320Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
17330Sstevel@tonic-gate 
17340Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
17350Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate 	/*
17380Sstevel@tonic-gate 	 * Is current set STALE?
17390Sstevel@tonic-gate 	 */
17400Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
17410Sstevel@tonic-gate 		stale_bool = TRUE;
17420Sstevel@tonic-gate 	}
17430Sstevel@tonic-gate 
17440Sstevel@tonic-gate 	replicacount = c.c_dbcnt;
17450Sstevel@tonic-gate 
17460Sstevel@tonic-gate 	assert(db_nlp != NULL);
17470Sstevel@tonic-gate 
17480Sstevel@tonic-gate 	/*
17490Sstevel@tonic-gate 	 * go through and gather how many data bases are on each
17500Sstevel@tonic-gate 	 * device specified.
17510Sstevel@tonic-gate 	 */
17520Sstevel@tonic-gate 
17530Sstevel@tonic-gate 	nr_replica_slices = deletelist_length(db_nlp);
17540Sstevel@tonic-gate 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate 	replica_delete_count = 0;
17570Sstevel@tonic-gate 	for (i = 0; i < replicacount; i++) {
17580Sstevel@tonic-gate 		char	*devname;
17590Sstevel@tonic-gate 		int	found = 0;
17600Sstevel@tonic-gate 
17610Sstevel@tonic-gate 		c.c_id = i;
17620Sstevel@tonic-gate 
17630Sstevel@tonic-gate 		/* Don't need device id information from this ioctl */
17640Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
17650Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
17660Sstevel@tonic-gate 
17670Sstevel@tonic-gate 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
17680Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
17690Sstevel@tonic-gate 
17700Sstevel@tonic-gate 		devname = splicename(&c.c_devname);
17710Sstevel@tonic-gate 
17720Sstevel@tonic-gate 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
17730Sstevel@tonic-gate 			found = 1;
17740Sstevel@tonic-gate 			tag_array[index] = 1;
17750Sstevel@tonic-gate 			replica_delete_count++;
17760Sstevel@tonic-gate 		}
17770Sstevel@tonic-gate 
17780Sstevel@tonic-gate 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
17790Sstevel@tonic-gate 				MDDB_F_EWRITE | MDDB_F_TOOSMALL |
17800Sstevel@tonic-gate 				MDDB_F_EFMT | MDDB_F_EDATA |
17810Sstevel@tonic-gate 				MDDB_F_EMASTER);
17820Sstevel@tonic-gate 
17830Sstevel@tonic-gate 		/*
17840Sstevel@tonic-gate 		 * There are four combinations of "errored" and "found"
17850Sstevel@tonic-gate 		 * and they are used to find the number of
17860Sstevel@tonic-gate 		 * (a) valid/invalid replicas that are not in the delete
17870Sstevel@tonic-gate 		 * list and are available in the system.
17880Sstevel@tonic-gate 		 * (b) valid/invalid replicas that are to be deleted.
17890Sstevel@tonic-gate 		 */
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 		if (errored && !found)		/* errored and !found */
17920Sstevel@tonic-gate 			invalid_replicas_nottodelete++;
17930Sstevel@tonic-gate 		else if (!found)		/* !errored and !found */
17940Sstevel@tonic-gate 			valid_replicas_nottodelete++;
17950Sstevel@tonic-gate 		else if (errored)		/* errored and found */
17960Sstevel@tonic-gate 			invalid_replicas_todelete++;
17970Sstevel@tonic-gate 		/*
17980Sstevel@tonic-gate 		 * else it is !errored and found. This means
17990Sstevel@tonic-gate 		 * valid_replicas_todelete++; But this variable will not
18000Sstevel@tonic-gate 		 * be used anywhere
18010Sstevel@tonic-gate 		 */
18020Sstevel@tonic-gate 
18030Sstevel@tonic-gate 		Free(devname);
18040Sstevel@tonic-gate 	}
18050Sstevel@tonic-gate 
18060Sstevel@tonic-gate 	index = 0;
18070Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
18080Sstevel@tonic-gate 		np = nlp->namep;
18090Sstevel@tonic-gate 		if (tag_array[index++] != 1) {
18100Sstevel@tonic-gate 			Free(tag_array);
18110Sstevel@tonic-gate 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
18120Sstevel@tonic-gate 		}
18130Sstevel@tonic-gate 	}
18140Sstevel@tonic-gate 
18150Sstevel@tonic-gate 	Free(tag_array);
18160Sstevel@tonic-gate 
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate 	/* if all replicas are deleted stop mdmonitord */
18190Sstevel@tonic-gate 	if ((replicacount - replica_delete_count) == 0)
18200Sstevel@tonic-gate 		stop_svmdaemons = 1;
18210Sstevel@tonic-gate 
18220Sstevel@tonic-gate 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
18230Sstevel@tonic-gate 		if (force_option & MDFORCE_NONE)
18240Sstevel@tonic-gate 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
18250Sstevel@tonic-gate 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
18260Sstevel@tonic-gate 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
18270Sstevel@tonic-gate 	}
18280Sstevel@tonic-gate 
18290Sstevel@tonic-gate 	/*
18300Sstevel@tonic-gate 	 * The following algorithms are followed to check for deletion:
18310Sstevel@tonic-gate 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
18320Sstevel@tonic-gate 	 * replicas, then deletion should be allowed.
18330Sstevel@tonic-gate 	 * (b) Deletion should be allowed only if valid replicas that are "not"
18340Sstevel@tonic-gate 	 * to be deleted is always greater than the invalid replicas that
18350Sstevel@tonic-gate 	 * are "not" to be deleted.
18360Sstevel@tonic-gate 	 * (c) If the user uses -f option, then deletion should be allowed.
18370Sstevel@tonic-gate 	 */
18380Sstevel@tonic-gate 
18390Sstevel@tonic-gate 	if ((invalid_replicas_todelete != replica_delete_count) &&
18400Sstevel@tonic-gate 		(invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
18410Sstevel@tonic-gate 				(force_option != MDFORCE_LOCAL))
18420Sstevel@tonic-gate 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
18430Sstevel@tonic-gate 
18440Sstevel@tonic-gate 	/*
18450Sstevel@tonic-gate 	 * go through and tell kernel to delete them
18460Sstevel@tonic-gate 	 */
18470Sstevel@tonic-gate 
18480Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
18490Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
18500Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
18510Sstevel@tonic-gate 
18520Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18530Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
18540Sstevel@tonic-gate 
18550Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
18560Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
18570Sstevel@tonic-gate 		if (! mdisok(ep))
18580Sstevel@tonic-gate 			return (-1);
18590Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
18600Sstevel@tonic-gate 			return (-1);
18610Sstevel@tonic-gate 	}
18620Sstevel@tonic-gate 
18630Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
18640Sstevel@tonic-gate 		np = nlp->namep;
18650Sstevel@tonic-gate 
18660Sstevel@tonic-gate 		/*
18670Sstevel@tonic-gate 		 * If mddb is being deleted from MN diskset and node is
18680Sstevel@tonic-gate 		 * an owner of the diskset then use rpc.mdcommd
18690Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
18700Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
18710Sstevel@tonic-gate 		 * can't write the message to the mddb.
18720Sstevel@tonic-gate 		 *
18730Sstevel@tonic-gate 		 * When mddbs are first being added to set, a detach can
18740Sstevel@tonic-gate 		 * be called before any node has joined the diskset, so
18750Sstevel@tonic-gate 		 * must check to see if node is an owner of the diskset.
18760Sstevel@tonic-gate 		 *
18770Sstevel@tonic-gate 		 * Otherwise, just delete mddb from this node.
18780Sstevel@tonic-gate 		 */
18790Sstevel@tonic-gate 
18800Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
18810Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
18820Sstevel@tonic-gate 			md_mn_result_t			*resultp;
18830Sstevel@tonic-gate 			md_mn_msg_meta_db_detach_t	detach;
18840Sstevel@tonic-gate 			int				send_rval;
18850Sstevel@tonic-gate 
18860Sstevel@tonic-gate 			/*
18870Sstevel@tonic-gate 			 * The following algorithm is used to detach replicas.
18880Sstevel@tonic-gate 			 * 	- META_DB_DETACH message generates submsgs
18890Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
18900Sstevel@tonic-gate 			 * 		- MDDB_DETACH replicas
18910Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
18920Sstevel@tonic-gate 			 *		information to be sent from master
18930Sstevel@tonic-gate 			 *		to slaves at a higher class than the
18940Sstevel@tonic-gate 			 *		unblock so the parse message will
18950Sstevel@tonic-gate 			 *		reach slaves before unblock message.
18960Sstevel@tonic-gate 			 */
18970Sstevel@tonic-gate 			(void) splitname(np->bname, &detach.msg_splitname);
18980Sstevel@tonic-gate 
18990Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
19000Sstevel@tonic-gate 			detach.msg_devid[0] = NULL;
19010Sstevel@tonic-gate 
19020Sstevel@tonic-gate 			/*
19030Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
19040Sstevel@tonic-gate 			 * stuck in in the return step until this command has
19050Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
19060Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
19070Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
19080Sstevel@tonic-gate 			 * cycle to proceed.
19090Sstevel@tonic-gate 			 */
19100Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
19110Sstevel@tonic-gate 			if (stale_bool == TRUE)
19120Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
19130Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
19140Sstevel@tonic-gate 				MD_MN_MSG_META_DB_DETACH,
19150Sstevel@tonic-gate 				flags, (char *)&detach,
19160Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_detach_t),
19170Sstevel@tonic-gate 				&resultp, ep);
19180Sstevel@tonic-gate 			if (send_rval != 0) {
19190Sstevel@tonic-gate 				rval = -1;
19200Sstevel@tonic-gate 				if (resultp == NULL)
19210Sstevel@tonic-gate 					(void) mddserror(ep,
19220Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
19230Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
19240Sstevel@tonic-gate 					    sp->setname);
19250Sstevel@tonic-gate 				else {
19260Sstevel@tonic-gate 					(void) mdstealerror(ep,
19270Sstevel@tonic-gate 					    &(resultp->mmr_ep));
19280Sstevel@tonic-gate 					if (mdisok(ep)) {
19290Sstevel@tonic-gate 						(void) mddserror(ep,
19300Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
19310Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
19320Sstevel@tonic-gate 						    sp->setname);
19330Sstevel@tonic-gate 					}
19340Sstevel@tonic-gate 					free_result(resultp);
19350Sstevel@tonic-gate 				}
19360Sstevel@tonic-gate 				goto out;
19370Sstevel@tonic-gate 			}
19380Sstevel@tonic-gate 			if (resultp)
19390Sstevel@tonic-gate 				free_result(resultp);
19400Sstevel@tonic-gate 		} else {
19410Sstevel@tonic-gate 			i = 0;
19420Sstevel@tonic-gate 			while (i < c.c_dbcnt) {
19430Sstevel@tonic-gate 				char	*devname;
19440Sstevel@tonic-gate 
19450Sstevel@tonic-gate 				c.c_id = i;
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
19480Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
19490Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
19500Sstevel@tonic-gate 
19510Sstevel@tonic-gate 				if (metaioctl(MD_DB_GETDEV, &c,
19520Sstevel@tonic-gate 				    &c.c_mde, NULL)) {
19530Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
19540Sstevel@tonic-gate 					goto out;
19550Sstevel@tonic-gate 				}
19560Sstevel@tonic-gate 
19570Sstevel@tonic-gate 				devname = splicename(&c.c_devname);
19580Sstevel@tonic-gate 				if (strcmp(devname, np->bname) != 0) {
19590Sstevel@tonic-gate 					Free(devname);
19600Sstevel@tonic-gate 					i++;
19610Sstevel@tonic-gate 					continue;
19620Sstevel@tonic-gate 				}
19630Sstevel@tonic-gate 				Free(devname);
19640Sstevel@tonic-gate 
19650Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
19660Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
19670Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
19680Sstevel@tonic-gate 
19690Sstevel@tonic-gate 				if (metaioctl(MD_DB_DELDEV, &c,
19700Sstevel@tonic-gate 				    &c.c_mde, NULL) != 0) {
19710Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
19720Sstevel@tonic-gate 					goto out;
19730Sstevel@tonic-gate 				}
19740Sstevel@tonic-gate 
19750Sstevel@tonic-gate 				/* Not incrementing "i" intentionally */
19760Sstevel@tonic-gate 			}
19770Sstevel@tonic-gate 		}
19780Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
19790Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
19800Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next) {
19810Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
19820Sstevel@tonic-gate 					p->dd_dbcnt = 0;
19830Sstevel@tonic-gate 					p->dd_dbsize  = 0;
19840Sstevel@tonic-gate 					break;
19850Sstevel@tonic-gate 				}
19860Sstevel@tonic-gate 			}
19870Sstevel@tonic-gate 
19880Sstevel@tonic-gate 			/*
19890Sstevel@tonic-gate 			 * Slam a dummy master block and make it self
19900Sstevel@tonic-gate 			 * identifying
19910Sstevel@tonic-gate 			 */
19920Sstevel@tonic-gate 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
19930Sstevel@tonic-gate 				meta_mkdummymaster(sp, fd, 16);
19940Sstevel@tonic-gate 				(void) close(fd);
19950Sstevel@tonic-gate 			}
19960Sstevel@tonic-gate 		}
19970Sstevel@tonic-gate 	}
19980Sstevel@tonic-gate out:
19990Sstevel@tonic-gate 	if (metaislocalset(sp)) {
20000Sstevel@tonic-gate 		/*
20010Sstevel@tonic-gate 		 * Stop all the daemons if there are
20020Sstevel@tonic-gate 		 * no more replicas so that the module can be
20030Sstevel@tonic-gate 		 * unloaded.
20040Sstevel@tonic-gate 		 */
20050Sstevel@tonic-gate 		if (rval == 0 && stop_svmdaemons == 1) {
20060Sstevel@tonic-gate 			char buf[MAXPATHLEN];
20070Sstevel@tonic-gate 			int i;
20080Sstevel@tonic-gate 
20090Sstevel@tonic-gate 			for (i = 0; i < DAEMON_COUNT; i++) {
20100Sstevel@tonic-gate 				(void) snprintf(buf, MAXPATHLEN,
20110Sstevel@tonic-gate 					"/usr/bin/pkill -%s -x %s",
20120Sstevel@tonic-gate 					svmd_kill_list[i].svmd_kill_val,
20130Sstevel@tonic-gate 					svmd_kill_list[i].svmd_name);
20140Sstevel@tonic-gate 				if (pclose(popen(buf, "w")) == -1)
20150Sstevel@tonic-gate 					md_perror(buf);
20160Sstevel@tonic-gate 			}
20170Sstevel@tonic-gate 
20180Sstevel@tonic-gate 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
20190Sstevel@tonic-gate 				mde_perror(&status, "");
20200Sstevel@tonic-gate 				mdclrerror(&status);
20210Sstevel@tonic-gate 			}
20220Sstevel@tonic-gate 		}
20230Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
20240Sstevel@tonic-gate 			/* Don't mask any previous errors */
20250Sstevel@tonic-gate 			if (rval == 0)
20260Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20270Sstevel@tonic-gate 			else
20280Sstevel@tonic-gate 				mdclrerror(&status);
20290Sstevel@tonic-gate 			return (rval);
20300Sstevel@tonic-gate 		}
20310Sstevel@tonic-gate 
20320Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
20330Sstevel@tonic-gate 			/* Don't mask any previous errors */
20340Sstevel@tonic-gate 			if (rval == 0)
20350Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20360Sstevel@tonic-gate 			else
20370Sstevel@tonic-gate 				mdclrerror(&status);
20380Sstevel@tonic-gate 		}
20390Sstevel@tonic-gate 	} else {
20400Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
20410Sstevel@tonic-gate 		    (force_option & MDFORCE_SET_LOCKED),
20420Sstevel@tonic-gate 		    ((force_option & MDFORCE_LOCAL) |
20430Sstevel@tonic-gate 		    (force_option & MDFORCE_DS)), &status)) {
20440Sstevel@tonic-gate 			/* Don't mask any previous errors */
20450Sstevel@tonic-gate 			if (rval == 0)
20460Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20470Sstevel@tonic-gate 			else
20480Sstevel@tonic-gate 				mdclrerror(&status);
20490Sstevel@tonic-gate 		}
20500Sstevel@tonic-gate 		metafreedrivedesc(&dd);
20510Sstevel@tonic-gate 	}
20520Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
20530Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
20540Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
20550Sstevel@tonic-gate 		}
20560Sstevel@tonic-gate 	}
20570Sstevel@tonic-gate 	return (rval);
20580Sstevel@tonic-gate }
20590Sstevel@tonic-gate 
20600Sstevel@tonic-gate static md_replica_t *
20610Sstevel@tonic-gate metareplicaname(
20620Sstevel@tonic-gate 	mdsetname_t		*sp,
20630Sstevel@tonic-gate 	int			flags,
20640Sstevel@tonic-gate 	struct mddb_config	*c,
20650Sstevel@tonic-gate 	md_error_t		*ep
20660Sstevel@tonic-gate )
20670Sstevel@tonic-gate {
20680Sstevel@tonic-gate 	md_replica_t	*rp;
20690Sstevel@tonic-gate 	char		*devname;
20700Sstevel@tonic-gate 	size_t		sz;
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate 	/* allocate replicaname */
20730Sstevel@tonic-gate 	rp = Zalloc(sizeof (*rp));
20740Sstevel@tonic-gate 
20750Sstevel@tonic-gate 	/* get device name */
20760Sstevel@tonic-gate 	devname = splicename(&c->c_devname);
20770Sstevel@tonic-gate 	if (flags & PRINT_FAST) {
2078*1623Stw21770 		if ((rp->r_namep = metaname_fast(&sp, devname,
2079*1623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
20800Sstevel@tonic-gate 			Free(devname);
20810Sstevel@tonic-gate 			Free(rp);
20820Sstevel@tonic-gate 			return (NULL);
20830Sstevel@tonic-gate 		}
20840Sstevel@tonic-gate 	} else {
2085*1623Stw21770 		if ((rp->r_namep = metaname(&sp, devname,
2086*1623Stw21770 		    LOGICAL_DEVICE, ep)) == NULL) {
20870Sstevel@tonic-gate 			Free(devname);
20880Sstevel@tonic-gate 			Free(rp);
20890Sstevel@tonic-gate 			return (NULL);
20900Sstevel@tonic-gate 		}
20910Sstevel@tonic-gate 	}
20920Sstevel@tonic-gate 	Free(devname);
20930Sstevel@tonic-gate 
20940Sstevel@tonic-gate 	/* make sure it's OK */
20950Sstevel@tonic-gate 	if ((! (flags & MD_BASICNAME_OK)) &&
20960Sstevel@tonic-gate 	    (metachkcomp(rp->r_namep, ep) != 0)) {
20970Sstevel@tonic-gate 		Free(rp);
20980Sstevel@tonic-gate 		return (NULL);
20990Sstevel@tonic-gate 	}
21000Sstevel@tonic-gate 
210162Sjeanm 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
210262Sjeanm 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
21030Sstevel@tonic-gate 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
21040Sstevel@tonic-gate 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
210562Sjeanm 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
210662Sjeanm 		    (c->c_locator.l_devid));
21070Sstevel@tonic-gate 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
21080Sstevel@tonic-gate 		    (ddi_devid_t)NULL) {
21090Sstevel@tonic-gate 			Free(rp);
21100Sstevel@tonic-gate 			return (NULL);
21110Sstevel@tonic-gate 		}
21120Sstevel@tonic-gate 		(void) memcpy((void *)rp->r_devid,
211362Sjeanm 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
21140Sstevel@tonic-gate 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
21150Sstevel@tonic-gate 		rp->r_flags &= ~MDDB_F_NODEVID;
21160Sstevel@tonic-gate 		/* Overwrite dev derived from name with dev from devid */
21170Sstevel@tonic-gate 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
21180Sstevel@tonic-gate 	}
21190Sstevel@tonic-gate 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
21200Sstevel@tonic-gate 
21210Sstevel@tonic-gate 	rp->r_blkno = c->c_locator.l_blkno;
21220Sstevel@tonic-gate 	if (c->c_dbend != 0)
21230Sstevel@tonic-gate 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
21240Sstevel@tonic-gate 
21250Sstevel@tonic-gate 	/* return replica */
21260Sstevel@tonic-gate 	return (rp);
21270Sstevel@tonic-gate }
21280Sstevel@tonic-gate 
21290Sstevel@tonic-gate /*
21300Sstevel@tonic-gate  * free replica list
21310Sstevel@tonic-gate  */
21320Sstevel@tonic-gate void
21330Sstevel@tonic-gate metafreereplicalist(
21340Sstevel@tonic-gate 	md_replicalist_t	*rlp
21350Sstevel@tonic-gate )
21360Sstevel@tonic-gate {
21370Sstevel@tonic-gate 	md_replicalist_t	*rl = NULL;
21380Sstevel@tonic-gate 
21390Sstevel@tonic-gate 	for (/* void */; (rlp != NULL); rlp = rl) {
21400Sstevel@tonic-gate 		rl = rlp->rl_next;
21410Sstevel@tonic-gate 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
21420Sstevel@tonic-gate 			free(rlp->rl_repp->r_devid);
21430Sstevel@tonic-gate 		}
21440Sstevel@tonic-gate 		Free(rlp->rl_repp);
21450Sstevel@tonic-gate 		Free(rlp);
21460Sstevel@tonic-gate 	}
21470Sstevel@tonic-gate }
21480Sstevel@tonic-gate 
21490Sstevel@tonic-gate /*
21500Sstevel@tonic-gate  * return list of all replicas in set
21510Sstevel@tonic-gate  */
21520Sstevel@tonic-gate int
21530Sstevel@tonic-gate metareplicalist(
21540Sstevel@tonic-gate 	mdsetname_t		*sp,
21550Sstevel@tonic-gate 	int			flags,
21560Sstevel@tonic-gate 	md_replicalist_t	**rlpp,
21570Sstevel@tonic-gate 	md_error_t		*ep
21580Sstevel@tonic-gate )
21590Sstevel@tonic-gate {
21600Sstevel@tonic-gate 	md_replicalist_t	**tail = rlpp;
21610Sstevel@tonic-gate 	int			count = 0;
21620Sstevel@tonic-gate 	struct mddb_config	c;
21630Sstevel@tonic-gate 	int			i;
21640Sstevel@tonic-gate 	char			*devid;
21650Sstevel@tonic-gate 
21660Sstevel@tonic-gate 	/* for each replica */
21670Sstevel@tonic-gate 	i = 0;
21680Sstevel@tonic-gate 	do {
21690Sstevel@tonic-gate 		md_replica_t	*rp;
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 		/* get next replica */
21720Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
21730Sstevel@tonic-gate 		c.c_id = i;
21740Sstevel@tonic-gate 		c.c_setno = sp->setno;
21750Sstevel@tonic-gate 
21760Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
21770Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
21780Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
21790Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
21800Sstevel@tonic-gate 				break;	/* handle none at all */
21810Sstevel@tonic-gate 			}
21820Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
21830Sstevel@tonic-gate 			goto out;
21840Sstevel@tonic-gate 		}
21850Sstevel@tonic-gate 
21860Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
21870Sstevel@tonic-gate 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
21880Sstevel@tonic-gate 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
21890Sstevel@tonic-gate 				goto out;
21900Sstevel@tonic-gate 			}
21910Sstevel@tonic-gate 			c.c_locator.l_devid = (uintptr_t)devid;
21920Sstevel@tonic-gate 			/*
21930Sstevel@tonic-gate 			 * Turn on space and sz flags since 'sz' amount of
21940Sstevel@tonic-gate 			 * space has been alloc'd.
21950Sstevel@tonic-gate 			 */
21960Sstevel@tonic-gate 			c.c_locator.l_devid_flags =
21970Sstevel@tonic-gate 				MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
21980Sstevel@tonic-gate 		}
21990Sstevel@tonic-gate 
22000Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
22010Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
22020Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
22030Sstevel@tonic-gate 				break;	/* handle none at all */
22040Sstevel@tonic-gate 			}
22050Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
22060Sstevel@tonic-gate 			goto out;
22070Sstevel@tonic-gate 		}
22080Sstevel@tonic-gate 
22090Sstevel@tonic-gate 		/*
22100Sstevel@tonic-gate 		 * Paranoid check - shouldn't happen, but is left as
22110Sstevel@tonic-gate 		 * a place holder for changes that will be needed after
22120Sstevel@tonic-gate 		 * dynamic reconfiguration changes are added to SVM (to
22130Sstevel@tonic-gate 		 * support movement of disks at any point in time).
22140Sstevel@tonic-gate 		 */
22150Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
22160Sstevel@tonic-gate 			(void) fprintf(stderr,
22170Sstevel@tonic-gate 			    dgettext(TEXT_DOMAIN,
22180Sstevel@tonic-gate 				"Error: Relocation Information "
22190Sstevel@tonic-gate 				"(drvnm=%s, mnum=0x%lx) \n"
22200Sstevel@tonic-gate 				"relocation information size changed - \n"
22210Sstevel@tonic-gate 				"rerun command\n"),
22220Sstevel@tonic-gate 			    c.c_locator.l_driver, c.c_locator.l_mnum);
22230Sstevel@tonic-gate 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
22240Sstevel@tonic-gate 			goto out;
22250Sstevel@tonic-gate 		}
22260Sstevel@tonic-gate 
22270Sstevel@tonic-gate 		if (c.c_dbcnt == 0)
22280Sstevel@tonic-gate 			break;		/* handle none at all */
22290Sstevel@tonic-gate 
22300Sstevel@tonic-gate 		/* get info */
22310Sstevel@tonic-gate 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
22320Sstevel@tonic-gate 			goto out;
22330Sstevel@tonic-gate 
22340Sstevel@tonic-gate 		/* append to list */
22350Sstevel@tonic-gate 		*tail = Zalloc(sizeof (**tail));
22360Sstevel@tonic-gate 		(*tail)->rl_repp = rp;
22370Sstevel@tonic-gate 		tail = &(*tail)->rl_next;
22380Sstevel@tonic-gate 		++count;
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22410Sstevel@tonic-gate 			free(devid);
22420Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
22430Sstevel@tonic-gate 		}
22440Sstevel@tonic-gate 
22450Sstevel@tonic-gate 	} while (++i < c.c_dbcnt);
22460Sstevel@tonic-gate 
22470Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22480Sstevel@tonic-gate 		free(devid);
22490Sstevel@tonic-gate 	}
22500Sstevel@tonic-gate 
22510Sstevel@tonic-gate 	/* return count */
22520Sstevel@tonic-gate 	return (count);
22530Sstevel@tonic-gate 
22540Sstevel@tonic-gate 	/* cleanup, return error */
22550Sstevel@tonic-gate out:
22560Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22570Sstevel@tonic-gate 		free(devid);
22580Sstevel@tonic-gate 	}
22590Sstevel@tonic-gate 	metafreereplicalist(*rlpp);
22600Sstevel@tonic-gate 	*rlpp = NULL;
22610Sstevel@tonic-gate 	return (-1);
22620Sstevel@tonic-gate }
22630Sstevel@tonic-gate 
22640Sstevel@tonic-gate /*
22650Sstevel@tonic-gate  * meta_sync_db_locations - get list of replicas from kernel and write
22660Sstevel@tonic-gate  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
22670Sstevel@tonic-gate  * 	the kernel with the replica list in the conf files.
22680Sstevel@tonic-gate  *
22690Sstevel@tonic-gate  */
22700Sstevel@tonic-gate void
22710Sstevel@tonic-gate meta_sync_db_locations(
22720Sstevel@tonic-gate 	mdsetname_t	*sp,
22730Sstevel@tonic-gate 	md_error_t	*ep
22740Sstevel@tonic-gate )
22750Sstevel@tonic-gate {
22760Sstevel@tonic-gate 	char		*sname = 0;		/* system file name */
22770Sstevel@tonic-gate 	char 		*cname = 0;		/* config file name */
22780Sstevel@tonic-gate 
22790Sstevel@tonic-gate 	if (!metaislocalset(sp))
22800Sstevel@tonic-gate 		return;
22810Sstevel@tonic-gate 
22820Sstevel@tonic-gate 	/* Updates backup of configuration file (aka mddb.cf) */
22830Sstevel@tonic-gate 	if (buildconf(sp, ep) != 0)
22840Sstevel@tonic-gate 		return;
22850Sstevel@tonic-gate 
22860Sstevel@tonic-gate 	/* Updates system configuration file (aka md.conf) */
22870Sstevel@tonic-gate 	(void) meta_db_patch(sname, cname, 0, ep);
22880Sstevel@tonic-gate }
22890Sstevel@tonic-gate 
22900Sstevel@tonic-gate /*
22910Sstevel@tonic-gate  * setup_db_locations - parse the mddb.cf file and
22920Sstevel@tonic-gate  *			tells the driver which db locations to use.
22930Sstevel@tonic-gate  */
22940Sstevel@tonic-gate int
22950Sstevel@tonic-gate meta_setup_db_locations(
22960Sstevel@tonic-gate 	md_error_t	*ep
22970Sstevel@tonic-gate )
22980Sstevel@tonic-gate {
22990Sstevel@tonic-gate 	mddb_config_t	c;
23000Sstevel@tonic-gate 	FILE		*fp;
23010Sstevel@tonic-gate 	char		inbuff[1024];
23020Sstevel@tonic-gate 	char		*buff;
23030Sstevel@tonic-gate 	uint_t		i;
23040Sstevel@tonic-gate 	size_t		sz;
23050Sstevel@tonic-gate 	int		rval = 0;
23060Sstevel@tonic-gate 	char		*devidp;
23070Sstevel@tonic-gate 	uint_t		devid_size;
23080Sstevel@tonic-gate 	char		*minor_name = NULL;
23090Sstevel@tonic-gate 	ddi_devid_t	devid_decode;
23100Sstevel@tonic-gate 	int		checksum;
23110Sstevel@tonic-gate 
23120Sstevel@tonic-gate 	/* do mddb.cf file */
23130Sstevel@tonic-gate 	(void) memset(&c, '\0', sizeof (c));
23140Sstevel@tonic-gate 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
23150Sstevel@tonic-gate 		if (errno != ENOENT)
23160Sstevel@tonic-gate 			return (mdsyserror(ep, errno, META_DBCONF));
23170Sstevel@tonic-gate 	}
23180Sstevel@tonic-gate 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
23190Sstevel@tonic-gate 	    fp)) != NULL)) {
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate 		/* ignore comments */
23220Sstevel@tonic-gate 		if (*buff == '#')
23230Sstevel@tonic-gate 			continue;
23240Sstevel@tonic-gate 
23250Sstevel@tonic-gate 		/* parse locator */
23260Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
23270Sstevel@tonic-gate 		c.c_setno = MD_LOCAL_SET;
23280Sstevel@tonic-gate 		i = strcspn(buff, " \t");
23290Sstevel@tonic-gate 		if (i > sizeof (c.c_locator.l_driver))
23300Sstevel@tonic-gate 			i = sizeof (c.c_locator.l_driver);
23310Sstevel@tonic-gate 		(void) strncpy(c.c_locator.l_driver, buff, i);
23320Sstevel@tonic-gate 		buff += i;
23330Sstevel@tonic-gate 		c.c_locator.l_dev =
23340Sstevel@tonic-gate 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
23350Sstevel@tonic-gate 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
23360Sstevel@tonic-gate 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
23370Sstevel@tonic-gate 
23380Sstevel@tonic-gate 		/* parse out devid */
23390Sstevel@tonic-gate 		while (isspace((int)(*buff)))
23400Sstevel@tonic-gate 			buff += 1;
23410Sstevel@tonic-gate 		i = strcspn(buff, " \t");
23420Sstevel@tonic-gate 		if ((devidp = (char *)malloc(i+1)) == NULL)
23430Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23440Sstevel@tonic-gate 
23450Sstevel@tonic-gate 		(void) strncpy(devidp, buff, i);
23460Sstevel@tonic-gate 		devidp[i] = '\0';
23470Sstevel@tonic-gate 		if (devid_str_decode(devidp, &devid_decode,
23480Sstevel@tonic-gate 		    &minor_name) == -1) {
23490Sstevel@tonic-gate 			free(devidp);
23500Sstevel@tonic-gate 			continue;
23510Sstevel@tonic-gate 		}
23520Sstevel@tonic-gate 
23530Sstevel@tonic-gate 		/* Conf file must have minor name associated with devid */
23540Sstevel@tonic-gate 		if (minor_name == NULL) {
23550Sstevel@tonic-gate 			free(devidp);
23560Sstevel@tonic-gate 			devid_free(devid_decode);
23570Sstevel@tonic-gate 			continue;
23580Sstevel@tonic-gate 		}
23590Sstevel@tonic-gate 
23600Sstevel@tonic-gate 		sz = devid_sizeof(devid_decode);
23610Sstevel@tonic-gate 		/* Copy to devid size buffer that ioctl expects */
23620Sstevel@tonic-gate 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
23630Sstevel@tonic-gate 			devid_free(devid_decode);
23640Sstevel@tonic-gate 			free(minor_name);
23650Sstevel@tonic-gate 			free(devidp);
23660Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23670Sstevel@tonic-gate 		}
23680Sstevel@tonic-gate 
236962Sjeanm 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
23700Sstevel@tonic-gate 		    (void *)devid_decode, sz);
23710Sstevel@tonic-gate 
23720Sstevel@tonic-gate 		devid_free(devid_decode);
23730Sstevel@tonic-gate 
23740Sstevel@tonic-gate 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
23750Sstevel@tonic-gate 			free(minor_name);
23760Sstevel@tonic-gate 			free(devidp);
237762Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
23780Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23790Sstevel@tonic-gate 		}
23800Sstevel@tonic-gate 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
23810Sstevel@tonic-gate 		free(minor_name);
23820Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
23830Sstevel@tonic-gate 			MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
23840Sstevel@tonic-gate 		c.c_locator.l_devid_sz = sz;
23850Sstevel@tonic-gate 
23860Sstevel@tonic-gate 		devid_size = strlen(devidp);
23870Sstevel@tonic-gate 		buff += devid_size;
23880Sstevel@tonic-gate 
23890Sstevel@tonic-gate 		checksum = strtol(buff, &buff, 10);
23900Sstevel@tonic-gate 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
23910Sstevel@tonic-gate 			checksum += c.c_locator.l_driver[i];
23920Sstevel@tonic-gate 		for (i = 0; i < devid_size; i++) {
23930Sstevel@tonic-gate 			checksum += devidp[i];
23940Sstevel@tonic-gate 		}
23950Sstevel@tonic-gate 		free(devidp);
23960Sstevel@tonic-gate 
23970Sstevel@tonic-gate 		checksum += minor(c.c_locator.l_dev);
23980Sstevel@tonic-gate 		checksum += c.c_locator.l_blkno;
23990Sstevel@tonic-gate 		if (checksum != 42) {
24000Sstevel@tonic-gate 			/* overwritten later for more serious problems */
24010Sstevel@tonic-gate 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
240262Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
24030Sstevel@tonic-gate 			continue;
24040Sstevel@tonic-gate 		}
24050Sstevel@tonic-gate 		c.c_locator.l_flags = 0;
24060Sstevel@tonic-gate 
24070Sstevel@tonic-gate 		/* use db location */
24080Sstevel@tonic-gate 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
240962Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
24100Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
24110Sstevel@tonic-gate 		}
24120Sstevel@tonic-gate 
24130Sstevel@tonic-gate 		/* free up devid if in use */
241462Sjeanm 		free((void *)(uintptr_t)c.c_locator.l_devid);
24150Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
24160Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
24170Sstevel@tonic-gate 	}
24180Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
24190Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_DBCONF));
24200Sstevel@tonic-gate 
24210Sstevel@tonic-gate 	/* check for stale database */
24220Sstevel@tonic-gate 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
24230Sstevel@tonic-gate 	c.c_id = 0;
24240Sstevel@tonic-gate 	c.c_setno = MD_LOCAL_SET;
24250Sstevel@tonic-gate 
24260Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
24270Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
24280Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
24290Sstevel@tonic-gate 
24300Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
24310Sstevel@tonic-gate 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
24320Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
24330Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
24340Sstevel@tonic-gate 	}
24350Sstevel@tonic-gate 
24360Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE)
24370Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
24380Sstevel@tonic-gate 		    0, NULL));
24390Sstevel@tonic-gate 
24400Sstevel@tonic-gate 	/* success */
24410Sstevel@tonic-gate 	return (rval);
24420Sstevel@tonic-gate }
24430Sstevel@tonic-gate 
24440Sstevel@tonic-gate /*
24450Sstevel@tonic-gate  * meta_db_minreplica - returns the minimum size replica currently in use.
24460Sstevel@tonic-gate  */
24470Sstevel@tonic-gate daddr_t
24480Sstevel@tonic-gate meta_db_minreplica(
24490Sstevel@tonic-gate 	mdsetname_t	*sp,
24500Sstevel@tonic-gate 	md_error_t	*ep
24510Sstevel@tonic-gate )
24520Sstevel@tonic-gate {
24530Sstevel@tonic-gate 	md_replica_t		*r;
24540Sstevel@tonic-gate 	md_replicalist_t	*rl, *rlp = NULL;
24550Sstevel@tonic-gate 	daddr_t			nblks = 0;
24560Sstevel@tonic-gate 
24570Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
24580Sstevel@tonic-gate 		return (-1);
24590Sstevel@tonic-gate 
24600Sstevel@tonic-gate 	if (rlp == NULL)
24610Sstevel@tonic-gate 		return (-1);
24620Sstevel@tonic-gate 
24630Sstevel@tonic-gate 	/* find the smallest existing replica */
24640Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
24650Sstevel@tonic-gate 		r = rl->rl_repp;
24660Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
24670Sstevel@tonic-gate 	}
24680Sstevel@tonic-gate 
24690Sstevel@tonic-gate 	metafreereplicalist(rlp);
24700Sstevel@tonic-gate 	return (nblks);
24710Sstevel@tonic-gate }
24720Sstevel@tonic-gate 
24730Sstevel@tonic-gate /*
24740Sstevel@tonic-gate  * meta_get_replica_names
24750Sstevel@tonic-gate  *  returns an mdnamelist_t of replica slices
24760Sstevel@tonic-gate  */
24770Sstevel@tonic-gate /*ARGSUSED*/
24780Sstevel@tonic-gate int
24790Sstevel@tonic-gate meta_get_replica_names(
24800Sstevel@tonic-gate 	mdsetname_t	*sp,
24810Sstevel@tonic-gate 	mdnamelist_t	**nlpp,
24820Sstevel@tonic-gate 	int		options,
24830Sstevel@tonic-gate 	md_error_t	*ep
24840Sstevel@tonic-gate )
24850Sstevel@tonic-gate {
24860Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
24870Sstevel@tonic-gate 	md_replicalist_t	*rl;
24880Sstevel@tonic-gate 	mdnamelist_t		**tailpp = nlpp;
24890Sstevel@tonic-gate 	int			cnt = 0;
24900Sstevel@tonic-gate 
24910Sstevel@tonic-gate 	assert(nlpp != NULL);
24920Sstevel@tonic-gate 
24930Sstevel@tonic-gate 	if (!metaislocalset(sp))
24940Sstevel@tonic-gate 		goto out;
24950Sstevel@tonic-gate 
24960Sstevel@tonic-gate 	/* get replicas */
24970Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
24980Sstevel@tonic-gate 		cnt = -1;
24990Sstevel@tonic-gate 		goto out;
25000Sstevel@tonic-gate 	}
25010Sstevel@tonic-gate 
25020Sstevel@tonic-gate 	/* build name list */
25030Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
25040Sstevel@tonic-gate 		/*
25050Sstevel@tonic-gate 		 * Add the name struct to the end of the
25060Sstevel@tonic-gate 		 * namelist but keep a pointer to the last
25070Sstevel@tonic-gate 		 * element so that we don't incur the overhead
25080Sstevel@tonic-gate 		 * of traversing the list each time
25090Sstevel@tonic-gate 		 */
25100Sstevel@tonic-gate 		tailpp = meta_namelist_append_wrapper(
25110Sstevel@tonic-gate 			tailpp, rl->rl_repp->r_namep);
25120Sstevel@tonic-gate 		++cnt;
25130Sstevel@tonic-gate 	}
25140Sstevel@tonic-gate 
25150Sstevel@tonic-gate 	/* cleanup, return count or error */
25160Sstevel@tonic-gate out:
25170Sstevel@tonic-gate 	metafreereplicalist(rlp);
25180Sstevel@tonic-gate 	return (cnt);
25190Sstevel@tonic-gate }
2520