10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
310Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
320Sstevel@tonic-gate  */
330Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
340Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
350Sstevel@tonic-gate #endif
360Sstevel@tonic-gate 
370Sstevel@tonic-gate /*
380Sstevel@tonic-gate  * Metadevice database interfaces.
390Sstevel@tonic-gate  */
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #define	MDDB
420Sstevel@tonic-gate 
430Sstevel@tonic-gate #include <meta.h>
440Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
450Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
460Sstevel@tonic-gate #include <sys/lvm/mdio.h>
470Sstevel@tonic-gate #include <string.h>
480Sstevel@tonic-gate #include <strings.h>
490Sstevel@tonic-gate #include <ctype.h>
500Sstevel@tonic-gate 
/*
 * One entry per SVM daemon: the daemon's process name paired with the
 * name of the signal recorded for it.
 */
struct svm_daemon {
	char *svmd_name;	/* daemon process name */
	char *svmd_kill_val;	/* signal name, without any "SIG" prefix */
};

/* Table of daemons and the signal name associated with each of them. */
struct svm_daemon svmd_kill_list[] = {
	{ "mdmonitord",	"HUP" },
	{ "mddoors",	"KILL" }
};

/* Number of entries in svmd_kill_list. */
#define	DAEMON_COUNT	\
	(sizeof (svmd_kill_list) / sizeof (svmd_kill_list[0]))

/* Full path of the mdmonitord executable. */
#define	MDMONITORD	"/usr/sbin/mdmonitord"
630Sstevel@tonic-gate 
640Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
650Sstevel@tonic-gate 
660Sstevel@tonic-gate /*
670Sstevel@tonic-gate  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
680Sstevel@tonic-gate  */
690Sstevel@tonic-gate md_timeval32_t
700Sstevel@tonic-gate meta_get_lb_inittime(
710Sstevel@tonic-gate 	mdsetname_t	*sp,
720Sstevel@tonic-gate 	md_error_t	*ep
730Sstevel@tonic-gate )
740Sstevel@tonic-gate {
750Sstevel@tonic-gate 	mddb_config_t	c;
760Sstevel@tonic-gate 
770Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
780Sstevel@tonic-gate 
790Sstevel@tonic-gate 	/* Fill in setno, setname, and sideno */
800Sstevel@tonic-gate 	c.c_setno = sp->setno;
810Sstevel@tonic-gate 
820Sstevel@tonic-gate 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
830Sstevel@tonic-gate 		(void) mdstealerror(ep, &c.c_mde);
840Sstevel@tonic-gate 	}
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 	return (c.c_timestamp);
870Sstevel@tonic-gate }
880Sstevel@tonic-gate 
890Sstevel@tonic-gate /*
900Sstevel@tonic-gate  * mkmasterblks writes out the master blocks of the mddb to the replica.
910Sstevel@tonic-gate  *
920Sstevel@tonic-gate  * In a MN diskset, this is called by the node that is adding this replica
930Sstevel@tonic-gate  * to the diskset.
940Sstevel@tonic-gate  */
950Sstevel@tonic-gate 
960Sstevel@tonic-gate #define	MDDB_VERIFY_SIZE	8192
970Sstevel@tonic-gate 
980Sstevel@tonic-gate static int
990Sstevel@tonic-gate mkmasterblks(
1000Sstevel@tonic-gate 	mdsetname_t	*sp,
1010Sstevel@tonic-gate 	mdname_t	*np,
1020Sstevel@tonic-gate 	int		fd,
1030Sstevel@tonic-gate 	daddr_t		firstblk,
1040Sstevel@tonic-gate 	int		dbsize,
1050Sstevel@tonic-gate 	md_timeval32_t	inittime,
1060Sstevel@tonic-gate 	md_error_t	*ep
1070Sstevel@tonic-gate )
1080Sstevel@tonic-gate {
1090Sstevel@tonic-gate 	int		consecutive;
1100Sstevel@tonic-gate 	md_timeval32_t	tp;
1110Sstevel@tonic-gate 	struct mddb_mb	*mb;
1120Sstevel@tonic-gate 	char		*buffer;
1130Sstevel@tonic-gate 	int		iosize;
1140Sstevel@tonic-gate 	md_set_desc	*sd;
1150Sstevel@tonic-gate 	int		mn_set = 0;
1160Sstevel@tonic-gate 	daddr_t		startblk;
1170Sstevel@tonic-gate 	int		cnt;
1180Sstevel@tonic-gate 	ddi_devid_t	devid;
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1210Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1220Sstevel@tonic-gate 			return (-1);
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
1250Sstevel@tonic-gate 			mn_set = 1;		/* Used later */
1260Sstevel@tonic-gate 		}
1270Sstevel@tonic-gate 	}
1280Sstevel@tonic-gate 
1290Sstevel@tonic-gate 	/*
1300Sstevel@tonic-gate 	 * Loop to verify the entire mddb region on disk is read/writable.
1310Sstevel@tonic-gate 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
1320Sstevel@tonic-gate 	 * chunks.
1330Sstevel@tonic-gate 	 *
1340Sstevel@tonic-gate 	 * A side-effect of this loop is to zero out the entire mddb region
1350Sstevel@tonic-gate 	 */
1360Sstevel@tonic-gate 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
1370Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	startblk = firstblk;
1400Sstevel@tonic-gate 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
1410Sstevel@tonic-gate 
1420Sstevel@tonic-gate 		if (cnt > MDDB_VERIFY_SIZE)
1430Sstevel@tonic-gate 			consecutive = MDDB_VERIFY_SIZE;
1440Sstevel@tonic-gate 		else
1450Sstevel@tonic-gate 			consecutive = cnt;
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
1480Sstevel@tonic-gate 			Free(buffer);
1490Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1500Sstevel@tonic-gate 		}
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate 		iosize = DEV_BSIZE * consecutive;
1530Sstevel@tonic-gate 		if (write(fd, buffer, iosize) != iosize) {
1540Sstevel@tonic-gate 			Free(buffer);
1550Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1560Sstevel@tonic-gate 		}
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
1590Sstevel@tonic-gate 			Free(buffer);
1600Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1610Sstevel@tonic-gate 		}
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 		if (read(fd, buffer, iosize) != iosize) {
1640Sstevel@tonic-gate 			Free(buffer);
1650Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1660Sstevel@tonic-gate 		}
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 		startblk += consecutive;
1690Sstevel@tonic-gate 	}
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 	Free(buffer);
1720Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
1730Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) == -1) {
1760Sstevel@tonic-gate 		Free(mb);
1770Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
1780Sstevel@tonic-gate 	}
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_MB;
1810Sstevel@tonic-gate 	/*
1820Sstevel@tonic-gate 	 * If a MN diskset, set master block revision for a MN set.
1830Sstevel@tonic-gate 	 * Even though the master block structure is no different
1840Sstevel@tonic-gate 	 * for a MN set, setting the revision field to a different
1850Sstevel@tonic-gate 	 * number keeps any pre-MN_diskset code from accessing
1860Sstevel@tonic-gate 	 * this diskset.  It also allows for an early determination
1870Sstevel@tonic-gate 	 * of a MN diskset when reading in from disk so that the
1880Sstevel@tonic-gate 	 * proper size locator block and locator names structure
1890Sstevel@tonic-gate 	 * can be read in thus saving time on diskset startup.
1900Sstevel@tonic-gate 	 */
1910Sstevel@tonic-gate 	if (mn_set)
1920Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MNMB;
1930Sstevel@tonic-gate 	else
1940Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MB;
1950Sstevel@tonic-gate 	mb->mb_timestamp = tp;
1960Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
1970Sstevel@tonic-gate 	mb->mb_blkcnt = dbsize - 1;
1980Sstevel@tonic-gate 	mb->mb_blkno = firstblk;
1990Sstevel@tonic-gate 	mb->mb_nextblk = 0;
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	mb->mb_blkmap.m_firstblk = firstblk + 1;
2020Sstevel@tonic-gate 	mb->mb_blkmap.m_consecutive = dbsize - 1;
2030Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2040Sstevel@tonic-gate 		mb->mb_setcreatetime = inittime;
2050Sstevel@tonic-gate 	}
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	/*
2080Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
2090Sstevel@tonic-gate 	 * the master block. The saved devid is used to provide a mapping
2100Sstevel@tonic-gate 	 * between this disk's devid and the devid stored into the master
2110Sstevel@tonic-gate 	 * block. This allows the disk image to be self-identifying
2120Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
2130Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
2140Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
2150Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
2160Sstevel@tonic-gate 	 * in the remote copy scenario.
2170Sstevel@tonic-gate 	 */
2180Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
2190Sstevel@tonic-gate 		size_t len;
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate 		len = devid_sizeof(devid);
2220Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
2230Sstevel@tonic-gate 			/* there is enough space to store the devid */
2240Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
2250Sstevel@tonic-gate 			mb->mb_devid_len = len;
2260Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, devid, len);
2270Sstevel@tonic-gate 		}
2280Sstevel@tonic-gate 		devid_free(devid);
2290Sstevel@tonic-gate 	}
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
2320Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
2350Sstevel@tonic-gate 		Free(mb);
2360Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2370Sstevel@tonic-gate 	}
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
2400Sstevel@tonic-gate 		Free(mb);
2410Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2420Sstevel@tonic-gate 	}
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
2450Sstevel@tonic-gate 		Free(mb);
2460Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2470Sstevel@tonic-gate 	}
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
2500Sstevel@tonic-gate 		Free(mb);
2510Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
2520Sstevel@tonic-gate 	}
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
2550Sstevel@tonic-gate 		(uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
2560Sstevel@tonic-gate 		Free(mb);
2570Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_NOTVERIFIED,
2580Sstevel@tonic-gate 			meta_getminor(np->dev), sp->setno, 0, np->rname));
2590Sstevel@tonic-gate 	}
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	Free(mb);
2620Sstevel@tonic-gate 	return (0);
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate void
2660Sstevel@tonic-gate meta_mkdummymaster(
2670Sstevel@tonic-gate 	mdsetname_t	*sp,
2680Sstevel@tonic-gate 	int		fd,
2690Sstevel@tonic-gate 	daddr_t		firstblk
2700Sstevel@tonic-gate )
2710Sstevel@tonic-gate {
2720Sstevel@tonic-gate 	md_timeval32_t	tp;
2730Sstevel@tonic-gate 	struct mddb_mb	*mb;
2740Sstevel@tonic-gate 	ddi_devid_t	devid;
2750Sstevel@tonic-gate 	md_set_desc	*sd;
2760Sstevel@tonic-gate 	md_error_t	ep = mdnullerror;
2770Sstevel@tonic-gate 	md_timeval32_t	inittime;
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	/*
2800Sstevel@tonic-gate 	 * No dummy master blocks are written for a MN diskset since devids
2810Sstevel@tonic-gate 	 * are not supported in MN disksets.
2820Sstevel@tonic-gate 	 */
2830Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
2840Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
2850Sstevel@tonic-gate 			return;
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
2880Sstevel@tonic-gate 			return;
2890Sstevel@tonic-gate 	}
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
2920Sstevel@tonic-gate 		return;
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_DU;
2950Sstevel@tonic-gate 	mb->mb_revision = MDDB_REV_MB;
2960Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
2970Sstevel@tonic-gate 	inittime = meta_get_lb_inittime(sp, &ep);
2980Sstevel@tonic-gate 	mb->mb_setcreatetime = inittime;
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) != -1)
3010Sstevel@tonic-gate 		mb->mb_timestamp = tp;
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate 	/*
3040Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
3050Sstevel@tonic-gate 	 * the master block.  This allows the disk image to be self-identifying
3060Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
3070Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
3080Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
3090Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
3100Sstevel@tonic-gate 	 * in the remote copy scenario.
3110Sstevel@tonic-gate 	 */
3120Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
3130Sstevel@tonic-gate 		int len;
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 		len = devid_sizeof(devid);
3160Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
3170Sstevel@tonic-gate 			/* there is enough space to store the devid */
3180Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
3190Sstevel@tonic-gate 			mb->mb_devid_len = len;
3200Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, (char *)devid, len);
3210Sstevel@tonic-gate 		}
3220Sstevel@tonic-gate 		devid_free(devid);
3230Sstevel@tonic-gate 	}
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
3260Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	/*
3290Sstevel@tonic-gate 	 * If any of these operations fail, we need to inform the
3300Sstevel@tonic-gate 	 * user that the disk won't be self identifying. When support
3310Sstevel@tonic-gate 	 * for importing remotely replicated disksets is added, we
3320Sstevel@tonic-gate 	 * want to add the error messages here.
3330Sstevel@tonic-gate 	 */
3340Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
3350Sstevel@tonic-gate 		goto out;
3360Sstevel@tonic-gate 
3370Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
3380Sstevel@tonic-gate 		goto out;
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
3410Sstevel@tonic-gate 		goto out;
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
3440Sstevel@tonic-gate 		goto out;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
3470Sstevel@tonic-gate 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
3480Sstevel@tonic-gate 		goto out;
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate out:
3510Sstevel@tonic-gate 	Free(mb);
3520Sstevel@tonic-gate }
3530Sstevel@tonic-gate 
3540Sstevel@tonic-gate static int
3550Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
3560Sstevel@tonic-gate {
3570Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
3580Sstevel@tonic-gate 	md_replicalist_t	*rl;
3590Sstevel@tonic-gate 	FILE			*cfp = NULL;
3600Sstevel@tonic-gate 	FILE			*mfp = NULL;
3610Sstevel@tonic-gate 	struct stat		sbuf;
3620Sstevel@tonic-gate 	int			rval = 0;
3630Sstevel@tonic-gate 	int			in_miniroot = 0;
3640Sstevel@tonic-gate 	char			line[MDDB_BOOTLIST_MAX_LEN];
3650Sstevel@tonic-gate 	char			*tname = NULL;
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 	/* get list of local replicas */
3680Sstevel@tonic-gate 	if (! metaislocalset(sp))
3690Sstevel@tonic-gate 		return (0);
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
3720Sstevel@tonic-gate 		return (-1);
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate 	/* open tempfile, copy permissions of original file */
3750Sstevel@tonic-gate 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
3760Sstevel@tonic-gate 		/*
3770Sstevel@tonic-gate 		 * On the miniroot tmp files must be created in /var/tmp.
3780Sstevel@tonic-gate 		 * If we get a EROFS error, we assume that we are in the
3790Sstevel@tonic-gate 		 * miniroot.
3800Sstevel@tonic-gate 		 */
3810Sstevel@tonic-gate 		if (errno != EROFS)
3820Sstevel@tonic-gate 			goto error;
3830Sstevel@tonic-gate 		in_miniroot = 1;
3840Sstevel@tonic-gate 		errno = 0;
3850Sstevel@tonic-gate 		tname = tempnam("/var/tmp", "slvm_");
3860Sstevel@tonic-gate 		if (tname == NULL && errno == EROFS) {
3870Sstevel@tonic-gate 			/*
3880Sstevel@tonic-gate 			 * If we are booted on a read-only root because
3890Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
3900Sstevel@tonic-gate 			 * any scary error messages.
3910Sstevel@tonic-gate 			 */
3920Sstevel@tonic-gate 			errno = 0;
3930Sstevel@tonic-gate 			goto out;
3940Sstevel@tonic-gate 		}
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 		/* open tempfile, copy permissions of original file */
3970Sstevel@tonic-gate 		if ((cfp = fopen(tname, "w+")) == NULL)
3980Sstevel@tonic-gate 			goto error;
3990Sstevel@tonic-gate 	}
4000Sstevel@tonic-gate 	if (stat(META_DBCONF, &sbuf) == 0) {
4010Sstevel@tonic-gate 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
4020Sstevel@tonic-gate 			goto error;
4030Sstevel@tonic-gate 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
4040Sstevel@tonic-gate 			goto error;
4050Sstevel@tonic-gate 	}
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	/* print header */
4080Sstevel@tonic-gate 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
4090Sstevel@tonic-gate 		goto error;
4100Sstevel@tonic-gate 	if (fprintf(cfp, "do not hand edit\n") < 0)
4110Sstevel@tonic-gate 		goto error;
4120Sstevel@tonic-gate 	if (fprintf(cfp,
4130Sstevel@tonic-gate 		"#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
4140Sstevel@tonic-gate 		goto error;
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 	/* dump replicas */
4170Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
4180Sstevel@tonic-gate 		md_replica_t	*r = rl->rl_repp;
4190Sstevel@tonic-gate 		int		checksum = 42;
4200Sstevel@tonic-gate 		int		i;
4210Sstevel@tonic-gate 		char		*devidp;
4220Sstevel@tonic-gate 		minor_t		min;
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
4250Sstevel@tonic-gate 		/* If devid code can't encode devidp - skip entry */
4260Sstevel@tonic-gate 		if (devidp == NULL) {
4270Sstevel@tonic-gate 			continue;
4280Sstevel@tonic-gate 		}
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate 		/* compute checksum */
4310Sstevel@tonic-gate 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
4320Sstevel@tonic-gate 		    (i < sizeof (r->r_driver_name))); i++) {
4330Sstevel@tonic-gate 			checksum -= r->r_driver_name[i];
4340Sstevel@tonic-gate 		}
4350Sstevel@tonic-gate 		min = meta_getminor(r->r_namep->dev);
4360Sstevel@tonic-gate 		checksum -= min;
4370Sstevel@tonic-gate 		checksum -= r->r_blkno;
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 		for (i = 0; i < strlen(devidp); i++) {
4400Sstevel@tonic-gate 			checksum -= devidp[i];
4410Sstevel@tonic-gate 		}
4420Sstevel@tonic-gate 		/* print info */
4430Sstevel@tonic-gate 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
4440Sstevel@tonic-gate 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
4450Sstevel@tonic-gate 			goto error;
4460Sstevel@tonic-gate 		}
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 		devid_str_free(devidp);
4490Sstevel@tonic-gate 	}
4500Sstevel@tonic-gate 
4510Sstevel@tonic-gate 	/* close and rename to real file */
4520Sstevel@tonic-gate 	if (fflush(cfp) != 0)
4530Sstevel@tonic-gate 		goto error;
4540Sstevel@tonic-gate 	if (fsync(fileno(cfp)) != 0)
4550Sstevel@tonic-gate 		goto error;
4560Sstevel@tonic-gate 	if (fclose(cfp) != 0) {
4570Sstevel@tonic-gate 		cfp = NULL;
4580Sstevel@tonic-gate 		goto error;
4590Sstevel@tonic-gate 	}
4600Sstevel@tonic-gate 	cfp = NULL;
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate 	/*
4630Sstevel@tonic-gate 	 * Renames don't work in the miniroot since tmpfiles are
4640Sstevel@tonic-gate 	 * created in /var/tmp. Hence we copy the data out.
4650Sstevel@tonic-gate 	 */
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 	if (! in_miniroot) {
4680Sstevel@tonic-gate 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
4690Sstevel@tonic-gate 			goto error;
4700Sstevel@tonic-gate 	} else {
4710Sstevel@tonic-gate 		if ((cfp = fopen(tname, "r")) == NULL)
4720Sstevel@tonic-gate 			goto error;
4730Sstevel@tonic-gate 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
4740Sstevel@tonic-gate 			goto error;
4750Sstevel@tonic-gate 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
4760Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
4770Sstevel@tonic-gate 				goto error;
4780Sstevel@tonic-gate 		}
4790Sstevel@tonic-gate 		(void) fclose(cfp);
4800Sstevel@tonic-gate 		cfp = NULL;
4810Sstevel@tonic-gate 		if (fflush(mfp) != 0)
4820Sstevel@tonic-gate 			goto error;
4830Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
4840Sstevel@tonic-gate 			goto error;
4850Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
4860Sstevel@tonic-gate 			mfp = NULL;
4870Sstevel@tonic-gate 			goto error;
4880Sstevel@tonic-gate 		}
4890Sstevel@tonic-gate 		/* delete the tempfile */
4900Sstevel@tonic-gate 		(void) unlink(tname);
4910Sstevel@tonic-gate 	}
4920Sstevel@tonic-gate 	/* success */
4930Sstevel@tonic-gate 	rval = 0;
4940Sstevel@tonic-gate 	goto out;
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 	/* tempfile error */
4970Sstevel@tonic-gate error:
4980Sstevel@tonic-gate 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
4990Sstevel@tonic-gate 				mdsyserror(ep, errno, META_DBCONFTMP);
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate 	/* cleanup, return success */
5030Sstevel@tonic-gate out:
5040Sstevel@tonic-gate 	if (rlp != NULL)
5050Sstevel@tonic-gate 		metafreereplicalist(rlp);
5060Sstevel@tonic-gate 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
5070Sstevel@tonic-gate 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
5080Sstevel@tonic-gate 					mdsyserror(ep, errno, META_DBCONFTMP);
5090Sstevel@tonic-gate 	}
5100Sstevel@tonic-gate 	free(tname);
5110Sstevel@tonic-gate 	return (rval);
5120Sstevel@tonic-gate }
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate /*
5150Sstevel@tonic-gate  * check replica for dev
5160Sstevel@tonic-gate  */
5170Sstevel@tonic-gate static int
5180Sstevel@tonic-gate in_replica(
5190Sstevel@tonic-gate 	mdsetname_t	*sp,
5200Sstevel@tonic-gate 	md_replica_t	*rp,
5210Sstevel@tonic-gate 	mdname_t	*np,
5220Sstevel@tonic-gate 	diskaddr_t	slblk,
5230Sstevel@tonic-gate 	diskaddr_t	nblks,
5240Sstevel@tonic-gate 	md_error_t	*ep
5250Sstevel@tonic-gate )
5260Sstevel@tonic-gate {
5270Sstevel@tonic-gate 	mdname_t	*repnp = rp->r_namep;
5280Sstevel@tonic-gate 	diskaddr_t	rep_sblk = rp->r_blkno;
5290Sstevel@tonic-gate 	diskaddr_t	rep_nblks = rp->r_nblk;
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 	/* should be in the same set */
5320Sstevel@tonic-gate 	assert(sp != NULL);
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	/* if error in master block, assume whole partition */
5350Sstevel@tonic-gate 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
5360Sstevel@tonic-gate 	    (rep_nblks == MD_DISKADDR_ERROR)) {
5370Sstevel@tonic-gate 		rep_sblk = 0;
5380Sstevel@tonic-gate 		rep_nblks = MD_DISKADDR_ERROR;
5390Sstevel@tonic-gate 	}
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	/* check overlap */
5420Sstevel@tonic-gate 	if (meta_check_overlap(
5430Sstevel@tonic-gate 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
5440Sstevel@tonic-gate 		return (-1);
5450Sstevel@tonic-gate 	}
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate 	/* return success */
5480Sstevel@tonic-gate 	return (0);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate /*
5520Sstevel@tonic-gate  * check to see if we're in a replica
5530Sstevel@tonic-gate  */
5540Sstevel@tonic-gate int
5550Sstevel@tonic-gate meta_check_inreplica(
5560Sstevel@tonic-gate 	mdsetname_t		*sp,
5570Sstevel@tonic-gate 	mdname_t		*np,
5580Sstevel@tonic-gate 	diskaddr_t		slblk,
5590Sstevel@tonic-gate 	diskaddr_t		nblks,
5600Sstevel@tonic-gate 	md_error_t		*ep
5610Sstevel@tonic-gate )
5620Sstevel@tonic-gate {
5630Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
5640Sstevel@tonic-gate 	md_replicalist_t	*rl;
5650Sstevel@tonic-gate 	int			rval = 0;
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate 	/* should have a set */
5680Sstevel@tonic-gate 	assert(sp != NULL);
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	/* for each replica */
5710Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
5720Sstevel@tonic-gate 		return (-1);
5730Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
5740Sstevel@tonic-gate 		md_replica_t	*rp = rl->rl_repp;
5750Sstevel@tonic-gate 
5760Sstevel@tonic-gate 		/* check replica */
5770Sstevel@tonic-gate 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
5780Sstevel@tonic-gate 			rval = -1;
5790Sstevel@tonic-gate 			break;
5800Sstevel@tonic-gate 		}
5810Sstevel@tonic-gate 	}
5820Sstevel@tonic-gate 
5830Sstevel@tonic-gate 	/* cleanup, return success */
5840Sstevel@tonic-gate 	metafreereplicalist(rlp);
5850Sstevel@tonic-gate 	return (rval);
5860Sstevel@tonic-gate }
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate /*
5890Sstevel@tonic-gate  * check replica
5900Sstevel@tonic-gate  */
5910Sstevel@tonic-gate int
5920Sstevel@tonic-gate meta_check_replica(
5930Sstevel@tonic-gate 	mdsetname_t	*sp,		/* set to check against */
5940Sstevel@tonic-gate 	mdname_t	*np,		/* component to check against */
5950Sstevel@tonic-gate 	mdchkopts_t	options,	/* option flags */
5960Sstevel@tonic-gate 	diskaddr_t	slblk,		/* start logical block */
5970Sstevel@tonic-gate 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
5980Sstevel@tonic-gate 	md_error_t	*ep		/* error packet */
5990Sstevel@tonic-gate )
6000Sstevel@tonic-gate {
6010Sstevel@tonic-gate 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 	/* make sure we have a disk */
6040Sstevel@tonic-gate 	if (metachkcomp(np, ep) != 0)
6050Sstevel@tonic-gate 		return (-1);
6060Sstevel@tonic-gate 
6070Sstevel@tonic-gate 	/* check to ensure that it is not already in use */
6080Sstevel@tonic-gate 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
6090Sstevel@tonic-gate 		return (-1);
6100Sstevel@tonic-gate 	}
6110Sstevel@tonic-gate 
6120Sstevel@tonic-gate 	if (options & MDCHK_ALLOW_NODBS)
6130Sstevel@tonic-gate 		return (0);
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	if (options & MDCHK_DRVINSET)
6160Sstevel@tonic-gate 		return (0);
6170Sstevel@tonic-gate 
6180Sstevel@tonic-gate 	/* make sure it is in the set */
6190Sstevel@tonic-gate 	if (meta_check_inset(sp, np, ep) != 0)
6200Sstevel@tonic-gate 		return (-1);
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	/* make sure its not in a metadevice */
6230Sstevel@tonic-gate 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
6240Sstevel@tonic-gate 		return (-1);
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	/* return success */
6270Sstevel@tonic-gate 	return (0);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate static int
6310Sstevel@tonic-gate update_dbinfo_on_drives(
6320Sstevel@tonic-gate 	mdsetname_t	*sp,
6330Sstevel@tonic-gate 	md_drive_desc	*dd,
6340Sstevel@tonic-gate 	int		set_locked,
6350Sstevel@tonic-gate 	int		force,
6360Sstevel@tonic-gate 	md_error_t	*ep
6370Sstevel@tonic-gate )
6380Sstevel@tonic-gate {
6390Sstevel@tonic-gate 	md_set_desc		*sd;
6400Sstevel@tonic-gate 	int			i;
6410Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
6420Sstevel@tonic-gate 	int			rval = 0;
6430Sstevel@tonic-gate 	md_mnnode_desc		*nd;
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
6460Sstevel@tonic-gate 		return (-1);
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if (! set_locked) {
6490Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
6500Sstevel@tonic-gate 			md_error_t xep = mdnullerror;
6510Sstevel@tonic-gate 			sigset_t sigs;
6520Sstevel@tonic-gate 			/* Make sure we are blocking all signals */
6530Sstevel@tonic-gate 			if (procsigs(TRUE, &sigs, &xep) < 0)
6540Sstevel@tonic-gate 				mdclrerror(&xep);
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 			nd = sd->sd_nodelist;
6570Sstevel@tonic-gate 			while (nd) {
6580Sstevel@tonic-gate 				if (force && strcmp(nd->nd_nodename,
6590Sstevel@tonic-gate 				    mynode()) != 0) {
6600Sstevel@tonic-gate 					nd = nd->nd_next;
6610Sstevel@tonic-gate 					continue;
6620Sstevel@tonic-gate 				}
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
6650Sstevel@tonic-gate 					nd = nd->nd_next;
6660Sstevel@tonic-gate 					continue;
6670Sstevel@tonic-gate 				}
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
6700Sstevel@tonic-gate 					return (-1);
6710Sstevel@tonic-gate 				nd = nd->nd_next;
6720Sstevel@tonic-gate 			}
6730Sstevel@tonic-gate 		} else {
6740Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
6750Sstevel@tonic-gate 				/* Skip empty slots */
6760Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
6770Sstevel@tonic-gate 					continue;
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 				if (force && strcmp(sd->sd_nodes[i],
6800Sstevel@tonic-gate 				    mynode()) != 0)
6810Sstevel@tonic-gate 					continue;
6820Sstevel@tonic-gate 
6830Sstevel@tonic-gate 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
6840Sstevel@tonic-gate 					return (-1);
6850Sstevel@tonic-gate 			}
6860Sstevel@tonic-gate 		}
6870Sstevel@tonic-gate 	}
6880Sstevel@tonic-gate 
6890Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
6900Sstevel@tonic-gate 		nd = sd->sd_nodelist;
6910Sstevel@tonic-gate 		while (nd) {
6920Sstevel@tonic-gate 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
6930Sstevel@tonic-gate 				nd = nd->nd_next;
6940Sstevel@tonic-gate 				continue;
6950Sstevel@tonic-gate 			}
6960Sstevel@tonic-gate 
6970Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
6980Sstevel@tonic-gate 				nd = nd->nd_next;
6990Sstevel@tonic-gate 				continue;
7000Sstevel@tonic-gate 			}
7010Sstevel@tonic-gate 
7020Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
7030Sstevel@tonic-gate 			    == -1) {
7040Sstevel@tonic-gate 				rval = -1;
7050Sstevel@tonic-gate 				break;
7060Sstevel@tonic-gate 			}
7070Sstevel@tonic-gate 			nd = nd->nd_next;
7080Sstevel@tonic-gate 		}
7090Sstevel@tonic-gate 	} else {
7100Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
7110Sstevel@tonic-gate 			/* Skip empty slots */
7120Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
7130Sstevel@tonic-gate 				continue;
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
7160Sstevel@tonic-gate 				continue;
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
7190Sstevel@tonic-gate 			    == -1) {
7200Sstevel@tonic-gate 				rval = -1;
7210Sstevel@tonic-gate 				break;
7220Sstevel@tonic-gate 			}
7230Sstevel@tonic-gate 		}
7240Sstevel@tonic-gate 	}
7250Sstevel@tonic-gate 
7260Sstevel@tonic-gate 	if (! set_locked) {
7270Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
7280Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
7290Sstevel@tonic-gate 			nd = sd->sd_nodelist;
7300Sstevel@tonic-gate 			while (nd) {
7310Sstevel@tonic-gate 				if (force &&
7320Sstevel@tonic-gate 				    strcmp(nd->nd_nodename, mynode()) != 0) {
7330Sstevel@tonic-gate 					nd = nd->nd_next;
7340Sstevel@tonic-gate 					continue;
7350Sstevel@tonic-gate 				}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
7380Sstevel@tonic-gate 					nd = nd->nd_next;
7390Sstevel@tonic-gate 					continue;
7400Sstevel@tonic-gate 				}
7410Sstevel@tonic-gate 
7420Sstevel@tonic-gate 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
7430Sstevel@tonic-gate 				    ep)) {
7440Sstevel@tonic-gate 					rval = -1;
7450Sstevel@tonic-gate 					break;
7460Sstevel@tonic-gate 				}
7470Sstevel@tonic-gate 				nd = nd->nd_next;
7480Sstevel@tonic-gate 			}
7490Sstevel@tonic-gate 		} else {
7500Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
7510Sstevel@tonic-gate 				/* Skip empty slots */
7520Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
7530Sstevel@tonic-gate 					continue;
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate 				if (force &&
7560Sstevel@tonic-gate 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
7570Sstevel@tonic-gate 					continue;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
7600Sstevel@tonic-gate 				    ep)) {
7610Sstevel@tonic-gate 					rval = -1;
7620Sstevel@tonic-gate 					break;
7630Sstevel@tonic-gate 				}
7640Sstevel@tonic-gate 			}
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 		}
7670Sstevel@tonic-gate 		cl_set_setkey(NULL);
7680Sstevel@tonic-gate 	}
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 	return (rval);
7710Sstevel@tonic-gate }
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate int
7740Sstevel@tonic-gate meta_db_addsidenms(
7750Sstevel@tonic-gate 	mdsetname_t	*sp,
7760Sstevel@tonic-gate 	mdname_t	*np,
7770Sstevel@tonic-gate 	daddr_t		blkno,
7780Sstevel@tonic-gate 	int		bcast,
7790Sstevel@tonic-gate 	md_error_t	*ep
7800Sstevel@tonic-gate )
7810Sstevel@tonic-gate {
7820Sstevel@tonic-gate 	side_t		sideno;
7830Sstevel@tonic-gate 	char		*bname = NULL;
7840Sstevel@tonic-gate 	char		*dname = NULL;
7850Sstevel@tonic-gate 	minor_t		mnum;
7860Sstevel@tonic-gate 	mddb_config_t	c;
7870Sstevel@tonic-gate 	int		done;
7880Sstevel@tonic-gate 	int		rval = 0;
7890Sstevel@tonic-gate 	md_set_desc	*sd;
7900Sstevel@tonic-gate 
7910Sstevel@tonic-gate 	sideno = MD_SIDEWILD;
7920Sstevel@tonic-gate 	/*CONSTCOND*/
7930Sstevel@tonic-gate 	while (1) {
7940Sstevel@tonic-gate 		if (bname != NULL) {
7950Sstevel@tonic-gate 			Free(bname);
7960Sstevel@tonic-gate 			bname = NULL;
7970Sstevel@tonic-gate 		}
7980Sstevel@tonic-gate 		if (dname != NULL) {
7990Sstevel@tonic-gate 			Free(dname);
8000Sstevel@tonic-gate 			dname = NULL;
8010Sstevel@tonic-gate 		}
8020Sstevel@tonic-gate 		if ((done = meta_getnextside_devinfo(sp, np->bname,
8030Sstevel@tonic-gate 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
8040Sstevel@tonic-gate 			rval = -1;
8050Sstevel@tonic-gate 			break;
8060Sstevel@tonic-gate 		}
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate 		if (done == 0)
8090Sstevel@tonic-gate 			break;
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
8120Sstevel@tonic-gate 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
8130Sstevel@tonic-gate 				rval = -1;
8140Sstevel@tonic-gate 				break;
8150Sstevel@tonic-gate 			}
8160Sstevel@tonic-gate 		}
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 		/*
8190Sstevel@tonic-gate 		 * Send addsidenms to all nodes using rpc.mdcommd if
8200Sstevel@tonic-gate 		 * sidename is being added to MN diskset.
8210Sstevel@tonic-gate 		 *
8220Sstevel@tonic-gate 		 *   It's ok to broadcast this call to other nodes.
8230Sstevel@tonic-gate 		 *
8240Sstevel@tonic-gate 		 *   Note: The broadcast to other nodes isn't needed during
8250Sstevel@tonic-gate 		 *   the addition of the first mddbs to the set since the
8260Sstevel@tonic-gate 		 *   other nodes haven't been joined to the set yet.  All
8270Sstevel@tonic-gate 		 *   nodes in a MN diskset are (implicitly) joined to the set
8280Sstevel@tonic-gate 		 *   on the addition of the first mddb.
8290Sstevel@tonic-gate 		 */
8300Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
8310Sstevel@tonic-gate 		    (bcast == DB_ADDSIDENMS_BCAST)) {
8320Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
8330Sstevel@tonic-gate 			md_mn_msg_meta_db_newside_t	db_ns;
8340Sstevel@tonic-gate 			int				send_rval;
8350Sstevel@tonic-gate 
8360Sstevel@tonic-gate 			db_ns.msg_l_dev = np->dev;
8370Sstevel@tonic-gate 			db_ns.msg_sideno = sideno;
8380Sstevel@tonic-gate 			db_ns.msg_blkno = blkno;
8390Sstevel@tonic-gate 			(void) strncpy(db_ns.msg_dname, dname,
8400Sstevel@tonic-gate 			    sizeof (db_ns.msg_dname));
8410Sstevel@tonic-gate 			(void) splitname(np->bname, &db_ns.msg_splitname);
8420Sstevel@tonic-gate 			db_ns.msg_mnum = mnum;
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
8450Sstevel@tonic-gate 			db_ns.msg_devid[0] = NULL;
8460Sstevel@tonic-gate 
8470Sstevel@tonic-gate 			/*
8480Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
8490Sstevel@tonic-gate 			 * stuck in in the return step until this command has
8500Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
8510Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
8520Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
8530Sstevel@tonic-gate 			 * cycle to proceed.
8540Sstevel@tonic-gate 			 */
8550Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
8560Sstevel@tonic-gate 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
8570Sstevel@tonic-gate 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
8580Sstevel@tonic-gate 			    sizeof (md_mn_msg_meta_db_newside_t),
8590Sstevel@tonic-gate 			    &resultp, ep);
8600Sstevel@tonic-gate 			if (send_rval != 0) {
8610Sstevel@tonic-gate 				rval = -1;
8620Sstevel@tonic-gate 				if (resultp == NULL)
8630Sstevel@tonic-gate 					(void) mddserror(ep,
8640Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
8650Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
8660Sstevel@tonic-gate 					    sp->setname);
8670Sstevel@tonic-gate 				else {
8680Sstevel@tonic-gate 					(void) mdstealerror(ep,
8690Sstevel@tonic-gate 					    &(resultp->mmr_ep));
8700Sstevel@tonic-gate 					if (mdisok(ep)) {
8710Sstevel@tonic-gate 						(void) mddserror(ep,
8720Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
8730Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
8740Sstevel@tonic-gate 						    sp->setname);
8750Sstevel@tonic-gate 					}
8760Sstevel@tonic-gate 					free_result(resultp);
8770Sstevel@tonic-gate 				}
8780Sstevel@tonic-gate 				break;
8790Sstevel@tonic-gate 			}
8800Sstevel@tonic-gate 			if (resultp)
8810Sstevel@tonic-gate 				free_result(resultp);
8820Sstevel@tonic-gate 		} else {
8830Sstevel@tonic-gate 			/*
8840Sstevel@tonic-gate 			 * Let this side's  device name, minor # and driver name
8850Sstevel@tonic-gate 			 * be known to the database replica.
8860Sstevel@tonic-gate 			 */
8870Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate 			/* Fill in device/replica info */
8900Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
8910Sstevel@tonic-gate 			c.c_locator.l_blkno = blkno;
8920Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, dname,
8930Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
8940Sstevel@tonic-gate 			(void) splitname(bname, &c.c_devname);
8950Sstevel@tonic-gate 			c.c_locator.l_mnum = mnum;
8960Sstevel@tonic-gate 
8970Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
8980Sstevel@tonic-gate 			c.c_setno = sp->setno;
8990Sstevel@tonic-gate 			(void) strncpy(c.c_setname, sp->setname,
9000Sstevel@tonic-gate 				sizeof (c.c_setname));
9010Sstevel@tonic-gate 			c.c_sideno = sideno;
9020Sstevel@tonic-gate 
9030Sstevel@tonic-gate 			/*
9040Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
9050Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
9060Sstevel@tonic-gate 			 * is just what this code would do.
9070Sstevel@tonic-gate 			 */
9080Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
9090Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
9100Sstevel@tonic-gate 
9110Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
9120Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
9130Sstevel@tonic-gate 				break;
9140Sstevel@tonic-gate 			}
9150Sstevel@tonic-gate 		}
9160Sstevel@tonic-gate 	}
9170Sstevel@tonic-gate 
9180Sstevel@tonic-gate 	/* cleanup, return success */
9190Sstevel@tonic-gate 	if (bname != NULL) {
9200Sstevel@tonic-gate 		Free(bname);
9210Sstevel@tonic-gate 		bname = NULL;
9220Sstevel@tonic-gate 	}
9230Sstevel@tonic-gate 	if (dname != NULL) {
9240Sstevel@tonic-gate 		Free(dname);
9250Sstevel@tonic-gate 		dname = NULL;
9260Sstevel@tonic-gate 	}
9270Sstevel@tonic-gate 	return (rval);
9280Sstevel@tonic-gate }
9290Sstevel@tonic-gate 
9300Sstevel@tonic-gate 
9310Sstevel@tonic-gate int
9320Sstevel@tonic-gate meta_db_delsidenm(
9330Sstevel@tonic-gate 	mdsetname_t	*sp,
9340Sstevel@tonic-gate 	side_t		sideno,
9350Sstevel@tonic-gate 	mdname_t	*np,
9360Sstevel@tonic-gate 	daddr_t		blkno,
9370Sstevel@tonic-gate 	md_error_t	*ep
9380Sstevel@tonic-gate )
9390Sstevel@tonic-gate {
9400Sstevel@tonic-gate 	mddb_config_t	c;
9410Sstevel@tonic-gate 	md_set_desc	*sd;
9420Sstevel@tonic-gate 
9430Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
9440Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
9450Sstevel@tonic-gate 			return (-1);
9460Sstevel@tonic-gate 	}
9470Sstevel@tonic-gate 	/* Use rpc.mdcommd to delete mddb side from all nodes */
9480Sstevel@tonic-gate 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
9490Sstevel@tonic-gate 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
9500Sstevel@tonic-gate 		md_mn_result_t			*resultp = NULL;
9510Sstevel@tonic-gate 		md_mn_msg_meta_db_delside_t	db_ds;
9520Sstevel@tonic-gate 		int				send_rval;
9530Sstevel@tonic-gate 
9540Sstevel@tonic-gate 		db_ds.msg_l_dev = np->dev;
9550Sstevel@tonic-gate 		db_ds.msg_blkno = blkno;
9560Sstevel@tonic-gate 		db_ds.msg_sideno = sideno;
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate 		/* Set devid to NULL until devids are supported */
9590Sstevel@tonic-gate 		db_ds.msg_devid[0] = NULL;
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate 		/*
9620Sstevel@tonic-gate 		 * If reconfig cycle has been started, this node is
9630Sstevel@tonic-gate 		 * stuck in in the return step until this command has
9640Sstevel@tonic-gate 		 * completed.  If mdcommd is suspended, ask
9650Sstevel@tonic-gate 		 * send_message to fail (instead of retrying)
9660Sstevel@tonic-gate 		 * so that metaset can finish allowing the reconfig
9670Sstevel@tonic-gate 		 * cycle to proceed.
9680Sstevel@tonic-gate 		 */
9690Sstevel@tonic-gate 		send_rval = mdmn_send_message(sp->setno,
9700Sstevel@tonic-gate 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
9710Sstevel@tonic-gate 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
9720Sstevel@tonic-gate 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
9730Sstevel@tonic-gate 		if (send_rval != 0) {
9740Sstevel@tonic-gate 			if (resultp == NULL)
9750Sstevel@tonic-gate 				(void) mddserror(ep,
9760Sstevel@tonic-gate 				    MDE_DS_COMMD_SEND_FAIL,
9770Sstevel@tonic-gate 				    sp->setno, NULL, NULL,
9780Sstevel@tonic-gate 				    sp->setname);
9790Sstevel@tonic-gate 			else {
9800Sstevel@tonic-gate 				(void) mdstealerror(ep, &(resultp->mmr_ep));
9810Sstevel@tonic-gate 				if (mdisok(ep)) {
9820Sstevel@tonic-gate 					(void) mddserror(ep,
9830Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
9840Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
9850Sstevel@tonic-gate 					    sp->setname);
9860Sstevel@tonic-gate 				}
9870Sstevel@tonic-gate 				free_result(resultp);
9880Sstevel@tonic-gate 			}
9890Sstevel@tonic-gate 			return (-1);
9900Sstevel@tonic-gate 		}
9910Sstevel@tonic-gate 		if (resultp)
9920Sstevel@tonic-gate 			free_result(resultp);
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate 	} else {
9950Sstevel@tonic-gate 		/*
9960Sstevel@tonic-gate 		 * Let this side's  device name, minor # and driver name
9970Sstevel@tonic-gate 		 * be known to the database replica.
9980Sstevel@tonic-gate 		 */
9990Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 		/* Fill in device/replica info */
10020Sstevel@tonic-gate 		c.c_locator.l_dev = meta_cmpldev(np->dev);
10030Sstevel@tonic-gate 		c.c_locator.l_blkno = blkno;
10040Sstevel@tonic-gate 
10050Sstevel@tonic-gate 		/* Fill in setno, setname, and sideno */
10060Sstevel@tonic-gate 		c.c_setno = sp->setno;
10070Sstevel@tonic-gate 		(void) strcpy(c.c_setname, sp->setname);
10080Sstevel@tonic-gate 		c.c_sideno = sideno;
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate 		/*
10110Sstevel@tonic-gate 		 * Don't need device id information from this ioctl
10120Sstevel@tonic-gate 		 * Kernel determines device id from dev_t, which
10130Sstevel@tonic-gate 		 * is just what this code would do.
10140Sstevel@tonic-gate 		 */
10150Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
10160Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
10170Sstevel@tonic-gate 
10180Sstevel@tonic-gate 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
10190Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
10200Sstevel@tonic-gate 	}
10210Sstevel@tonic-gate 	return (0);
10220Sstevel@tonic-gate }
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate 
10250Sstevel@tonic-gate static int
10260Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
10270Sstevel@tonic-gate {
10280Sstevel@tonic-gate 	mdnamelist_t		*dnp1, *dnp2;
10290Sstevel@tonic-gate 
10300Sstevel@tonic-gate 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
10310Sstevel@tonic-gate 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
10320Sstevel@tonic-gate 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
10330Sstevel@tonic-gate 				return (mderror(ep, MDE_DUPDRIVE,
10340Sstevel@tonic-gate 				    dnp1->namep->cname));
10350Sstevel@tonic-gate 		}
10360Sstevel@tonic-gate 	}
10370Sstevel@tonic-gate 	return (0);
10380Sstevel@tonic-gate }
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 
/*
 * Read the first `len' bytes of the file `path' into a freshly
 * allocated buffer.  Short reads are continued and reads interrupted
 * by a signal (EINTR) are retried.
 *
 * Returns the buffer, which the caller must free(), or NULL when the
 * file cannot be opened, memory cannot be allocated, or fewer than
 * `len' bytes could be read.
 */
static char *
filediff_load(const char *path, size_t len)
{
	char	*buf;
	size_t	done = 0;
	int	fd;

	/* never ask for zero bytes: malloc(0) is allowed to return NULL */
	buf = malloc(len != 0 ? len : 1);
	if (buf == NULL)
		return (NULL);

	fd = open(path, O_RDONLY);
	if (fd == -1) {
		free(buf);
		return (NULL);
	}

	while (done < len) {
		ssize_t	cnt = read(fd, buf + done, len - done);

		if (cnt < 0 && errno == EINTR)
			continue;
		if (cnt <= 0)
			break;		/* read error or unexpected EOF */
		done += (size_t)cnt;
	}
	(void) close(fd);

	if (done != len) {
		free(buf);
		return (NULL);
	}
	return (buf);
}

/*
 * Return 1 if files are different, else return 0
 *
 * The files are also reported as different (1) when either of them
 * cannot be examined: stat(), open(), read() or memory allocation
 * failure, or a size that does not fit in a size_t.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		len;
	struct stat	tsbuf;
	struct stat	sbuf;
	char		*tbuf;
	char		*buf;

	if (stat(tsname, &tsbuf) != 0)
		return (1);
	if (stat(sname, &sbuf) != 0)
		return (1);

	/* files of different length can't be identical */
	if (tsbuf.st_size != sbuf.st_size)
		return (1);

	/* refuse sizes that would be truncated by the conversion */
	len = (size_t)tsbuf.st_size;
	if (tsbuf.st_size < 0 || (off_t)len != tsbuf.st_size)
		return (1);

	/* read both files into memory */
	tbuf = filediff_load(tsname, len);
	buf = filediff_load(sname, len);

	/* compare content; normalize the result to exactly 0 or 1 */
	if (tbuf != NULL && buf != NULL)
		ret = (memcmp(tbuf, buf, len) != 0);

	free(tbuf);
	free(buf);
	return (ret);
}
10920Sstevel@tonic-gate 
10930Sstevel@tonic-gate /*
10940Sstevel@tonic-gate  * patch md.conf file with mddb locations
10950Sstevel@tonic-gate  */
10960Sstevel@tonic-gate int
10970Sstevel@tonic-gate meta_db_patch(
10980Sstevel@tonic-gate 	char		*sname,		/* system file name */
10990Sstevel@tonic-gate 	char		*cname,		/* mddb.cf file name */
11000Sstevel@tonic-gate 	int		patch,		/* patching locally */
11010Sstevel@tonic-gate 	md_error_t	*ep
11020Sstevel@tonic-gate )
11030Sstevel@tonic-gate {
11040Sstevel@tonic-gate 	char		*tsname = NULL;
11050Sstevel@tonic-gate 	char		line[MDDB_BOOTLIST_MAX_LEN];
11060Sstevel@tonic-gate 	FILE		*tsfp = NULL;
11070Sstevel@tonic-gate 	FILE		*mfp = NULL;
11080Sstevel@tonic-gate 	int		rval = -1;
11090Sstevel@tonic-gate 
11100Sstevel@tonic-gate 	/* check names */
11110Sstevel@tonic-gate 	if (sname == NULL) {
11120Sstevel@tonic-gate 		if (patch)
11130Sstevel@tonic-gate 			sname = "md.conf";
11140Sstevel@tonic-gate 		else
11150Sstevel@tonic-gate 			sname = "/kernel/drv/md.conf";
11160Sstevel@tonic-gate 	}
11170Sstevel@tonic-gate 	if (cname == NULL)
11180Sstevel@tonic-gate 		cname = META_DBCONF;
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate 	/*
11210Sstevel@tonic-gate 	 * edit file
11220Sstevel@tonic-gate 	 */
11230Sstevel@tonic-gate 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
11240Sstevel@tonic-gate 		if (mdissyserror(ep, EROFS)) {
11250Sstevel@tonic-gate 			/*
11260Sstevel@tonic-gate 			 * If we are booted on a read-only root because
11270Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
11280Sstevel@tonic-gate 			 * any scary error messages.
11290Sstevel@tonic-gate 			 */
11300Sstevel@tonic-gate 			mdclrerror(ep);
11310Sstevel@tonic-gate 			rval = 0;
11320Sstevel@tonic-gate 		}
11330Sstevel@tonic-gate 		goto out;
11340Sstevel@tonic-gate 	}
11350Sstevel@tonic-gate 
11360Sstevel@tonic-gate 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
11370Sstevel@tonic-gate 	    ep) != 0)
11380Sstevel@tonic-gate 		goto out;
11390Sstevel@tonic-gate 
11400Sstevel@tonic-gate 	/* if file content is identical, skip rename */
11410Sstevel@tonic-gate 	if (filediff(tsname, sname) == 0) {
11420Sstevel@tonic-gate 		rval = 0;
11430Sstevel@tonic-gate 		goto out;
11440Sstevel@tonic-gate 	}
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
11470Sstevel@tonic-gate 					    (fclose(tsfp) != 0)) {
11480Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, tsname);
11490Sstevel@tonic-gate 		goto out;
11500Sstevel@tonic-gate 	}
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 	tsfp = NULL;
11530Sstevel@tonic-gate 
11540Sstevel@tonic-gate 	/*
11550Sstevel@tonic-gate 	 * rename file. If we get a Cross Device error then it
11560Sstevel@tonic-gate 	 * is because we are in the miniroot.
11570Sstevel@tonic-gate 	 */
11580Sstevel@tonic-gate 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
11590Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, sname);
11600Sstevel@tonic-gate 		goto out;
11610Sstevel@tonic-gate 	}
11620Sstevel@tonic-gate 
11630Sstevel@tonic-gate 	if (errno == EXDEV) {
11640Sstevel@tonic-gate 		if ((tsfp = fopen(tsname, "r")) == NULL)
11650Sstevel@tonic-gate 			goto out;
11660Sstevel@tonic-gate 		if ((mfp = fopen(sname, "w+")) == NULL)
11670Sstevel@tonic-gate 			goto out;
11680Sstevel@tonic-gate 		while (fgets(line, sizeof (line), tsfp) != NULL) {
11690Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
11700Sstevel@tonic-gate 				goto out;
11710Sstevel@tonic-gate 		}
11720Sstevel@tonic-gate 		(void) fclose(tsfp);
11730Sstevel@tonic-gate 		tsfp = NULL;
11740Sstevel@tonic-gate 		if (fflush(mfp) != 0)
11750Sstevel@tonic-gate 			goto out;
11760Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
11770Sstevel@tonic-gate 			goto out;
11780Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
11790Sstevel@tonic-gate 			mfp = NULL;
11800Sstevel@tonic-gate 			goto out;
11810Sstevel@tonic-gate 		}
11820Sstevel@tonic-gate 	}
11830Sstevel@tonic-gate 
11840Sstevel@tonic-gate 	Free(tsname);
11850Sstevel@tonic-gate 	tsname = NULL;
11860Sstevel@tonic-gate 	rval = 0;
11870Sstevel@tonic-gate 
11880Sstevel@tonic-gate 	/* cleanup, return error */
11890Sstevel@tonic-gate out:
11900Sstevel@tonic-gate 	if (tsfp != NULL)
11910Sstevel@tonic-gate 		(void) fclose(tsfp);
11920Sstevel@tonic-gate 	if (tsname != NULL) {
11930Sstevel@tonic-gate 		(void) unlink(tsname);
11940Sstevel@tonic-gate 		Free(tsname);
11950Sstevel@tonic-gate 	}
11960Sstevel@tonic-gate 	return (rval);
11970Sstevel@tonic-gate }
11980Sstevel@tonic-gate 
11990Sstevel@tonic-gate /*
12000Sstevel@tonic-gate  * Add replicas to set.  This happens as a result of:
12010Sstevel@tonic-gate  *	- metadb [-s set_name] -a
12020Sstevel@tonic-gate  *	- metaset -s set_name -a disk
12030Sstevel@tonic-gate  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
12040Sstevel@tonic-gate  *	- metaset -s set_name -b
12050Sstevel@tonic-gate  *
12060Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
12070Sstevel@tonic-gate  *
12080Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
12090Sstevel@tonic-gate  * is running the metaset command.
12100Sstevel@tonic-gate  *
12110Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
12120Sstevel@tonic-gate  * running the metaset command.  If this is the first mddb added to
12130Sstevel@tonic-gate  * the MN diskset, then no communication is made to other nodes via commd
12140Sstevel@tonic-gate  * since the other nodes will be in-sync with respect to the mddbs when
12150Sstevel@tonic-gate  * those other nodes join the set and snarf in the newly created mddb.
12160Sstevel@tonic-gate  * If this is not the first mddb added to the MN diskset, then this
12170Sstevel@tonic-gate  * attach command is sent to all of the nodes using commd.  This keeps
12180Sstevel@tonic-gate  * the nodes in-sync.
12190Sstevel@tonic-gate  */
12200Sstevel@tonic-gate int
12210Sstevel@tonic-gate meta_db_attach(
12220Sstevel@tonic-gate 	mdsetname_t		*sp,
12230Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
12240Sstevel@tonic-gate 	mdchkopts_t		options,
12250Sstevel@tonic-gate 	md_timeval32_t		*timeval,
12260Sstevel@tonic-gate 	int			dbcnt,
12270Sstevel@tonic-gate 	int			dbsize,
12280Sstevel@tonic-gate 	char			*sysfilename,
12290Sstevel@tonic-gate 	md_error_t		*ep
12300Sstevel@tonic-gate )
12310Sstevel@tonic-gate {
12320Sstevel@tonic-gate 	struct mddb_config	c;
12330Sstevel@tonic-gate 	mdnamelist_t		*nlp;
12340Sstevel@tonic-gate 	mdname_t		*np;
12350Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
12360Sstevel@tonic-gate 	md_drive_desc		*p;
12370Sstevel@tonic-gate 	int			i;
12380Sstevel@tonic-gate 	int			fd;
12390Sstevel@tonic-gate 	side_t			sideno;
12400Sstevel@tonic-gate 	daddr_t			blkno;
12410Sstevel@tonic-gate 	int			replicacount = 0;
12420Sstevel@tonic-gate 	int			start_mdmonitord = 0;
12430Sstevel@tonic-gate 	int			rval = 0;
12440Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
12450Sstevel@tonic-gate 	md_set_desc		*sd;
12460Sstevel@tonic-gate 	int			stale_bool = FALSE;
12470Sstevel@tonic-gate 	int			flags;
12480Sstevel@tonic-gate 	int			firstmddb = 1;
12490Sstevel@tonic-gate 	md_timeval32_t		inittime = {0, 0};
12500Sstevel@tonic-gate 
12510Sstevel@tonic-gate 	/*
12520Sstevel@tonic-gate 	 * Error if we don't get some work to do.
12530Sstevel@tonic-gate 	 */
12540Sstevel@tonic-gate 	if (db_nlp == NULL)
12550Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
12560Sstevel@tonic-gate 
12570Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
12580Sstevel@tonic-gate 		return (-1);
12590Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
12600Sstevel@tonic-gate 	c.c_id = 0;
12610Sstevel@tonic-gate 	c.c_setno = sp->setno;
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
12640Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
12650Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
12660Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
12670Sstevel@tonic-gate 		if (metaislocalset(sp)) {
12680Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
12690Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
12700Sstevel@tonic-gate 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
12710Sstevel@tonic-gate 			    (! (options & MDCHK_ALLOW_NODBS)))
12720Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
12730Sstevel@tonic-gate 		} else {
12740Sstevel@tonic-gate 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
12750Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
12760Sstevel@tonic-gate 		}
12770Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
12780Sstevel@tonic-gate 	}
12790Sstevel@tonic-gate 	/*
12800Sstevel@tonic-gate 	 * Is current set STALE?
12810Sstevel@tonic-gate 	 */
12820Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
12830Sstevel@tonic-gate 		stale_bool = TRUE;
12840Sstevel@tonic-gate 	}
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 	assert(db_nlp != NULL);
12870Sstevel@tonic-gate 
12880Sstevel@tonic-gate 	/* if creating the metadbs for the first time start mdmonitord */
12890Sstevel@tonic-gate 	if (c.c_dbcnt == 0)
12900Sstevel@tonic-gate 		start_mdmonitord = 1;
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	/*
12930Sstevel@tonic-gate 	 * check to see if we will go over the total possible number
12940Sstevel@tonic-gate 	 * of data bases
12950Sstevel@tonic-gate 	 */
12960Sstevel@tonic-gate 	nlp = db_nlp;
12970Sstevel@tonic-gate 	while (nlp) {
12980Sstevel@tonic-gate 		replicacount += dbcnt;
12990Sstevel@tonic-gate 		nlp = nlp->next;
13000Sstevel@tonic-gate 	}
13010Sstevel@tonic-gate 
13020Sstevel@tonic-gate 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
13030Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
13040Sstevel@tonic-gate 		    sp->setno, c.c_dbcnt + replicacount, NULL));
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 	/*
13070Sstevel@tonic-gate 	 * go through and check to make sure all locations specified
13080Sstevel@tonic-gate 	 * are legal also pick out driver name;
13090Sstevel@tonic-gate 	 */
13100Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13110Sstevel@tonic-gate 		diskaddr_t devsize;
13120Sstevel@tonic-gate 
13130Sstevel@tonic-gate 		np = nlp->namep;
13140Sstevel@tonic-gate 
13150Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
13160Sstevel@tonic-gate 			uint_t	partno;
13170Sstevel@tonic-gate 			uint_t	rep_partno;
13180Sstevel@tonic-gate 			mddrivename_t	*dnp = np->drivenamep;
13190Sstevel@tonic-gate 
13200Sstevel@tonic-gate 			/*
13210Sstevel@tonic-gate 			 * make sure that non-local database replicas
13220Sstevel@tonic-gate 			 * are always on the replica slice.
13230Sstevel@tonic-gate 			 */
13240Sstevel@tonic-gate 			if (meta_replicaslice(dnp,
13250Sstevel@tonic-gate 			    &rep_partno, ep) != 0)
13260Sstevel@tonic-gate 				return (-1);
13270Sstevel@tonic-gate 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
13280Sstevel@tonic-gate 				return (-1);
13290Sstevel@tonic-gate 			if (partno != rep_partno)
13300Sstevel@tonic-gate 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
13310Sstevel@tonic-gate 				    np->dev, sp->setname));
13320Sstevel@tonic-gate 		}
13330Sstevel@tonic-gate 
13340Sstevel@tonic-gate 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
13350Sstevel@tonic-gate 		    ep)) {
13360Sstevel@tonic-gate 			return (-1);
13370Sstevel@tonic-gate 		}
13380Sstevel@tonic-gate 
13390Sstevel@tonic-gate 		if ((devsize = metagetsize(np, ep)) == -1)
13400Sstevel@tonic-gate 			return (-1);
13410Sstevel@tonic-gate 
13420Sstevel@tonic-gate 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
13430Sstevel@tonic-gate 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
13440Sstevel@tonic-gate 			    meta_getminor(np->dev), sp->setno, devsize,
13450Sstevel@tonic-gate 			    np->cname));
13460Sstevel@tonic-gate 	}
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 	/*
13490Sstevel@tonic-gate 	 * If first disk in set we don't have lb_inittime yet for use as
13500Sstevel@tonic-gate 	 * mb_setcreatetime so don't go looking for it. WE'll come back
13510Sstevel@tonic-gate 	 * later and update after the locator block has been created.
13520Sstevel@tonic-gate 	 * If this isn't the first disk in the set, we have a locator
13530Sstevel@tonic-gate 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
13540Sstevel@tonic-gate 	 * lb_inittime.
13550Sstevel@tonic-gate 	 */
13560Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
13570Sstevel@tonic-gate 		if (c.c_dbcnt != 0) {
13580Sstevel@tonic-gate 			firstmddb = 0;
13590Sstevel@tonic-gate 			inittime = meta_get_lb_inittime(sp, ep);
13600Sstevel@tonic-gate 		}
13610Sstevel@tonic-gate 	}
13620Sstevel@tonic-gate 
13630Sstevel@tonic-gate 	/*
13640Sstevel@tonic-gate 	 * go through and write all master blocks
13650Sstevel@tonic-gate 	 */
13660Sstevel@tonic-gate 
13670Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13680Sstevel@tonic-gate 		np = nlp->namep;
13690Sstevel@tonic-gate 
13700Sstevel@tonic-gate 		if ((fd = open(np->rname, O_RDWR)) < 0)
13710Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
13720Sstevel@tonic-gate 
13730Sstevel@tonic-gate 		for (i = 0; i < dbcnt; i++) {
13740Sstevel@tonic-gate 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
13750Sstevel@tonic-gate 			    inittime, ep)) {
13760Sstevel@tonic-gate 				(void) close(fd);
13770Sstevel@tonic-gate 				return (-1);
13780Sstevel@tonic-gate 			}
13790Sstevel@tonic-gate 		}
13800Sstevel@tonic-gate 		(void) close(fd);
13810Sstevel@tonic-gate 	}
13820Sstevel@tonic-gate 
13830Sstevel@tonic-gate 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
13840Sstevel@tonic-gate 		return (-1);
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
13870Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
13880Sstevel@tonic-gate 		if (! mdisok(ep))
13890Sstevel@tonic-gate 			return (-1);
13900Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
13910Sstevel@tonic-gate 			return (-1);
13920Sstevel@tonic-gate 
13930Sstevel@tonic-gate 	}
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 	/*
13960Sstevel@tonic-gate 	 * go through and tell kernel to add them
13970Sstevel@tonic-gate 	 */
13980Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
13990Sstevel@tonic-gate 		mdcinfo_t	*cinfo;
14000Sstevel@tonic-gate 
14010Sstevel@tonic-gate 		np = nlp->namep;
14020Sstevel@tonic-gate 
14030Sstevel@tonic-gate 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
14040Sstevel@tonic-gate 			rval = -1;
14050Sstevel@tonic-gate 			goto out;
14060Sstevel@tonic-gate 		}
14070Sstevel@tonic-gate 
14080Sstevel@tonic-gate 		/*
14090Sstevel@tonic-gate 		 * If mddb is being added to MN diskset and there already
14100Sstevel@tonic-gate 		 * exists a valid mddb in the set (which equates to this
14110Sstevel@tonic-gate 		 * node being an owner of the set) then use rpc.mdcommd
14120Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
14130Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
14140Sstevel@tonic-gate 		 * can't write the message to the mddb.
14150Sstevel@tonic-gate 		 *
14160Sstevel@tonic-gate 		 * Otherwise, just add mddb to this node.
14170Sstevel@tonic-gate 		 */
14180Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
14190Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
14200Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
14210Sstevel@tonic-gate 			md_mn_msg_meta_db_attach_t	attach;
14220Sstevel@tonic-gate 			int 				send_rval;
14230Sstevel@tonic-gate 
14240Sstevel@tonic-gate 			/*
14250Sstevel@tonic-gate 			 * In a scenario where new replicas had been added on
14260Sstevel@tonic-gate 			 * the master, and then all of the old replicas failed
14270Sstevel@tonic-gate 			 * before the slaves had knowledge of the new replicas,
14280Sstevel@tonic-gate 			 * the slaves are unable to re-parse in the mddb
14290Sstevel@tonic-gate 			 * from the new replicas since the slaves have no
14300Sstevel@tonic-gate 			 * knowledge of the new replicas.  The following
14310Sstevel@tonic-gate 			 * algorithm solves this problem:
14320Sstevel@tonic-gate 			 * 	- META_DB_ATTACH message generates submsgs
14330Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
14340Sstevel@tonic-gate 			 * 		- MDDB_ATTACH new replicas
14350Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
14360Sstevel@tonic-gate 			 *		information to be sent from master
14370Sstevel@tonic-gate 			 *		to slaves at a higher class than the
14380Sstevel@tonic-gate 			 *		unblock so the parse message will
14390Sstevel@tonic-gate 			 *		reach slaves before unblock message.
14400Sstevel@tonic-gate 			 */
14410Sstevel@tonic-gate 			attach.msg_l_dev = np->dev;
14420Sstevel@tonic-gate 			attach.msg_cnt = dbcnt;
14430Sstevel@tonic-gate 			attach.msg_dbsize = dbsize;
14440Sstevel@tonic-gate 			(void) strncpy(attach.msg_dname, cinfo->dname,
14450Sstevel@tonic-gate 			    sizeof (attach.msg_dname));
14460Sstevel@tonic-gate 			(void) splitname(np->bname, &attach.msg_splitname);
14470Sstevel@tonic-gate 			attach.msg_options = options;
14480Sstevel@tonic-gate 
14490Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
14500Sstevel@tonic-gate 			attach.msg_devid[0] = NULL;
14510Sstevel@tonic-gate 
14520Sstevel@tonic-gate 			/*
14530Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
14540Sstevel@tonic-gate 			 * stuck in the return step until this command has
14550Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
14560Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
14570Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
14580Sstevel@tonic-gate 			 * cycle to proceed.
14590Sstevel@tonic-gate 			 */
14600Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
14610Sstevel@tonic-gate 			if (stale_bool == TRUE)
14620Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
14630Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
14640Sstevel@tonic-gate 				MD_MN_MSG_META_DB_ATTACH,
14650Sstevel@tonic-gate 				flags, (char *)&attach,
14660Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_attach_t),
14670Sstevel@tonic-gate 				&resultp, ep);
14680Sstevel@tonic-gate 			if (send_rval != 0) {
14690Sstevel@tonic-gate 				rval = -1;
14700Sstevel@tonic-gate 				if (resultp == NULL)
14710Sstevel@tonic-gate 					(void) mddserror(ep,
14720Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
14730Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
14740Sstevel@tonic-gate 					    sp->setname);
14750Sstevel@tonic-gate 				else {
14760Sstevel@tonic-gate 					(void) mdstealerror(ep,
14770Sstevel@tonic-gate 					    &(resultp->mmr_ep));
14780Sstevel@tonic-gate 					if (mdisok(ep)) {
14790Sstevel@tonic-gate 						(void) mddserror(ep,
14800Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
14810Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
14820Sstevel@tonic-gate 						    sp->setname);
14830Sstevel@tonic-gate 					}
14840Sstevel@tonic-gate 					free_result(resultp);
14850Sstevel@tonic-gate 				}
14860Sstevel@tonic-gate 				goto out;
14870Sstevel@tonic-gate 			}
14880Sstevel@tonic-gate 			if (resultp)
14890Sstevel@tonic-gate 				free_result(resultp);
14900Sstevel@tonic-gate 		} else {
14910Sstevel@tonic-gate 		    /* Adding mddb(s) to just this node */
14920Sstevel@tonic-gate 		    for (i = 0; i < dbcnt; i++) {
14930Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
14940Sstevel@tonic-gate 			/* Fill in device/replica info */
14950Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
14960Sstevel@tonic-gate 			c.c_locator.l_blkno = i * dbsize + 16;
14970Sstevel@tonic-gate 			blkno = c.c_locator.l_blkno;
14980Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
14990Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
15000Sstevel@tonic-gate 			(void) splitname(np->bname, &c.c_devname);
15010Sstevel@tonic-gate 			c.c_locator.l_mnum = meta_getminor(np->dev);
15020Sstevel@tonic-gate 
15030Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
15040Sstevel@tonic-gate 			c.c_setno = sp->setno;
15050Sstevel@tonic-gate 			if (! metaislocalset(sp)) {
15060Sstevel@tonic-gate 				if (MD_MNSET_DESC(sd)) {
15070Sstevel@tonic-gate 					c.c_multi_node = 1;
15080Sstevel@tonic-gate 				}
15090Sstevel@tonic-gate 			}
15100Sstevel@tonic-gate 			(void) strcpy(c.c_setname, sp->setname);
15110Sstevel@tonic-gate 			c.c_sideno = sideno;
15120Sstevel@tonic-gate 
15130Sstevel@tonic-gate 			/*
15140Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
15150Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
15160Sstevel@tonic-gate 			 * is just what this code would do.
15170Sstevel@tonic-gate 			 */
15180Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
15190Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate 			if (timeval != NULL)
15220Sstevel@tonic-gate 				c.c_timestamp = *timeval;
15230Sstevel@tonic-gate 
15240Sstevel@tonic-gate 			if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE),
15250Sstevel@tonic-gate 			    ep)) {
15260Sstevel@tonic-gate 				rval = -1;
15270Sstevel@tonic-gate 				goto out;
15280Sstevel@tonic-gate 			}
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) {
15310Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
15320Sstevel@tonic-gate 				goto out;
15330Sstevel@tonic-gate 			}
15340Sstevel@tonic-gate 			/*
15350Sstevel@tonic-gate 			 * This is either a traditional diskset OR this
15360Sstevel@tonic-gate 			 * is the first replica added to a MN diskset.
15370Sstevel@tonic-gate 			 * In either case, set broadcast to NO_BCAST so
15380Sstevel@tonic-gate 			 * that message won't go through rpc.mdcommd.
15390Sstevel@tonic-gate 			 * If this is a traditional diskset, the bcast
15400Sstevel@tonic-gate 			 * flag is ignored since traditional disksets
15410Sstevel@tonic-gate 			 * don't use the rpc.mdcommd.
15420Sstevel@tonic-gate 			 */
15430Sstevel@tonic-gate 			if (meta_db_addsidenms(sp, np, blkno,
15440Sstevel@tonic-gate 			    DB_ADDSIDENMS_NO_BCAST, ep))
15450Sstevel@tonic-gate 				goto out;
15460Sstevel@tonic-gate 		    }
15470Sstevel@tonic-gate 		}
15480Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
15490Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
15500Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next)
15510Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
15520Sstevel@tonic-gate 					p->dd_dbcnt = dbcnt;
15530Sstevel@tonic-gate 					p->dd_dbsize  = dbsize;
15540Sstevel@tonic-gate 					break;
15550Sstevel@tonic-gate 				}
15560Sstevel@tonic-gate 		}
15570Sstevel@tonic-gate 
15580Sstevel@tonic-gate 		/*
15590Sstevel@tonic-gate 		 * If this was the first addition of disks to the
15600Sstevel@tonic-gate 		 * diskset you now need to update the mb_setcreatetime
15610Sstevel@tonic-gate 		 * which needed lb_inittime which wasn't there until now.
15620Sstevel@tonic-gate 		 */
15630Sstevel@tonic-gate 		if (firstmddb) {
15640Sstevel@tonic-gate 			if (meta_update_mb(sp, dd, ep) != 0) {
15650Sstevel@tonic-gate 				return (-1);
15660Sstevel@tonic-gate 			}
15670Sstevel@tonic-gate 		}
15680Sstevel@tonic-gate 		(void) close(fd);
15690Sstevel@tonic-gate 	}
15700Sstevel@tonic-gate 
15710Sstevel@tonic-gate out:
15720Sstevel@tonic-gate 	if (metaislocalset(sp)) {
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate 		/* everything looks fine. Start mdmonitord */
15750Sstevel@tonic-gate 		/* Note: popen/pclose is the MT-safe replacement for system */
15760Sstevel@tonic-gate 		if (rval == 0 && start_mdmonitord  == 1) {
15770Sstevel@tonic-gate 			if (pclose(popen(MDMONITORD, "w")) == -1)
15780Sstevel@tonic-gate 				md_perror(MDMONITORD);
15790Sstevel@tonic-gate 
15800Sstevel@tonic-gate 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
15810Sstevel@tonic-gate 				mde_perror(&status, "");
15820Sstevel@tonic-gate 				mdclrerror(&status);
15830Sstevel@tonic-gate 			}
15840Sstevel@tonic-gate 		}
15850Sstevel@tonic-gate 
15860Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
15870Sstevel@tonic-gate 			/* Don't mask any previous errors */
15880Sstevel@tonic-gate 			if (rval == 0)
15890Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
15900Sstevel@tonic-gate 			return (rval);
15910Sstevel@tonic-gate 		}
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
15940Sstevel@tonic-gate 			/* Don't mask any previous errors */
15950Sstevel@tonic-gate 			if (rval == 0)
15960Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
15970Sstevel@tonic-gate 		}
15980Sstevel@tonic-gate 	} else {
15990Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
16000Sstevel@tonic-gate 		    (options & MDCHK_SET_LOCKED),
16010Sstevel@tonic-gate 		    (options & MDCHK_SET_FORCE),
16020Sstevel@tonic-gate 		    &status)) {
16030Sstevel@tonic-gate 			/* Don't mask any previous errors */
16040Sstevel@tonic-gate 			if (rval == 0)
16050Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
16060Sstevel@tonic-gate 			else
16070Sstevel@tonic-gate 				mdclrerror(&status);
16080Sstevel@tonic-gate 		}
16090Sstevel@tonic-gate 		metafreedrivedesc(&dd);
16100Sstevel@tonic-gate 	}
16110Sstevel@tonic-gate 	/*
16120Sstevel@tonic-gate 	 * For MN disksets that already had nodes joined
16130Sstevel@tonic-gate 	 * before the attach of this mddb(s), the name invalidation is
16140Sstevel@tonic-gate 	 * done by the commd handler routine.  Otherwise, if this
16150Sstevel@tonic-gate 	 * is the first attach of a MN diskset mddb, the invalidation
16160Sstevel@tonic-gate 	 * must be done here since the first attach cannot be sent
16170Sstevel@tonic-gate 	 * via the commd since there are no nodes joined to the set yet.
16180Sstevel@tonic-gate 	 */
16190Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
16200Sstevel@tonic-gate 	    (MD_MNSET_DESC(sd) &&
16210Sstevel@tonic-gate 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
16220Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
16230Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
16240Sstevel@tonic-gate 		}
16250Sstevel@tonic-gate 	}
16260Sstevel@tonic-gate 	return (rval);
16270Sstevel@tonic-gate }
16280Sstevel@tonic-gate 
16290Sstevel@tonic-gate /*
16300Sstevel@tonic-gate  * deletelist_length
16310Sstevel@tonic-gate  *
16320Sstevel@tonic-gate  *	return the number of slices that have been specified for deletion
16330Sstevel@tonic-gate  *	on the metadb command line.  This does not calculate the number
16340Sstevel@tonic-gate  *	of replicas because there may be multiple replicas per slice.
16350Sstevel@tonic-gate  */
16360Sstevel@tonic-gate static int
16370Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
16380Sstevel@tonic-gate {
16390Sstevel@tonic-gate 
16400Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16410Sstevel@tonic-gate 	int			list_length = 0;
16420Sstevel@tonic-gate 
16430Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
16440Sstevel@tonic-gate 		list_length++;
16450Sstevel@tonic-gate 	}
16460Sstevel@tonic-gate 
16470Sstevel@tonic-gate 	return (list_length);
16480Sstevel@tonic-gate }
16490Sstevel@tonic-gate 
16500Sstevel@tonic-gate static int
16510Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
16520Sstevel@tonic-gate {
16530Sstevel@tonic-gate 
16540Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16550Sstevel@tonic-gate 	mdname_t		*np;
16560Sstevel@tonic-gate 	int			index = 0;
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
16590Sstevel@tonic-gate 		np = nlp->namep;
16600Sstevel@tonic-gate 
16610Sstevel@tonic-gate 		if (strcmp(devname, np->bname) == 0)
16620Sstevel@tonic-gate 			return (index);
16630Sstevel@tonic-gate 		index++;
16640Sstevel@tonic-gate 	}
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 	return (-1);
16670Sstevel@tonic-gate }
16680Sstevel@tonic-gate 
16690Sstevel@tonic-gate /*
16700Sstevel@tonic-gate  * Delete replicas from set.  This happens as a result of:
16710Sstevel@tonic-gate  *	- metadb [-s set_name] -d
16720Sstevel@tonic-gate  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
16730Sstevel@tonic-gate  *	- metaset -s set_name -d disk
16740Sstevel@tonic-gate  *	- metaset -s set_name -b
16750Sstevel@tonic-gate  *
16760Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
16770Sstevel@tonic-gate  *
16780Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
16790Sstevel@tonic-gate  * is running the metaset command.
16800Sstevel@tonic-gate  *
16810Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
16820Sstevel@tonic-gate  * running the metaset command.  This detach routine is sent to all
16830Sstevel@tonic-gate  * of the joined nodes in the diskset using commd.  This keeps
16840Sstevel@tonic-gate  * the nodes in-sync.
16850Sstevel@tonic-gate  */
16860Sstevel@tonic-gate int
16870Sstevel@tonic-gate meta_db_detach(
16880Sstevel@tonic-gate 	mdsetname_t		*sp,
16890Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
16900Sstevel@tonic-gate 	mdforceopts_t		force_option,
16910Sstevel@tonic-gate 	char			*sysfilename,
16920Sstevel@tonic-gate 	md_error_t		*ep
16930Sstevel@tonic-gate )
16940Sstevel@tonic-gate {
16950Sstevel@tonic-gate 	struct mddb_config	c;
16960Sstevel@tonic-gate 	mdnamelist_t		*nlp;
16970Sstevel@tonic-gate 	mdname_t		*np;
16980Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
16990Sstevel@tonic-gate 	md_drive_desc		*p;
17000Sstevel@tonic-gate 	int			replicacount;
17010Sstevel@tonic-gate 	int			replica_delete_count;
17020Sstevel@tonic-gate 	int			nr_replica_slices;
17030Sstevel@tonic-gate 	int			i;
17040Sstevel@tonic-gate 	int			stop_svmdaemons = 0;
17050Sstevel@tonic-gate 	int			rval = 0;
17060Sstevel@tonic-gate 	int			index;
17070Sstevel@tonic-gate 	int			valid_replicas_nottodelete = 0;
17080Sstevel@tonic-gate 	int			invalid_replicas_nottodelete = 0;
17090Sstevel@tonic-gate 	int			invalid_replicas_todelete = 0;
17100Sstevel@tonic-gate 	int			errored = 0;
17110Sstevel@tonic-gate 	int			*tag_array;
17120Sstevel@tonic-gate 	int			fd = -1;
17130Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
17140Sstevel@tonic-gate 	md_set_desc		*sd;
17150Sstevel@tonic-gate 	int			stale_bool = FALSE;
17160Sstevel@tonic-gate 	int			flags;
17170Sstevel@tonic-gate 
17180Sstevel@tonic-gate 	/*
17190Sstevel@tonic-gate 	 * Error if we don't get some work to do.
17200Sstevel@tonic-gate 	 */
17210Sstevel@tonic-gate 	if (db_nlp == NULL)
17220Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
17230Sstevel@tonic-gate 
17240Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
17250Sstevel@tonic-gate 		return (-1);
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
17280Sstevel@tonic-gate 	c.c_id = 0;
17290Sstevel@tonic-gate 	c.c_setno = sp->setno;
17300Sstevel@tonic-gate 
17310Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
17320Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
17330Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
17340Sstevel@tonic-gate 
17350Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
17360Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
17370Sstevel@tonic-gate 
17380Sstevel@tonic-gate 	/*
17390Sstevel@tonic-gate 	 * Is current set STALE?
17400Sstevel@tonic-gate 	 */
17410Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
17420Sstevel@tonic-gate 		stale_bool = TRUE;
17430Sstevel@tonic-gate 	}
17440Sstevel@tonic-gate 
17450Sstevel@tonic-gate 	replicacount = c.c_dbcnt;
17460Sstevel@tonic-gate 
17470Sstevel@tonic-gate 	assert(db_nlp != NULL);
17480Sstevel@tonic-gate 
17490Sstevel@tonic-gate 	/*
17500Sstevel@tonic-gate 	 * go through and gather how many data bases are on each
17510Sstevel@tonic-gate 	 * device specified.
17520Sstevel@tonic-gate 	 */
17530Sstevel@tonic-gate 
17540Sstevel@tonic-gate 	nr_replica_slices = deletelist_length(db_nlp);
17550Sstevel@tonic-gate 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
17560Sstevel@tonic-gate 
17570Sstevel@tonic-gate 	replica_delete_count = 0;
17580Sstevel@tonic-gate 	for (i = 0; i < replicacount; i++) {
17590Sstevel@tonic-gate 		char	*devname;
17600Sstevel@tonic-gate 		int	found = 0;
17610Sstevel@tonic-gate 
17620Sstevel@tonic-gate 		c.c_id = i;
17630Sstevel@tonic-gate 
17640Sstevel@tonic-gate 		/* Don't need device id information from this ioctl */
17650Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
17660Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
17670Sstevel@tonic-gate 
17680Sstevel@tonic-gate 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
17690Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
17700Sstevel@tonic-gate 
17710Sstevel@tonic-gate 		devname = splicename(&c.c_devname);
17720Sstevel@tonic-gate 
17730Sstevel@tonic-gate 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
17740Sstevel@tonic-gate 			found = 1;
17750Sstevel@tonic-gate 			tag_array[index] = 1;
17760Sstevel@tonic-gate 			replica_delete_count++;
17770Sstevel@tonic-gate 		}
17780Sstevel@tonic-gate 
17790Sstevel@tonic-gate 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
17800Sstevel@tonic-gate 				MDDB_F_EWRITE | MDDB_F_TOOSMALL |
17810Sstevel@tonic-gate 				MDDB_F_EFMT | MDDB_F_EDATA |
17820Sstevel@tonic-gate 				MDDB_F_EMASTER);
17830Sstevel@tonic-gate 
17840Sstevel@tonic-gate 		/*
17850Sstevel@tonic-gate 		 * There are four combinations of "errored" and "found"
17860Sstevel@tonic-gate 		 * and they are used to find the number of
17870Sstevel@tonic-gate 		 * (a) valid/invalid replicas that are not in the delete
17880Sstevel@tonic-gate 		 * list and are available in the system.
17890Sstevel@tonic-gate 		 * (b) valid/invalid replicas that are to be deleted.
17900Sstevel@tonic-gate 		 */
17910Sstevel@tonic-gate 
17920Sstevel@tonic-gate 		if (errored && !found)		/* errored and !found */
17930Sstevel@tonic-gate 			invalid_replicas_nottodelete++;
17940Sstevel@tonic-gate 		else if (!found)		/* !errored and !found */
17950Sstevel@tonic-gate 			valid_replicas_nottodelete++;
17960Sstevel@tonic-gate 		else if (errored)		/* errored and found */
17970Sstevel@tonic-gate 			invalid_replicas_todelete++;
17980Sstevel@tonic-gate 		/*
17990Sstevel@tonic-gate 		 * else it is !errored and found. This means
18000Sstevel@tonic-gate 		 * valid_replicas_todelete++; But this variable will not
18010Sstevel@tonic-gate 		 * be used anywhere
18020Sstevel@tonic-gate 		 */
18030Sstevel@tonic-gate 
18040Sstevel@tonic-gate 		Free(devname);
18050Sstevel@tonic-gate 	}
18060Sstevel@tonic-gate 
18070Sstevel@tonic-gate 	index = 0;
18080Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
18090Sstevel@tonic-gate 		np = nlp->namep;
18100Sstevel@tonic-gate 		if (tag_array[index++] != 1) {
18110Sstevel@tonic-gate 			Free(tag_array);
18120Sstevel@tonic-gate 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
18130Sstevel@tonic-gate 		}
18140Sstevel@tonic-gate 	}
18150Sstevel@tonic-gate 
18160Sstevel@tonic-gate 	Free(tag_array);
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate 
18190Sstevel@tonic-gate 	/* if all replicas are deleted stop mdmonitord */
18200Sstevel@tonic-gate 	if ((replicacount - replica_delete_count) == 0)
18210Sstevel@tonic-gate 		stop_svmdaemons = 1;
18220Sstevel@tonic-gate 
18230Sstevel@tonic-gate 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
18240Sstevel@tonic-gate 		if (force_option & MDFORCE_NONE)
18250Sstevel@tonic-gate 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
18260Sstevel@tonic-gate 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
18270Sstevel@tonic-gate 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
18280Sstevel@tonic-gate 	}
18290Sstevel@tonic-gate 
18300Sstevel@tonic-gate 	/*
18310Sstevel@tonic-gate 	 * The following algorithms are followed to check for deletion:
18320Sstevel@tonic-gate 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
18330Sstevel@tonic-gate 	 * replicas, then deletion should be allowed.
18340Sstevel@tonic-gate 	 * (b) Deletion should be allowed only if valid replicas that are "not"
18350Sstevel@tonic-gate 	 * to be deleted is always greater than the invalid replicas that
18360Sstevel@tonic-gate 	 * are "not" to be deleted.
18370Sstevel@tonic-gate 	 * (c) If the user uses -f option, then deletion should be allowed.
18380Sstevel@tonic-gate 	 */
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate 	if ((invalid_replicas_todelete != replica_delete_count) &&
18410Sstevel@tonic-gate 		(invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
18420Sstevel@tonic-gate 				(force_option != MDFORCE_LOCAL))
18430Sstevel@tonic-gate 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
18440Sstevel@tonic-gate 
18450Sstevel@tonic-gate 	/*
18460Sstevel@tonic-gate 	 * go through and tell kernel to delete them
18470Sstevel@tonic-gate 	 */
18480Sstevel@tonic-gate 
18490Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
18500Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
18510Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
18520Sstevel@tonic-gate 
18530Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
18540Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
18550Sstevel@tonic-gate 
18560Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
18570Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
18580Sstevel@tonic-gate 		if (! mdisok(ep))
18590Sstevel@tonic-gate 			return (-1);
18600Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
18610Sstevel@tonic-gate 			return (-1);
18620Sstevel@tonic-gate 	}
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
18650Sstevel@tonic-gate 		np = nlp->namep;
18660Sstevel@tonic-gate 
18670Sstevel@tonic-gate 		/*
18680Sstevel@tonic-gate 		 * If mddb is being deleted from MN diskset and node is
18690Sstevel@tonic-gate 		 * an owner of the diskset then use rpc.mdcommd
18700Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
18710Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
18720Sstevel@tonic-gate 		 * can't write the message to the mddb.
18730Sstevel@tonic-gate 		 *
18740Sstevel@tonic-gate 		 * When mddbs are first being added to set, a detach can
18750Sstevel@tonic-gate 		 * be called before any node has joined the diskset, so
18760Sstevel@tonic-gate 		 * must check to see if node is an owner of the diskset.
18770Sstevel@tonic-gate 		 *
18780Sstevel@tonic-gate 		 * Otherwise, just delete mddb from this node.
18790Sstevel@tonic-gate 		 */
18800Sstevel@tonic-gate 
18810Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
18820Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
18830Sstevel@tonic-gate 			md_mn_result_t			*resultp;
18840Sstevel@tonic-gate 			md_mn_msg_meta_db_detach_t	detach;
18850Sstevel@tonic-gate 			int				send_rval;
18860Sstevel@tonic-gate 
18870Sstevel@tonic-gate 			/*
18880Sstevel@tonic-gate 			 * The following algorithm is used to detach replicas.
18890Sstevel@tonic-gate 			 * 	- META_DB_DETACH message generates submsgs
18900Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
18910Sstevel@tonic-gate 			 * 		- MDDB_DETACH replicas
18920Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
18930Sstevel@tonic-gate 			 *		information to be sent from master
18940Sstevel@tonic-gate 			 *		to slaves at a higher class than the
18950Sstevel@tonic-gate 			 *		unblock so the parse message will
18960Sstevel@tonic-gate 			 *		reach slaves before unblock message.
18970Sstevel@tonic-gate 			 */
18980Sstevel@tonic-gate 			(void) splitname(np->bname, &detach.msg_splitname);
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
19010Sstevel@tonic-gate 			detach.msg_devid[0] = NULL;
19020Sstevel@tonic-gate 
19030Sstevel@tonic-gate 			/*
19040Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
19050Sstevel@tonic-gate 			 * stuck in in the return step until this command has
19060Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
19070Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
19080Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
19090Sstevel@tonic-gate 			 * cycle to proceed.
19100Sstevel@tonic-gate 			 */
19110Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
19120Sstevel@tonic-gate 			if (stale_bool == TRUE)
19130Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
19140Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
19150Sstevel@tonic-gate 				MD_MN_MSG_META_DB_DETACH,
19160Sstevel@tonic-gate 				flags, (char *)&detach,
19170Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_detach_t),
19180Sstevel@tonic-gate 				&resultp, ep);
19190Sstevel@tonic-gate 			if (send_rval != 0) {
19200Sstevel@tonic-gate 				rval = -1;
19210Sstevel@tonic-gate 				if (resultp == NULL)
19220Sstevel@tonic-gate 					(void) mddserror(ep,
19230Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
19240Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
19250Sstevel@tonic-gate 					    sp->setname);
19260Sstevel@tonic-gate 				else {
19270Sstevel@tonic-gate 					(void) mdstealerror(ep,
19280Sstevel@tonic-gate 					    &(resultp->mmr_ep));
19290Sstevel@tonic-gate 					if (mdisok(ep)) {
19300Sstevel@tonic-gate 						(void) mddserror(ep,
19310Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
19320Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
19330Sstevel@tonic-gate 						    sp->setname);
19340Sstevel@tonic-gate 					}
19350Sstevel@tonic-gate 					free_result(resultp);
19360Sstevel@tonic-gate 				}
19370Sstevel@tonic-gate 				goto out;
19380Sstevel@tonic-gate 			}
19390Sstevel@tonic-gate 			if (resultp)
19400Sstevel@tonic-gate 				free_result(resultp);
19410Sstevel@tonic-gate 		} else {
19420Sstevel@tonic-gate 			i = 0;
19430Sstevel@tonic-gate 			while (i < c.c_dbcnt) {
19440Sstevel@tonic-gate 				char	*devname;
19450Sstevel@tonic-gate 
19460Sstevel@tonic-gate 				c.c_id = i;
19470Sstevel@tonic-gate 
19480Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
19490Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
19500Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
19510Sstevel@tonic-gate 
19520Sstevel@tonic-gate 				if (metaioctl(MD_DB_GETDEV, &c,
19530Sstevel@tonic-gate 				    &c.c_mde, NULL)) {
19540Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
19550Sstevel@tonic-gate 					goto out;
19560Sstevel@tonic-gate 				}
19570Sstevel@tonic-gate 
19580Sstevel@tonic-gate 				devname = splicename(&c.c_devname);
19590Sstevel@tonic-gate 				if (strcmp(devname, np->bname) != 0) {
19600Sstevel@tonic-gate 					Free(devname);
19610Sstevel@tonic-gate 					i++;
19620Sstevel@tonic-gate 					continue;
19630Sstevel@tonic-gate 				}
19640Sstevel@tonic-gate 				Free(devname);
19650Sstevel@tonic-gate 
19660Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
19670Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
19680Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
19690Sstevel@tonic-gate 
19700Sstevel@tonic-gate 				if (metaioctl(MD_DB_DELDEV, &c,
19710Sstevel@tonic-gate 				    &c.c_mde, NULL) != 0) {
19720Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
19730Sstevel@tonic-gate 					goto out;
19740Sstevel@tonic-gate 				}
19750Sstevel@tonic-gate 
19760Sstevel@tonic-gate 				/* Not incrementing "i" intentionally */
19770Sstevel@tonic-gate 			}
19780Sstevel@tonic-gate 		}
19790Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
19800Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
19810Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next) {
19820Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
19830Sstevel@tonic-gate 					p->dd_dbcnt = 0;
19840Sstevel@tonic-gate 					p->dd_dbsize  = 0;
19850Sstevel@tonic-gate 					break;
19860Sstevel@tonic-gate 				}
19870Sstevel@tonic-gate 			}
19880Sstevel@tonic-gate 
19890Sstevel@tonic-gate 			/*
19900Sstevel@tonic-gate 			 * Slam a dummy master block and make it self
19910Sstevel@tonic-gate 			 * identifying
19920Sstevel@tonic-gate 			 */
19930Sstevel@tonic-gate 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
19940Sstevel@tonic-gate 				meta_mkdummymaster(sp, fd, 16);
19950Sstevel@tonic-gate 				(void) close(fd);
19960Sstevel@tonic-gate 			}
19970Sstevel@tonic-gate 		}
19980Sstevel@tonic-gate 	}
19990Sstevel@tonic-gate out:
20000Sstevel@tonic-gate 	if (metaislocalset(sp)) {
20010Sstevel@tonic-gate 		/*
20020Sstevel@tonic-gate 		 * Stop all the daemons if there are
20030Sstevel@tonic-gate 		 * no more replicas so that the module can be
20040Sstevel@tonic-gate 		 * unloaded.
20050Sstevel@tonic-gate 		 */
20060Sstevel@tonic-gate 		if (rval == 0 && stop_svmdaemons == 1) {
20070Sstevel@tonic-gate 			char buf[MAXPATHLEN];
20080Sstevel@tonic-gate 			int i;
20090Sstevel@tonic-gate 
20100Sstevel@tonic-gate 			for (i = 0; i < DAEMON_COUNT; i++) {
20110Sstevel@tonic-gate 				(void) snprintf(buf, MAXPATHLEN,
20120Sstevel@tonic-gate 					"/usr/bin/pkill -%s -x %s",
20130Sstevel@tonic-gate 					svmd_kill_list[i].svmd_kill_val,
20140Sstevel@tonic-gate 					svmd_kill_list[i].svmd_name);
20150Sstevel@tonic-gate 				if (pclose(popen(buf, "w")) == -1)
20160Sstevel@tonic-gate 					md_perror(buf);
20170Sstevel@tonic-gate 			}
20180Sstevel@tonic-gate 
20190Sstevel@tonic-gate 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
20200Sstevel@tonic-gate 				mde_perror(&status, "");
20210Sstevel@tonic-gate 				mdclrerror(&status);
20220Sstevel@tonic-gate 			}
20230Sstevel@tonic-gate 		}
20240Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
20250Sstevel@tonic-gate 			/* Don't mask any previous errors */
20260Sstevel@tonic-gate 			if (rval == 0)
20270Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20280Sstevel@tonic-gate 			else
20290Sstevel@tonic-gate 				mdclrerror(&status);
20300Sstevel@tonic-gate 			return (rval);
20310Sstevel@tonic-gate 		}
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
20340Sstevel@tonic-gate 			/* Don't mask any previous errors */
20350Sstevel@tonic-gate 			if (rval == 0)
20360Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20370Sstevel@tonic-gate 			else
20380Sstevel@tonic-gate 				mdclrerror(&status);
20390Sstevel@tonic-gate 		}
20400Sstevel@tonic-gate 	} else {
20410Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
20420Sstevel@tonic-gate 		    (force_option & MDFORCE_SET_LOCKED),
20430Sstevel@tonic-gate 		    ((force_option & MDFORCE_LOCAL) |
20440Sstevel@tonic-gate 		    (force_option & MDFORCE_DS)), &status)) {
20450Sstevel@tonic-gate 			/* Don't mask any previous errors */
20460Sstevel@tonic-gate 			if (rval == 0)
20470Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
20480Sstevel@tonic-gate 			else
20490Sstevel@tonic-gate 				mdclrerror(&status);
20500Sstevel@tonic-gate 		}
20510Sstevel@tonic-gate 		metafreedrivedesc(&dd);
20520Sstevel@tonic-gate 	}
20530Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
20540Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
20550Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
20560Sstevel@tonic-gate 		}
20570Sstevel@tonic-gate 	}
20580Sstevel@tonic-gate 	return (rval);
20590Sstevel@tonic-gate }
20600Sstevel@tonic-gate 
20610Sstevel@tonic-gate static md_replica_t *
20620Sstevel@tonic-gate metareplicaname(
20630Sstevel@tonic-gate 	mdsetname_t		*sp,
20640Sstevel@tonic-gate 	int			flags,
20650Sstevel@tonic-gate 	struct mddb_config	*c,
20660Sstevel@tonic-gate 	md_error_t		*ep
20670Sstevel@tonic-gate )
20680Sstevel@tonic-gate {
20690Sstevel@tonic-gate 	md_replica_t	*rp;
20700Sstevel@tonic-gate 	char		*devname;
20710Sstevel@tonic-gate 	size_t		sz;
20720Sstevel@tonic-gate 
20730Sstevel@tonic-gate 	/* allocate replicaname */
20740Sstevel@tonic-gate 	rp = Zalloc(sizeof (*rp));
20750Sstevel@tonic-gate 
20760Sstevel@tonic-gate 	/* get device name */
20770Sstevel@tonic-gate 	devname = splicename(&c->c_devname);
20780Sstevel@tonic-gate 	if (flags & PRINT_FAST) {
20790Sstevel@tonic-gate 		if ((rp->r_namep = metaname_fast(&sp, devname, ep)) == NULL) {
20800Sstevel@tonic-gate 			Free(devname);
20810Sstevel@tonic-gate 			Free(rp);
20820Sstevel@tonic-gate 			return (NULL);
20830Sstevel@tonic-gate 		}
20840Sstevel@tonic-gate 	} else {
20850Sstevel@tonic-gate 		if ((rp->r_namep = metaname(&sp, devname, ep)) == NULL) {
20860Sstevel@tonic-gate 			Free(devname);
20870Sstevel@tonic-gate 			Free(rp);
20880Sstevel@tonic-gate 			return (NULL);
20890Sstevel@tonic-gate 		}
20900Sstevel@tonic-gate 	}
20910Sstevel@tonic-gate 	Free(devname);
20920Sstevel@tonic-gate 
20930Sstevel@tonic-gate 	/* make sure it's OK */
20940Sstevel@tonic-gate 	if ((! (flags & MD_BASICNAME_OK)) &&
20950Sstevel@tonic-gate 	    (metachkcomp(rp->r_namep, ep) != 0)) {
20960Sstevel@tonic-gate 		Free(rp);
20970Sstevel@tonic-gate 		return (NULL);
20980Sstevel@tonic-gate 	}
20990Sstevel@tonic-gate 
2100*62Sjeanm 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
2101*62Sjeanm 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
21020Sstevel@tonic-gate 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
21030Sstevel@tonic-gate 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
2104*62Sjeanm 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
2105*62Sjeanm 		    (c->c_locator.l_devid));
21060Sstevel@tonic-gate 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
21070Sstevel@tonic-gate 		    (ddi_devid_t)NULL) {
21080Sstevel@tonic-gate 			Free(rp);
21090Sstevel@tonic-gate 			return (NULL);
21100Sstevel@tonic-gate 		}
21110Sstevel@tonic-gate 		(void) memcpy((void *)rp->r_devid,
2112*62Sjeanm 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
21130Sstevel@tonic-gate 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
21140Sstevel@tonic-gate 		rp->r_flags &= ~MDDB_F_NODEVID;
21150Sstevel@tonic-gate 		/* Overwrite dev derived from name with dev from devid */
21160Sstevel@tonic-gate 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
21170Sstevel@tonic-gate 	}
21180Sstevel@tonic-gate 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
21190Sstevel@tonic-gate 
21200Sstevel@tonic-gate 	rp->r_blkno = c->c_locator.l_blkno;
21210Sstevel@tonic-gate 	if (c->c_dbend != 0)
21220Sstevel@tonic-gate 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
21230Sstevel@tonic-gate 
21240Sstevel@tonic-gate 	/* return replica */
21250Sstevel@tonic-gate 	return (rp);
21260Sstevel@tonic-gate }
21270Sstevel@tonic-gate 
21280Sstevel@tonic-gate /*
21290Sstevel@tonic-gate  * free replica list
21300Sstevel@tonic-gate  */
21310Sstevel@tonic-gate void
21320Sstevel@tonic-gate metafreereplicalist(
21330Sstevel@tonic-gate 	md_replicalist_t	*rlp
21340Sstevel@tonic-gate )
21350Sstevel@tonic-gate {
21360Sstevel@tonic-gate 	md_replicalist_t	*rl = NULL;
21370Sstevel@tonic-gate 
21380Sstevel@tonic-gate 	for (/* void */; (rlp != NULL); rlp = rl) {
21390Sstevel@tonic-gate 		rl = rlp->rl_next;
21400Sstevel@tonic-gate 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
21410Sstevel@tonic-gate 			free(rlp->rl_repp->r_devid);
21420Sstevel@tonic-gate 		}
21430Sstevel@tonic-gate 		Free(rlp->rl_repp);
21440Sstevel@tonic-gate 		Free(rlp);
21450Sstevel@tonic-gate 	}
21460Sstevel@tonic-gate }
21470Sstevel@tonic-gate 
21480Sstevel@tonic-gate /*
21490Sstevel@tonic-gate  * return list of all replicas in set
21500Sstevel@tonic-gate  */
21510Sstevel@tonic-gate int
21520Sstevel@tonic-gate metareplicalist(
21530Sstevel@tonic-gate 	mdsetname_t		*sp,
21540Sstevel@tonic-gate 	int			flags,
21550Sstevel@tonic-gate 	md_replicalist_t	**rlpp,
21560Sstevel@tonic-gate 	md_error_t		*ep
21570Sstevel@tonic-gate )
21580Sstevel@tonic-gate {
21590Sstevel@tonic-gate 	md_replicalist_t	**tail = rlpp;
21600Sstevel@tonic-gate 	int			count = 0;
21610Sstevel@tonic-gate 	struct mddb_config	c;
21620Sstevel@tonic-gate 	int			i;
21630Sstevel@tonic-gate 	char			*devid;
21640Sstevel@tonic-gate 
21650Sstevel@tonic-gate 	/* for each replica */
21660Sstevel@tonic-gate 	i = 0;
21670Sstevel@tonic-gate 	do {
21680Sstevel@tonic-gate 		md_replica_t	*rp;
21690Sstevel@tonic-gate 
21700Sstevel@tonic-gate 		/* get next replica */
21710Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
21720Sstevel@tonic-gate 		c.c_id = i;
21730Sstevel@tonic-gate 		c.c_setno = sp->setno;
21740Sstevel@tonic-gate 
21750Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
21760Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
21770Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
21780Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
21790Sstevel@tonic-gate 				break;	/* handle none at all */
21800Sstevel@tonic-gate 			}
21810Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
21820Sstevel@tonic-gate 			goto out;
21830Sstevel@tonic-gate 		}
21840Sstevel@tonic-gate 
21850Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
21860Sstevel@tonic-gate 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
21870Sstevel@tonic-gate 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
21880Sstevel@tonic-gate 				goto out;
21890Sstevel@tonic-gate 			}
21900Sstevel@tonic-gate 			c.c_locator.l_devid = (uintptr_t)devid;
21910Sstevel@tonic-gate 			/*
21920Sstevel@tonic-gate 			 * Turn on space and sz flags since 'sz' amount of
21930Sstevel@tonic-gate 			 * space has been alloc'd.
21940Sstevel@tonic-gate 			 */
21950Sstevel@tonic-gate 			c.c_locator.l_devid_flags =
21960Sstevel@tonic-gate 				MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
21970Sstevel@tonic-gate 		}
21980Sstevel@tonic-gate 
21990Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
22000Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
22010Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
22020Sstevel@tonic-gate 				break;	/* handle none at all */
22030Sstevel@tonic-gate 			}
22040Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
22050Sstevel@tonic-gate 			goto out;
22060Sstevel@tonic-gate 		}
22070Sstevel@tonic-gate 
22080Sstevel@tonic-gate 		/*
22090Sstevel@tonic-gate 		 * Paranoid check - shouldn't happen, but is left as
22100Sstevel@tonic-gate 		 * a place holder for changes that will be needed after
22110Sstevel@tonic-gate 		 * dynamic reconfiguration changes are added to SVM (to
22120Sstevel@tonic-gate 		 * support movement of disks at any point in time).
22130Sstevel@tonic-gate 		 */
22140Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
22150Sstevel@tonic-gate 			(void) fprintf(stderr,
22160Sstevel@tonic-gate 			    dgettext(TEXT_DOMAIN,
22170Sstevel@tonic-gate 				"Error: Relocation Information "
22180Sstevel@tonic-gate 				"(drvnm=%s, mnum=0x%lx) \n"
22190Sstevel@tonic-gate 				"relocation information size changed - \n"
22200Sstevel@tonic-gate 				"rerun command\n"),
22210Sstevel@tonic-gate 			    c.c_locator.l_driver, c.c_locator.l_mnum);
22220Sstevel@tonic-gate 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
22230Sstevel@tonic-gate 			goto out;
22240Sstevel@tonic-gate 		}
22250Sstevel@tonic-gate 
22260Sstevel@tonic-gate 		if (c.c_dbcnt == 0)
22270Sstevel@tonic-gate 			break;		/* handle none at all */
22280Sstevel@tonic-gate 
22290Sstevel@tonic-gate 		/* get info */
22300Sstevel@tonic-gate 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
22310Sstevel@tonic-gate 			goto out;
22320Sstevel@tonic-gate 
22330Sstevel@tonic-gate 		/* append to list */
22340Sstevel@tonic-gate 		*tail = Zalloc(sizeof (**tail));
22350Sstevel@tonic-gate 		(*tail)->rl_repp = rp;
22360Sstevel@tonic-gate 		tail = &(*tail)->rl_next;
22370Sstevel@tonic-gate 		++count;
22380Sstevel@tonic-gate 
22390Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22400Sstevel@tonic-gate 			free(devid);
22410Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
22420Sstevel@tonic-gate 		}
22430Sstevel@tonic-gate 
22440Sstevel@tonic-gate 	} while (++i < c.c_dbcnt);
22450Sstevel@tonic-gate 
22460Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22470Sstevel@tonic-gate 		free(devid);
22480Sstevel@tonic-gate 	}
22490Sstevel@tonic-gate 
22500Sstevel@tonic-gate 	/* return count */
22510Sstevel@tonic-gate 	return (count);
22520Sstevel@tonic-gate 
22530Sstevel@tonic-gate 	/* cleanup, return error */
22540Sstevel@tonic-gate out:
22550Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
22560Sstevel@tonic-gate 		free(devid);
22570Sstevel@tonic-gate 	}
22580Sstevel@tonic-gate 	metafreereplicalist(*rlpp);
22590Sstevel@tonic-gate 	*rlpp = NULL;
22600Sstevel@tonic-gate 	return (-1);
22610Sstevel@tonic-gate }
22620Sstevel@tonic-gate 
22630Sstevel@tonic-gate /*
22640Sstevel@tonic-gate  * meta_sync_db_locations - get list of replicas from kernel and write
22650Sstevel@tonic-gate  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
22660Sstevel@tonic-gate  * 	the kernel with the replica list in the conf files.
22670Sstevel@tonic-gate  *
22680Sstevel@tonic-gate  */
22690Sstevel@tonic-gate void
22700Sstevel@tonic-gate meta_sync_db_locations(
22710Sstevel@tonic-gate 	mdsetname_t	*sp,
22720Sstevel@tonic-gate 	md_error_t	*ep
22730Sstevel@tonic-gate )
22740Sstevel@tonic-gate {
22750Sstevel@tonic-gate 	char		*sname = 0;		/* system file name */
22760Sstevel@tonic-gate 	char 		*cname = 0;		/* config file name */
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 	if (!metaislocalset(sp))
22790Sstevel@tonic-gate 		return;
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate 	/* Updates backup of configuration file (aka mddb.cf) */
22820Sstevel@tonic-gate 	if (buildconf(sp, ep) != 0)
22830Sstevel@tonic-gate 		return;
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 	/* Updates system configuration file (aka md.conf) */
22860Sstevel@tonic-gate 	(void) meta_db_patch(sname, cname, 0, ep);
22870Sstevel@tonic-gate }
22880Sstevel@tonic-gate 
22890Sstevel@tonic-gate /*
22900Sstevel@tonic-gate  * setup_db_locations - parse the mddb.cf file and
22910Sstevel@tonic-gate  *			tells the driver which db locations to use.
22920Sstevel@tonic-gate  */
22930Sstevel@tonic-gate int
22940Sstevel@tonic-gate meta_setup_db_locations(
22950Sstevel@tonic-gate 	md_error_t	*ep
22960Sstevel@tonic-gate )
22970Sstevel@tonic-gate {
22980Sstevel@tonic-gate 	mddb_config_t	c;
22990Sstevel@tonic-gate 	FILE		*fp;
23000Sstevel@tonic-gate 	char		inbuff[1024];
23010Sstevel@tonic-gate 	char		*buff;
23020Sstevel@tonic-gate 	uint_t		i;
23030Sstevel@tonic-gate 	size_t		sz;
23040Sstevel@tonic-gate 	int		rval = 0;
23050Sstevel@tonic-gate 	char		*devidp;
23060Sstevel@tonic-gate 	uint_t		devid_size;
23070Sstevel@tonic-gate 	char		*minor_name = NULL;
23080Sstevel@tonic-gate 	ddi_devid_t	devid_decode;
23090Sstevel@tonic-gate 	int		checksum;
23100Sstevel@tonic-gate 
23110Sstevel@tonic-gate 	/* do mddb.cf file */
23120Sstevel@tonic-gate 	(void) memset(&c, '\0', sizeof (c));
23130Sstevel@tonic-gate 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
23140Sstevel@tonic-gate 		if (errno != ENOENT)
23150Sstevel@tonic-gate 			return (mdsyserror(ep, errno, META_DBCONF));
23160Sstevel@tonic-gate 	}
23170Sstevel@tonic-gate 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
23180Sstevel@tonic-gate 	    fp)) != NULL)) {
23190Sstevel@tonic-gate 
23200Sstevel@tonic-gate 		/* ignore comments */
23210Sstevel@tonic-gate 		if (*buff == '#')
23220Sstevel@tonic-gate 			continue;
23230Sstevel@tonic-gate 
23240Sstevel@tonic-gate 		/* parse locator */
23250Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
23260Sstevel@tonic-gate 		c.c_setno = MD_LOCAL_SET;
23270Sstevel@tonic-gate 		i = strcspn(buff, " \t");
23280Sstevel@tonic-gate 		if (i > sizeof (c.c_locator.l_driver))
23290Sstevel@tonic-gate 			i = sizeof (c.c_locator.l_driver);
23300Sstevel@tonic-gate 		(void) strncpy(c.c_locator.l_driver, buff, i);
23310Sstevel@tonic-gate 		buff += i;
23320Sstevel@tonic-gate 		c.c_locator.l_dev =
23330Sstevel@tonic-gate 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
23340Sstevel@tonic-gate 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
23350Sstevel@tonic-gate 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate 		/* parse out devid */
23380Sstevel@tonic-gate 		while (isspace((int)(*buff)))
23390Sstevel@tonic-gate 			buff += 1;
23400Sstevel@tonic-gate 		i = strcspn(buff, " \t");
23410Sstevel@tonic-gate 		if ((devidp = (char *)malloc(i+1)) == NULL)
23420Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23430Sstevel@tonic-gate 
23440Sstevel@tonic-gate 		(void) strncpy(devidp, buff, i);
23450Sstevel@tonic-gate 		devidp[i] = '\0';
23460Sstevel@tonic-gate 		if (devid_str_decode(devidp, &devid_decode,
23470Sstevel@tonic-gate 		    &minor_name) == -1) {
23480Sstevel@tonic-gate 			free(devidp);
23490Sstevel@tonic-gate 			continue;
23500Sstevel@tonic-gate 		}
23510Sstevel@tonic-gate 
23520Sstevel@tonic-gate 		/* Conf file must have minor name associated with devid */
23530Sstevel@tonic-gate 		if (minor_name == NULL) {
23540Sstevel@tonic-gate 			free(devidp);
23550Sstevel@tonic-gate 			devid_free(devid_decode);
23560Sstevel@tonic-gate 			continue;
23570Sstevel@tonic-gate 		}
23580Sstevel@tonic-gate 
23590Sstevel@tonic-gate 		sz = devid_sizeof(devid_decode);
23600Sstevel@tonic-gate 		/* Copy to devid size buffer that ioctl expects */
23610Sstevel@tonic-gate 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
23620Sstevel@tonic-gate 			devid_free(devid_decode);
23630Sstevel@tonic-gate 			free(minor_name);
23640Sstevel@tonic-gate 			free(devidp);
23650Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23660Sstevel@tonic-gate 		}
23670Sstevel@tonic-gate 
2368*62Sjeanm 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
23690Sstevel@tonic-gate 		    (void *)devid_decode, sz);
23700Sstevel@tonic-gate 
23710Sstevel@tonic-gate 		devid_free(devid_decode);
23720Sstevel@tonic-gate 
23730Sstevel@tonic-gate 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
23740Sstevel@tonic-gate 			free(minor_name);
23750Sstevel@tonic-gate 			free(devidp);
2376*62Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
23770Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
23780Sstevel@tonic-gate 		}
23790Sstevel@tonic-gate 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
23800Sstevel@tonic-gate 		free(minor_name);
23810Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
23820Sstevel@tonic-gate 			MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
23830Sstevel@tonic-gate 		c.c_locator.l_devid_sz = sz;
23840Sstevel@tonic-gate 
23850Sstevel@tonic-gate 		devid_size = strlen(devidp);
23860Sstevel@tonic-gate 		buff += devid_size;
23870Sstevel@tonic-gate 
23880Sstevel@tonic-gate 		checksum = strtol(buff, &buff, 10);
23890Sstevel@tonic-gate 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
23900Sstevel@tonic-gate 			checksum += c.c_locator.l_driver[i];
23910Sstevel@tonic-gate 		for (i = 0; i < devid_size; i++) {
23920Sstevel@tonic-gate 			checksum += devidp[i];
23930Sstevel@tonic-gate 		}
23940Sstevel@tonic-gate 		free(devidp);
23950Sstevel@tonic-gate 
23960Sstevel@tonic-gate 		checksum += minor(c.c_locator.l_dev);
23970Sstevel@tonic-gate 		checksum += c.c_locator.l_blkno;
23980Sstevel@tonic-gate 		if (checksum != 42) {
23990Sstevel@tonic-gate 			/* overwritten later for more serious problems */
24000Sstevel@tonic-gate 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
2401*62Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
24020Sstevel@tonic-gate 			continue;
24030Sstevel@tonic-gate 		}
24040Sstevel@tonic-gate 		c.c_locator.l_flags = 0;
24050Sstevel@tonic-gate 
24060Sstevel@tonic-gate 		/* use db location */
24070Sstevel@tonic-gate 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
2408*62Sjeanm 			free((void *)(uintptr_t)c.c_locator.l_devid);
24090Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
24100Sstevel@tonic-gate 		}
24110Sstevel@tonic-gate 
24120Sstevel@tonic-gate 		/* free up devid if in use */
2413*62Sjeanm 		free((void *)(uintptr_t)c.c_locator.l_devid);
24140Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
24150Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
24160Sstevel@tonic-gate 	}
24170Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
24180Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_DBCONF));
24190Sstevel@tonic-gate 
24200Sstevel@tonic-gate 	/* check for stale database */
24210Sstevel@tonic-gate 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
24220Sstevel@tonic-gate 	c.c_id = 0;
24230Sstevel@tonic-gate 	c.c_setno = MD_LOCAL_SET;
24240Sstevel@tonic-gate 
24250Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
24260Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
24270Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
24280Sstevel@tonic-gate 
24290Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
24300Sstevel@tonic-gate 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
24310Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
24320Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
24330Sstevel@tonic-gate 	}
24340Sstevel@tonic-gate 
24350Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE)
24360Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
24370Sstevel@tonic-gate 		    0, NULL));
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate 	/* success */
24400Sstevel@tonic-gate 	return (rval);
24410Sstevel@tonic-gate }
24420Sstevel@tonic-gate 
24430Sstevel@tonic-gate /*
24440Sstevel@tonic-gate  * meta_db_minreplica - returns the minimum size replica currently in use.
24450Sstevel@tonic-gate  */
24460Sstevel@tonic-gate daddr_t
24470Sstevel@tonic-gate meta_db_minreplica(
24480Sstevel@tonic-gate 	mdsetname_t	*sp,
24490Sstevel@tonic-gate 	md_error_t	*ep
24500Sstevel@tonic-gate )
24510Sstevel@tonic-gate {
24520Sstevel@tonic-gate 	md_replica_t		*r;
24530Sstevel@tonic-gate 	md_replicalist_t	*rl, *rlp = NULL;
24540Sstevel@tonic-gate 	daddr_t			nblks = 0;
24550Sstevel@tonic-gate 
24560Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
24570Sstevel@tonic-gate 		return (-1);
24580Sstevel@tonic-gate 
24590Sstevel@tonic-gate 	if (rlp == NULL)
24600Sstevel@tonic-gate 		return (-1);
24610Sstevel@tonic-gate 
24620Sstevel@tonic-gate 	/* find the smallest existing replica */
24630Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
24640Sstevel@tonic-gate 		r = rl->rl_repp;
24650Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
24660Sstevel@tonic-gate 	}
24670Sstevel@tonic-gate 
24680Sstevel@tonic-gate 	metafreereplicalist(rlp);
24690Sstevel@tonic-gate 	return (nblks);
24700Sstevel@tonic-gate }
24710Sstevel@tonic-gate 
24720Sstevel@tonic-gate /*
24730Sstevel@tonic-gate  * meta_get_replica_names
24740Sstevel@tonic-gate  *  returns an mdnamelist_t of replica slices
24750Sstevel@tonic-gate  */
24760Sstevel@tonic-gate /*ARGSUSED*/
24770Sstevel@tonic-gate int
24780Sstevel@tonic-gate meta_get_replica_names(
24790Sstevel@tonic-gate 	mdsetname_t	*sp,
24800Sstevel@tonic-gate 	mdnamelist_t	**nlpp,
24810Sstevel@tonic-gate 	int		options,
24820Sstevel@tonic-gate 	md_error_t	*ep
24830Sstevel@tonic-gate )
24840Sstevel@tonic-gate {
24850Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
24860Sstevel@tonic-gate 	md_replicalist_t	*rl;
24870Sstevel@tonic-gate 	mdnamelist_t		**tailpp = nlpp;
24880Sstevel@tonic-gate 	int			cnt = 0;
24890Sstevel@tonic-gate 
24900Sstevel@tonic-gate 	assert(nlpp != NULL);
24910Sstevel@tonic-gate 
24920Sstevel@tonic-gate 	if (!metaislocalset(sp))
24930Sstevel@tonic-gate 		goto out;
24940Sstevel@tonic-gate 
24950Sstevel@tonic-gate 	/* get replicas */
24960Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
24970Sstevel@tonic-gate 		cnt = -1;
24980Sstevel@tonic-gate 		goto out;
24990Sstevel@tonic-gate 	}
25000Sstevel@tonic-gate 
25010Sstevel@tonic-gate 	/* build name list */
25020Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
25030Sstevel@tonic-gate 		/*
25040Sstevel@tonic-gate 		 * Add the name struct to the end of the
25050Sstevel@tonic-gate 		 * namelist but keep a pointer to the last
25060Sstevel@tonic-gate 		 * element so that we don't incur the overhead
25070Sstevel@tonic-gate 		 * of traversing the list each time
25080Sstevel@tonic-gate 		 */
25090Sstevel@tonic-gate 		tailpp = meta_namelist_append_wrapper(
25100Sstevel@tonic-gate 			tailpp, rl->rl_repp->r_namep);
25110Sstevel@tonic-gate 		++cnt;
25120Sstevel@tonic-gate 	}
25130Sstevel@tonic-gate 
25140Sstevel@tonic-gate 	/* cleanup, return count or error */
25150Sstevel@tonic-gate out:
25160Sstevel@tonic-gate 	metafreereplicalist(rlp);
25170Sstevel@tonic-gate 	return (cnt);
25180Sstevel@tonic-gate }
2519