1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * Just in case we're not in a build environment, make sure that
31*0Sstevel@tonic-gate  * TEXT_DOMAIN gets set to something.
32*0Sstevel@tonic-gate  */
33*0Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
34*0Sstevel@tonic-gate #define	TEXT_DOMAIN "SYS_TEST"
35*0Sstevel@tonic-gate #endif
36*0Sstevel@tonic-gate 
37*0Sstevel@tonic-gate /*
38*0Sstevel@tonic-gate  * Metadevice database interfaces.
39*0Sstevel@tonic-gate  */
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate #define	MDDB
42*0Sstevel@tonic-gate 
43*0Sstevel@tonic-gate #include <meta.h>
44*0Sstevel@tonic-gate #include <sys/lvm/md_mddb.h>
45*0Sstevel@tonic-gate #include <sys/lvm/md_crc.h>
46*0Sstevel@tonic-gate #include <sys/lvm/mdio.h>
47*0Sstevel@tonic-gate #include <string.h>
48*0Sstevel@tonic-gate #include <strings.h>
49*0Sstevel@tonic-gate #include <ctype.h>
50*0Sstevel@tonic-gate 
/*
 * One entry of the daemon kill list: a daemon name paired with the
 * name of the signal recorded for it.
 */
struct svm_daemon {
	char *svmd_name;	/* daemon name */
	char *svmd_kill_val;	/* signal name (presumably as given to a */
				/* kill-style command; confirm at caller) */
};

/* Daemons in the kill list and the signal name recorded for each. */
struct svm_daemon svmd_kill_list[] = {
	{ "mdmonitord", "HUP" },
	{ "mddoors", "KILL" }
};

/* Number of entries in svmd_kill_list. */
#define	DAEMON_COUNT	(sizeof (svmd_kill_list) / sizeof (svmd_kill_list[0]))

/* Full path name of mdmonitord. */
#define	MDMONITORD	"/usr/sbin/mdmonitord"
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);
65*0Sstevel@tonic-gate 
66*0Sstevel@tonic-gate /*
67*0Sstevel@tonic-gate  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
68*0Sstevel@tonic-gate  */
69*0Sstevel@tonic-gate md_timeval32_t
70*0Sstevel@tonic-gate meta_get_lb_inittime(
71*0Sstevel@tonic-gate 	mdsetname_t	*sp,
72*0Sstevel@tonic-gate 	md_error_t	*ep
73*0Sstevel@tonic-gate )
74*0Sstevel@tonic-gate {
75*0Sstevel@tonic-gate 	mddb_config_t	c;
76*0Sstevel@tonic-gate 
77*0Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
78*0Sstevel@tonic-gate 
79*0Sstevel@tonic-gate 	/* Fill in setno, setname, and sideno */
80*0Sstevel@tonic-gate 	c.c_setno = sp->setno;
81*0Sstevel@tonic-gate 
82*0Sstevel@tonic-gate 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
83*0Sstevel@tonic-gate 		(void) mdstealerror(ep, &c.c_mde);
84*0Sstevel@tonic-gate 	}
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate 	return (c.c_timestamp);
87*0Sstevel@tonic-gate }
88*0Sstevel@tonic-gate 
89*0Sstevel@tonic-gate /*
90*0Sstevel@tonic-gate  * mkmasterblks writes out the master blocks of the mddb to the replica.
91*0Sstevel@tonic-gate  *
92*0Sstevel@tonic-gate  * In a MN diskset, this is called by the node that is adding this replica
93*0Sstevel@tonic-gate  * to the diskset.
94*0Sstevel@tonic-gate  */
95*0Sstevel@tonic-gate 
96*0Sstevel@tonic-gate #define	MDDB_VERIFY_SIZE	8192
97*0Sstevel@tonic-gate 
98*0Sstevel@tonic-gate static int
99*0Sstevel@tonic-gate mkmasterblks(
100*0Sstevel@tonic-gate 	mdsetname_t	*sp,
101*0Sstevel@tonic-gate 	mdname_t	*np,
102*0Sstevel@tonic-gate 	int		fd,
103*0Sstevel@tonic-gate 	daddr_t		firstblk,
104*0Sstevel@tonic-gate 	int		dbsize,
105*0Sstevel@tonic-gate 	md_timeval32_t	inittime,
106*0Sstevel@tonic-gate 	md_error_t	*ep
107*0Sstevel@tonic-gate )
108*0Sstevel@tonic-gate {
109*0Sstevel@tonic-gate 	int		consecutive;
110*0Sstevel@tonic-gate 	md_timeval32_t	tp;
111*0Sstevel@tonic-gate 	struct mddb_mb	*mb;
112*0Sstevel@tonic-gate 	char		*buffer;
113*0Sstevel@tonic-gate 	int		iosize;
114*0Sstevel@tonic-gate 	md_set_desc	*sd;
115*0Sstevel@tonic-gate 	int		mn_set = 0;
116*0Sstevel@tonic-gate 	daddr_t		startblk;
117*0Sstevel@tonic-gate 	int		cnt;
118*0Sstevel@tonic-gate 	ddi_devid_t	devid;
119*0Sstevel@tonic-gate 
120*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
121*0Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
122*0Sstevel@tonic-gate 			return (-1);
123*0Sstevel@tonic-gate 
124*0Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
125*0Sstevel@tonic-gate 			mn_set = 1;		/* Used later */
126*0Sstevel@tonic-gate 		}
127*0Sstevel@tonic-gate 	}
128*0Sstevel@tonic-gate 
129*0Sstevel@tonic-gate 	/*
130*0Sstevel@tonic-gate 	 * Loop to verify the entire mddb region on disk is read/writable.
131*0Sstevel@tonic-gate 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
132*0Sstevel@tonic-gate 	 * chunks.
133*0Sstevel@tonic-gate 	 *
134*0Sstevel@tonic-gate 	 * A side-effect of this loop is to zero out the entire mddb region
135*0Sstevel@tonic-gate 	 */
136*0Sstevel@tonic-gate 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
137*0Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate 	startblk = firstblk;
140*0Sstevel@tonic-gate 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
141*0Sstevel@tonic-gate 
142*0Sstevel@tonic-gate 		if (cnt > MDDB_VERIFY_SIZE)
143*0Sstevel@tonic-gate 			consecutive = MDDB_VERIFY_SIZE;
144*0Sstevel@tonic-gate 		else
145*0Sstevel@tonic-gate 			consecutive = cnt;
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
148*0Sstevel@tonic-gate 			Free(buffer);
149*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
150*0Sstevel@tonic-gate 		}
151*0Sstevel@tonic-gate 
152*0Sstevel@tonic-gate 		iosize = DEV_BSIZE * consecutive;
153*0Sstevel@tonic-gate 		if (write(fd, buffer, iosize) != iosize) {
154*0Sstevel@tonic-gate 			Free(buffer);
155*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
156*0Sstevel@tonic-gate 		}
157*0Sstevel@tonic-gate 
158*0Sstevel@tonic-gate 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
159*0Sstevel@tonic-gate 			Free(buffer);
160*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
161*0Sstevel@tonic-gate 		}
162*0Sstevel@tonic-gate 
163*0Sstevel@tonic-gate 		if (read(fd, buffer, iosize) != iosize) {
164*0Sstevel@tonic-gate 			Free(buffer);
165*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
166*0Sstevel@tonic-gate 		}
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate 		startblk += consecutive;
169*0Sstevel@tonic-gate 	}
170*0Sstevel@tonic-gate 
171*0Sstevel@tonic-gate 	Free(buffer);
172*0Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
173*0Sstevel@tonic-gate 		return (mdsyserror(ep, ENOMEM, np->rname));
174*0Sstevel@tonic-gate 
175*0Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) == -1) {
176*0Sstevel@tonic-gate 		Free(mb);
177*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
178*0Sstevel@tonic-gate 	}
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_MB;
181*0Sstevel@tonic-gate 	/*
182*0Sstevel@tonic-gate 	 * If a MN diskset, set master block revision for a MN set.
183*0Sstevel@tonic-gate 	 * Even though the master block structure is no different
184*0Sstevel@tonic-gate 	 * for a MN set, setting the revision field to a different
185*0Sstevel@tonic-gate 	 * number keeps any pre-MN_diskset code from accessing
186*0Sstevel@tonic-gate 	 * this diskset.  It also allows for an early determination
187*0Sstevel@tonic-gate 	 * of a MN diskset when reading in from disk so that the
188*0Sstevel@tonic-gate 	 * proper size locator block and locator names structure
189*0Sstevel@tonic-gate 	 * can be read in thus saving time on diskset startup.
190*0Sstevel@tonic-gate 	 */
191*0Sstevel@tonic-gate 	if (mn_set)
192*0Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MNMB;
193*0Sstevel@tonic-gate 	else
194*0Sstevel@tonic-gate 		mb->mb_revision = MDDB_REV_MB;
195*0Sstevel@tonic-gate 	mb->mb_timestamp = tp;
196*0Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
197*0Sstevel@tonic-gate 	mb->mb_blkcnt = dbsize - 1;
198*0Sstevel@tonic-gate 	mb->mb_blkno = firstblk;
199*0Sstevel@tonic-gate 	mb->mb_nextblk = 0;
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate 	mb->mb_blkmap.m_firstblk = firstblk + 1;
202*0Sstevel@tonic-gate 	mb->mb_blkmap.m_consecutive = dbsize - 1;
203*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
204*0Sstevel@tonic-gate 		mb->mb_setcreatetime = inittime;
205*0Sstevel@tonic-gate 	}
206*0Sstevel@tonic-gate 
207*0Sstevel@tonic-gate 	/*
208*0Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
209*0Sstevel@tonic-gate 	 * the master block. The saved devid is used to provide a mapping
210*0Sstevel@tonic-gate 	 * between this disk's devid and the devid stored into the master
211*0Sstevel@tonic-gate 	 * block. This allows the disk image to be self-identifying
212*0Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
213*0Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
214*0Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
215*0Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
216*0Sstevel@tonic-gate 	 * in the remote copy scenario.
217*0Sstevel@tonic-gate 	 */
218*0Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
219*0Sstevel@tonic-gate 		size_t len;
220*0Sstevel@tonic-gate 
221*0Sstevel@tonic-gate 		len = devid_sizeof(devid);
222*0Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
223*0Sstevel@tonic-gate 			/* there is enough space to store the devid */
224*0Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
225*0Sstevel@tonic-gate 			mb->mb_devid_len = len;
226*0Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, devid, len);
227*0Sstevel@tonic-gate 		}
228*0Sstevel@tonic-gate 		devid_free(devid);
229*0Sstevel@tonic-gate 	}
230*0Sstevel@tonic-gate 
231*0Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
232*0Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
235*0Sstevel@tonic-gate 		Free(mb);
236*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
237*0Sstevel@tonic-gate 	}
238*0Sstevel@tonic-gate 
239*0Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
240*0Sstevel@tonic-gate 		Free(mb);
241*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
242*0Sstevel@tonic-gate 	}
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
245*0Sstevel@tonic-gate 		Free(mb);
246*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
247*0Sstevel@tonic-gate 	}
248*0Sstevel@tonic-gate 
249*0Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
250*0Sstevel@tonic-gate 		Free(mb);
251*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, np->rname));
252*0Sstevel@tonic-gate 	}
253*0Sstevel@tonic-gate 
254*0Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
255*0Sstevel@tonic-gate 		(uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
256*0Sstevel@tonic-gate 		Free(mb);
257*0Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_NOTVERIFIED,
258*0Sstevel@tonic-gate 			meta_getminor(np->dev), sp->setno, 0, np->rname));
259*0Sstevel@tonic-gate 	}
260*0Sstevel@tonic-gate 
261*0Sstevel@tonic-gate 	Free(mb);
262*0Sstevel@tonic-gate 	return (0);
263*0Sstevel@tonic-gate }
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate void
266*0Sstevel@tonic-gate meta_mkdummymaster(
267*0Sstevel@tonic-gate 	mdsetname_t	*sp,
268*0Sstevel@tonic-gate 	int		fd,
269*0Sstevel@tonic-gate 	daddr_t		firstblk
270*0Sstevel@tonic-gate )
271*0Sstevel@tonic-gate {
272*0Sstevel@tonic-gate 	md_timeval32_t	tp;
273*0Sstevel@tonic-gate 	struct mddb_mb	*mb;
274*0Sstevel@tonic-gate 	ddi_devid_t	devid;
275*0Sstevel@tonic-gate 	md_set_desc	*sd;
276*0Sstevel@tonic-gate 	md_error_t	ep = mdnullerror;
277*0Sstevel@tonic-gate 	md_timeval32_t	inittime;
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate 	/*
280*0Sstevel@tonic-gate 	 * No dummy master blocks are written for a MN diskset since devids
281*0Sstevel@tonic-gate 	 * are not supported in MN disksets.
282*0Sstevel@tonic-gate 	 */
283*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
284*0Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
285*0Sstevel@tonic-gate 			return;
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd))
288*0Sstevel@tonic-gate 			return;
289*0Sstevel@tonic-gate 	}
290*0Sstevel@tonic-gate 
291*0Sstevel@tonic-gate 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
292*0Sstevel@tonic-gate 		return;
293*0Sstevel@tonic-gate 
294*0Sstevel@tonic-gate 	mb->mb_magic = MDDB_MAGIC_DU;
295*0Sstevel@tonic-gate 	mb->mb_revision = MDDB_REV_MB;
296*0Sstevel@tonic-gate 	mb->mb_setno = sp->setno;
297*0Sstevel@tonic-gate 	inittime = meta_get_lb_inittime(sp, &ep);
298*0Sstevel@tonic-gate 	mb->mb_setcreatetime = inittime;
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate 	if (meta_gettimeofday(&tp) != -1)
301*0Sstevel@tonic-gate 		mb->mb_timestamp = tp;
302*0Sstevel@tonic-gate 
303*0Sstevel@tonic-gate 	/*
304*0Sstevel@tonic-gate 	 * We try to save the disks device ID into the remaining bytes in
305*0Sstevel@tonic-gate 	 * the master block.  This allows the disk image to be self-identifying
306*0Sstevel@tonic-gate 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
307*0Sstevel@tonic-gate 	 * when we try to import these disks on the remote copied image.
308*0Sstevel@tonic-gate 	 * If we cannot save the disks device ID onto the master block that is
309*0Sstevel@tonic-gate 	 * ok.  The disk is just not self-identifying and won't be importable
310*0Sstevel@tonic-gate 	 * in the remote copy scenario.
311*0Sstevel@tonic-gate 	 */
312*0Sstevel@tonic-gate 	if (devid_get(fd, &devid) == 0) {
313*0Sstevel@tonic-gate 		int len;
314*0Sstevel@tonic-gate 
315*0Sstevel@tonic-gate 		len = devid_sizeof(devid);
316*0Sstevel@tonic-gate 		if (len <= DEV_BSIZE - sizeof (*mb)) {
317*0Sstevel@tonic-gate 			/* there is enough space to store the devid */
318*0Sstevel@tonic-gate 			mb->mb_devid_magic = MDDB_MAGIC_DE;
319*0Sstevel@tonic-gate 			mb->mb_devid_len = len;
320*0Sstevel@tonic-gate 			(void) memcpy(mb->mb_devid, (char *)devid, len);
321*0Sstevel@tonic-gate 		}
322*0Sstevel@tonic-gate 		devid_free(devid);
323*0Sstevel@tonic-gate 	}
324*0Sstevel@tonic-gate 
325*0Sstevel@tonic-gate 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
326*0Sstevel@tonic-gate 	    (crc_skip_t *)NULL);
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate 	/*
329*0Sstevel@tonic-gate 	 * If any of these operations fail, we need to inform the
330*0Sstevel@tonic-gate 	 * user that the disk won't be self identifying. When support
331*0Sstevel@tonic-gate 	 * for importing remotely replicated disksets is added, we
332*0Sstevel@tonic-gate 	 * want to add the error messages here.
333*0Sstevel@tonic-gate 	 */
334*0Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
335*0Sstevel@tonic-gate 		goto out;
336*0Sstevel@tonic-gate 
337*0Sstevel@tonic-gate 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
338*0Sstevel@tonic-gate 		goto out;
339*0Sstevel@tonic-gate 
340*0Sstevel@tonic-gate 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
341*0Sstevel@tonic-gate 		goto out;
342*0Sstevel@tonic-gate 
343*0Sstevel@tonic-gate 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
344*0Sstevel@tonic-gate 		goto out;
345*0Sstevel@tonic-gate 
346*0Sstevel@tonic-gate 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
347*0Sstevel@tonic-gate 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
348*0Sstevel@tonic-gate 		goto out;
349*0Sstevel@tonic-gate 
350*0Sstevel@tonic-gate out:
351*0Sstevel@tonic-gate 	Free(mb);
352*0Sstevel@tonic-gate }
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate static int
355*0Sstevel@tonic-gate buildconf(mdsetname_t *sp, md_error_t *ep)
356*0Sstevel@tonic-gate {
357*0Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
358*0Sstevel@tonic-gate 	md_replicalist_t	*rl;
359*0Sstevel@tonic-gate 	FILE			*cfp = NULL;
360*0Sstevel@tonic-gate 	FILE			*mfp = NULL;
361*0Sstevel@tonic-gate 	struct stat		sbuf;
362*0Sstevel@tonic-gate 	int			rval = 0;
363*0Sstevel@tonic-gate 	int			in_miniroot = 0;
364*0Sstevel@tonic-gate 	char			line[MDDB_BOOTLIST_MAX_LEN];
365*0Sstevel@tonic-gate 	char			*tname = NULL;
366*0Sstevel@tonic-gate 
367*0Sstevel@tonic-gate 	/* get list of local replicas */
368*0Sstevel@tonic-gate 	if (! metaislocalset(sp))
369*0Sstevel@tonic-gate 		return (0);
370*0Sstevel@tonic-gate 
371*0Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
372*0Sstevel@tonic-gate 		return (-1);
373*0Sstevel@tonic-gate 
374*0Sstevel@tonic-gate 	/* open tempfile, copy permissions of original file */
375*0Sstevel@tonic-gate 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
376*0Sstevel@tonic-gate 		/*
377*0Sstevel@tonic-gate 		 * On the miniroot tmp files must be created in /var/tmp.
378*0Sstevel@tonic-gate 		 * If we get a EROFS error, we assume that we are in the
379*0Sstevel@tonic-gate 		 * miniroot.
380*0Sstevel@tonic-gate 		 */
381*0Sstevel@tonic-gate 		if (errno != EROFS)
382*0Sstevel@tonic-gate 			goto error;
383*0Sstevel@tonic-gate 		in_miniroot = 1;
384*0Sstevel@tonic-gate 		errno = 0;
385*0Sstevel@tonic-gate 		tname = tempnam("/var/tmp", "slvm_");
386*0Sstevel@tonic-gate 		if (tname == NULL && errno == EROFS) {
387*0Sstevel@tonic-gate 			/*
388*0Sstevel@tonic-gate 			 * If we are booted on a read-only root because
389*0Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
390*0Sstevel@tonic-gate 			 * any scary error messages.
391*0Sstevel@tonic-gate 			 */
392*0Sstevel@tonic-gate 			errno = 0;
393*0Sstevel@tonic-gate 			goto out;
394*0Sstevel@tonic-gate 		}
395*0Sstevel@tonic-gate 
396*0Sstevel@tonic-gate 		/* open tempfile, copy permissions of original file */
397*0Sstevel@tonic-gate 		if ((cfp = fopen(tname, "w+")) == NULL)
398*0Sstevel@tonic-gate 			goto error;
399*0Sstevel@tonic-gate 	}
400*0Sstevel@tonic-gate 	if (stat(META_DBCONF, &sbuf) == 0) {
401*0Sstevel@tonic-gate 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
402*0Sstevel@tonic-gate 			goto error;
403*0Sstevel@tonic-gate 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
404*0Sstevel@tonic-gate 			goto error;
405*0Sstevel@tonic-gate 	}
406*0Sstevel@tonic-gate 
407*0Sstevel@tonic-gate 	/* print header */
408*0Sstevel@tonic-gate 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
409*0Sstevel@tonic-gate 		goto error;
410*0Sstevel@tonic-gate 	if (fprintf(cfp, "do not hand edit\n") < 0)
411*0Sstevel@tonic-gate 		goto error;
412*0Sstevel@tonic-gate 	if (fprintf(cfp,
413*0Sstevel@tonic-gate 		"#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
414*0Sstevel@tonic-gate 		goto error;
415*0Sstevel@tonic-gate 
416*0Sstevel@tonic-gate 	/* dump replicas */
417*0Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
418*0Sstevel@tonic-gate 		md_replica_t	*r = rl->rl_repp;
419*0Sstevel@tonic-gate 		int		checksum = 42;
420*0Sstevel@tonic-gate 		int		i;
421*0Sstevel@tonic-gate 		char		*devidp;
422*0Sstevel@tonic-gate 		minor_t		min;
423*0Sstevel@tonic-gate 
424*0Sstevel@tonic-gate 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
425*0Sstevel@tonic-gate 		/* If devid code can't encode devidp - skip entry */
426*0Sstevel@tonic-gate 		if (devidp == NULL) {
427*0Sstevel@tonic-gate 			continue;
428*0Sstevel@tonic-gate 		}
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate 		/* compute checksum */
431*0Sstevel@tonic-gate 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
432*0Sstevel@tonic-gate 		    (i < sizeof (r->r_driver_name))); i++) {
433*0Sstevel@tonic-gate 			checksum -= r->r_driver_name[i];
434*0Sstevel@tonic-gate 		}
435*0Sstevel@tonic-gate 		min = meta_getminor(r->r_namep->dev);
436*0Sstevel@tonic-gate 		checksum -= min;
437*0Sstevel@tonic-gate 		checksum -= r->r_blkno;
438*0Sstevel@tonic-gate 
439*0Sstevel@tonic-gate 		for (i = 0; i < strlen(devidp); i++) {
440*0Sstevel@tonic-gate 			checksum -= devidp[i];
441*0Sstevel@tonic-gate 		}
442*0Sstevel@tonic-gate 		/* print info */
443*0Sstevel@tonic-gate 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
444*0Sstevel@tonic-gate 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
445*0Sstevel@tonic-gate 			goto error;
446*0Sstevel@tonic-gate 		}
447*0Sstevel@tonic-gate 
448*0Sstevel@tonic-gate 		devid_str_free(devidp);
449*0Sstevel@tonic-gate 	}
450*0Sstevel@tonic-gate 
451*0Sstevel@tonic-gate 	/* close and rename to real file */
452*0Sstevel@tonic-gate 	if (fflush(cfp) != 0)
453*0Sstevel@tonic-gate 		goto error;
454*0Sstevel@tonic-gate 	if (fsync(fileno(cfp)) != 0)
455*0Sstevel@tonic-gate 		goto error;
456*0Sstevel@tonic-gate 	if (fclose(cfp) != 0) {
457*0Sstevel@tonic-gate 		cfp = NULL;
458*0Sstevel@tonic-gate 		goto error;
459*0Sstevel@tonic-gate 	}
460*0Sstevel@tonic-gate 	cfp = NULL;
461*0Sstevel@tonic-gate 
462*0Sstevel@tonic-gate 	/*
463*0Sstevel@tonic-gate 	 * Renames don't work in the miniroot since tmpfiles are
464*0Sstevel@tonic-gate 	 * created in /var/tmp. Hence we copy the data out.
465*0Sstevel@tonic-gate 	 */
466*0Sstevel@tonic-gate 
467*0Sstevel@tonic-gate 	if (! in_miniroot) {
468*0Sstevel@tonic-gate 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
469*0Sstevel@tonic-gate 			goto error;
470*0Sstevel@tonic-gate 	} else {
471*0Sstevel@tonic-gate 		if ((cfp = fopen(tname, "r")) == NULL)
472*0Sstevel@tonic-gate 			goto error;
473*0Sstevel@tonic-gate 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
474*0Sstevel@tonic-gate 			goto error;
475*0Sstevel@tonic-gate 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
476*0Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
477*0Sstevel@tonic-gate 				goto error;
478*0Sstevel@tonic-gate 		}
479*0Sstevel@tonic-gate 		(void) fclose(cfp);
480*0Sstevel@tonic-gate 		cfp = NULL;
481*0Sstevel@tonic-gate 		if (fflush(mfp) != 0)
482*0Sstevel@tonic-gate 			goto error;
483*0Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
484*0Sstevel@tonic-gate 			goto error;
485*0Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
486*0Sstevel@tonic-gate 			mfp = NULL;
487*0Sstevel@tonic-gate 			goto error;
488*0Sstevel@tonic-gate 		}
489*0Sstevel@tonic-gate 		/* delete the tempfile */
490*0Sstevel@tonic-gate 		(void) unlink(tname);
491*0Sstevel@tonic-gate 	}
492*0Sstevel@tonic-gate 	/* success */
493*0Sstevel@tonic-gate 	rval = 0;
494*0Sstevel@tonic-gate 	goto out;
495*0Sstevel@tonic-gate 
496*0Sstevel@tonic-gate 	/* tempfile error */
497*0Sstevel@tonic-gate error:
498*0Sstevel@tonic-gate 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
499*0Sstevel@tonic-gate 				mdsyserror(ep, errno, META_DBCONFTMP);
500*0Sstevel@tonic-gate 
501*0Sstevel@tonic-gate 
502*0Sstevel@tonic-gate 	/* cleanup, return success */
503*0Sstevel@tonic-gate out:
504*0Sstevel@tonic-gate 	if (rlp != NULL)
505*0Sstevel@tonic-gate 		metafreereplicalist(rlp);
506*0Sstevel@tonic-gate 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
507*0Sstevel@tonic-gate 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
508*0Sstevel@tonic-gate 					mdsyserror(ep, errno, META_DBCONFTMP);
509*0Sstevel@tonic-gate 	}
510*0Sstevel@tonic-gate 	free(tname);
511*0Sstevel@tonic-gate 	return (rval);
512*0Sstevel@tonic-gate }
513*0Sstevel@tonic-gate 
514*0Sstevel@tonic-gate /*
515*0Sstevel@tonic-gate  * check replica for dev
516*0Sstevel@tonic-gate  */
517*0Sstevel@tonic-gate static int
518*0Sstevel@tonic-gate in_replica(
519*0Sstevel@tonic-gate 	mdsetname_t	*sp,
520*0Sstevel@tonic-gate 	md_replica_t	*rp,
521*0Sstevel@tonic-gate 	mdname_t	*np,
522*0Sstevel@tonic-gate 	diskaddr_t	slblk,
523*0Sstevel@tonic-gate 	diskaddr_t	nblks,
524*0Sstevel@tonic-gate 	md_error_t	*ep
525*0Sstevel@tonic-gate )
526*0Sstevel@tonic-gate {
527*0Sstevel@tonic-gate 	mdname_t	*repnp = rp->r_namep;
528*0Sstevel@tonic-gate 	diskaddr_t	rep_sblk = rp->r_blkno;
529*0Sstevel@tonic-gate 	diskaddr_t	rep_nblks = rp->r_nblk;
530*0Sstevel@tonic-gate 
531*0Sstevel@tonic-gate 	/* should be in the same set */
532*0Sstevel@tonic-gate 	assert(sp != NULL);
533*0Sstevel@tonic-gate 
534*0Sstevel@tonic-gate 	/* if error in master block, assume whole partition */
535*0Sstevel@tonic-gate 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
536*0Sstevel@tonic-gate 	    (rep_nblks == MD_DISKADDR_ERROR)) {
537*0Sstevel@tonic-gate 		rep_sblk = 0;
538*0Sstevel@tonic-gate 		rep_nblks = MD_DISKADDR_ERROR;
539*0Sstevel@tonic-gate 	}
540*0Sstevel@tonic-gate 
541*0Sstevel@tonic-gate 	/* check overlap */
542*0Sstevel@tonic-gate 	if (meta_check_overlap(
543*0Sstevel@tonic-gate 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
544*0Sstevel@tonic-gate 		return (-1);
545*0Sstevel@tonic-gate 	}
546*0Sstevel@tonic-gate 
547*0Sstevel@tonic-gate 	/* return success */
548*0Sstevel@tonic-gate 	return (0);
549*0Sstevel@tonic-gate }
550*0Sstevel@tonic-gate 
551*0Sstevel@tonic-gate /*
552*0Sstevel@tonic-gate  * check to see if we're in a replica
553*0Sstevel@tonic-gate  */
554*0Sstevel@tonic-gate int
555*0Sstevel@tonic-gate meta_check_inreplica(
556*0Sstevel@tonic-gate 	mdsetname_t		*sp,
557*0Sstevel@tonic-gate 	mdname_t		*np,
558*0Sstevel@tonic-gate 	diskaddr_t		slblk,
559*0Sstevel@tonic-gate 	diskaddr_t		nblks,
560*0Sstevel@tonic-gate 	md_error_t		*ep
561*0Sstevel@tonic-gate )
562*0Sstevel@tonic-gate {
563*0Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
564*0Sstevel@tonic-gate 	md_replicalist_t	*rl;
565*0Sstevel@tonic-gate 	int			rval = 0;
566*0Sstevel@tonic-gate 
567*0Sstevel@tonic-gate 	/* should have a set */
568*0Sstevel@tonic-gate 	assert(sp != NULL);
569*0Sstevel@tonic-gate 
570*0Sstevel@tonic-gate 	/* for each replica */
571*0Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
572*0Sstevel@tonic-gate 		return (-1);
573*0Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
574*0Sstevel@tonic-gate 		md_replica_t	*rp = rl->rl_repp;
575*0Sstevel@tonic-gate 
576*0Sstevel@tonic-gate 		/* check replica */
577*0Sstevel@tonic-gate 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
578*0Sstevel@tonic-gate 			rval = -1;
579*0Sstevel@tonic-gate 			break;
580*0Sstevel@tonic-gate 		}
581*0Sstevel@tonic-gate 	}
582*0Sstevel@tonic-gate 
583*0Sstevel@tonic-gate 	/* cleanup, return success */
584*0Sstevel@tonic-gate 	metafreereplicalist(rlp);
585*0Sstevel@tonic-gate 	return (rval);
586*0Sstevel@tonic-gate }
587*0Sstevel@tonic-gate 
588*0Sstevel@tonic-gate /*
589*0Sstevel@tonic-gate  * check replica
590*0Sstevel@tonic-gate  */
591*0Sstevel@tonic-gate int
592*0Sstevel@tonic-gate meta_check_replica(
593*0Sstevel@tonic-gate 	mdsetname_t	*sp,		/* set to check against */
594*0Sstevel@tonic-gate 	mdname_t	*np,		/* component to check against */
595*0Sstevel@tonic-gate 	mdchkopts_t	options,	/* option flags */
596*0Sstevel@tonic-gate 	diskaddr_t	slblk,		/* start logical block */
597*0Sstevel@tonic-gate 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
598*0Sstevel@tonic-gate 	md_error_t	*ep		/* error packet */
599*0Sstevel@tonic-gate )
600*0Sstevel@tonic-gate {
601*0Sstevel@tonic-gate 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
602*0Sstevel@tonic-gate 
603*0Sstevel@tonic-gate 	/* make sure we have a disk */
604*0Sstevel@tonic-gate 	if (metachkcomp(np, ep) != 0)
605*0Sstevel@tonic-gate 		return (-1);
606*0Sstevel@tonic-gate 
607*0Sstevel@tonic-gate 	/* check to ensure that it is not already in use */
608*0Sstevel@tonic-gate 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
609*0Sstevel@tonic-gate 		return (-1);
610*0Sstevel@tonic-gate 	}
611*0Sstevel@tonic-gate 
612*0Sstevel@tonic-gate 	if (options & MDCHK_ALLOW_NODBS)
613*0Sstevel@tonic-gate 		return (0);
614*0Sstevel@tonic-gate 
615*0Sstevel@tonic-gate 	if (options & MDCHK_DRVINSET)
616*0Sstevel@tonic-gate 		return (0);
617*0Sstevel@tonic-gate 
618*0Sstevel@tonic-gate 	/* make sure it is in the set */
619*0Sstevel@tonic-gate 	if (meta_check_inset(sp, np, ep) != 0)
620*0Sstevel@tonic-gate 		return (-1);
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate 	/* make sure its not in a metadevice */
623*0Sstevel@tonic-gate 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
624*0Sstevel@tonic-gate 		return (-1);
625*0Sstevel@tonic-gate 
626*0Sstevel@tonic-gate 	/* return success */
627*0Sstevel@tonic-gate 	return (0);
628*0Sstevel@tonic-gate }
629*0Sstevel@tonic-gate 
630*0Sstevel@tonic-gate static int
631*0Sstevel@tonic-gate update_dbinfo_on_drives(
632*0Sstevel@tonic-gate 	mdsetname_t	*sp,
633*0Sstevel@tonic-gate 	md_drive_desc	*dd,
634*0Sstevel@tonic-gate 	int		set_locked,
635*0Sstevel@tonic-gate 	int		force,
636*0Sstevel@tonic-gate 	md_error_t	*ep
637*0Sstevel@tonic-gate )
638*0Sstevel@tonic-gate {
639*0Sstevel@tonic-gate 	md_set_desc		*sd;
640*0Sstevel@tonic-gate 	int			i;
641*0Sstevel@tonic-gate 	md_setkey_t		*cl_sk;
642*0Sstevel@tonic-gate 	int			rval = 0;
643*0Sstevel@tonic-gate 	md_mnnode_desc		*nd;
644*0Sstevel@tonic-gate 
645*0Sstevel@tonic-gate 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
646*0Sstevel@tonic-gate 		return (-1);
647*0Sstevel@tonic-gate 
648*0Sstevel@tonic-gate 	if (! set_locked) {
649*0Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
650*0Sstevel@tonic-gate 			md_error_t xep = mdnullerror;
651*0Sstevel@tonic-gate 			sigset_t sigs;
652*0Sstevel@tonic-gate 			/* Make sure we are blocking all signals */
653*0Sstevel@tonic-gate 			if (procsigs(TRUE, &sigs, &xep) < 0)
654*0Sstevel@tonic-gate 				mdclrerror(&xep);
655*0Sstevel@tonic-gate 
656*0Sstevel@tonic-gate 			nd = sd->sd_nodelist;
657*0Sstevel@tonic-gate 			while (nd) {
658*0Sstevel@tonic-gate 				if (force && strcmp(nd->nd_nodename,
659*0Sstevel@tonic-gate 				    mynode()) != 0) {
660*0Sstevel@tonic-gate 					nd = nd->nd_next;
661*0Sstevel@tonic-gate 					continue;
662*0Sstevel@tonic-gate 				}
663*0Sstevel@tonic-gate 
664*0Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
665*0Sstevel@tonic-gate 					nd = nd->nd_next;
666*0Sstevel@tonic-gate 					continue;
667*0Sstevel@tonic-gate 				}
668*0Sstevel@tonic-gate 
669*0Sstevel@tonic-gate 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
670*0Sstevel@tonic-gate 					return (-1);
671*0Sstevel@tonic-gate 				nd = nd->nd_next;
672*0Sstevel@tonic-gate 			}
673*0Sstevel@tonic-gate 		} else {
674*0Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
675*0Sstevel@tonic-gate 				/* Skip empty slots */
676*0Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
677*0Sstevel@tonic-gate 					continue;
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate 				if (force && strcmp(sd->sd_nodes[i],
680*0Sstevel@tonic-gate 				    mynode()) != 0)
681*0Sstevel@tonic-gate 					continue;
682*0Sstevel@tonic-gate 
683*0Sstevel@tonic-gate 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
684*0Sstevel@tonic-gate 					return (-1);
685*0Sstevel@tonic-gate 			}
686*0Sstevel@tonic-gate 		}
687*0Sstevel@tonic-gate 	}
688*0Sstevel@tonic-gate 
689*0Sstevel@tonic-gate 	if (MD_MNSET_DESC(sd)) {
690*0Sstevel@tonic-gate 		nd = sd->sd_nodelist;
691*0Sstevel@tonic-gate 		while (nd) {
692*0Sstevel@tonic-gate 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
693*0Sstevel@tonic-gate 				nd = nd->nd_next;
694*0Sstevel@tonic-gate 				continue;
695*0Sstevel@tonic-gate 			}
696*0Sstevel@tonic-gate 
697*0Sstevel@tonic-gate 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
698*0Sstevel@tonic-gate 				nd = nd->nd_next;
699*0Sstevel@tonic-gate 				continue;
700*0Sstevel@tonic-gate 			}
701*0Sstevel@tonic-gate 
702*0Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
703*0Sstevel@tonic-gate 			    == -1) {
704*0Sstevel@tonic-gate 				rval = -1;
705*0Sstevel@tonic-gate 				break;
706*0Sstevel@tonic-gate 			}
707*0Sstevel@tonic-gate 			nd = nd->nd_next;
708*0Sstevel@tonic-gate 		}
709*0Sstevel@tonic-gate 	} else {
710*0Sstevel@tonic-gate 		for (i = 0; i < MD_MAXSIDES; i++) {
711*0Sstevel@tonic-gate 			/* Skip empty slots */
712*0Sstevel@tonic-gate 			if (sd->sd_nodes[i][0] == '\0')
713*0Sstevel@tonic-gate 				continue;
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
716*0Sstevel@tonic-gate 				continue;
717*0Sstevel@tonic-gate 
718*0Sstevel@tonic-gate 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
719*0Sstevel@tonic-gate 			    == -1) {
720*0Sstevel@tonic-gate 				rval = -1;
721*0Sstevel@tonic-gate 				break;
722*0Sstevel@tonic-gate 			}
723*0Sstevel@tonic-gate 		}
724*0Sstevel@tonic-gate 	}
725*0Sstevel@tonic-gate 
726*0Sstevel@tonic-gate 	if (! set_locked) {
727*0Sstevel@tonic-gate 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
728*0Sstevel@tonic-gate 		if (MD_MNSET_DESC(sd)) {
729*0Sstevel@tonic-gate 			nd = sd->sd_nodelist;
730*0Sstevel@tonic-gate 			while (nd) {
731*0Sstevel@tonic-gate 				if (force &&
732*0Sstevel@tonic-gate 				    strcmp(nd->nd_nodename, mynode()) != 0) {
733*0Sstevel@tonic-gate 					nd = nd->nd_next;
734*0Sstevel@tonic-gate 					continue;
735*0Sstevel@tonic-gate 				}
736*0Sstevel@tonic-gate 
737*0Sstevel@tonic-gate 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
738*0Sstevel@tonic-gate 					nd = nd->nd_next;
739*0Sstevel@tonic-gate 					continue;
740*0Sstevel@tonic-gate 				}
741*0Sstevel@tonic-gate 
742*0Sstevel@tonic-gate 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
743*0Sstevel@tonic-gate 				    ep)) {
744*0Sstevel@tonic-gate 					rval = -1;
745*0Sstevel@tonic-gate 					break;
746*0Sstevel@tonic-gate 				}
747*0Sstevel@tonic-gate 				nd = nd->nd_next;
748*0Sstevel@tonic-gate 			}
749*0Sstevel@tonic-gate 		} else {
750*0Sstevel@tonic-gate 			for (i = 0; i < MD_MAXSIDES; i++) {
751*0Sstevel@tonic-gate 				/* Skip empty slots */
752*0Sstevel@tonic-gate 				if (sd->sd_nodes[i][0] == '\0')
753*0Sstevel@tonic-gate 					continue;
754*0Sstevel@tonic-gate 
755*0Sstevel@tonic-gate 				if (force &&
756*0Sstevel@tonic-gate 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
757*0Sstevel@tonic-gate 					continue;
758*0Sstevel@tonic-gate 
759*0Sstevel@tonic-gate 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
760*0Sstevel@tonic-gate 				    ep)) {
761*0Sstevel@tonic-gate 					rval = -1;
762*0Sstevel@tonic-gate 					break;
763*0Sstevel@tonic-gate 				}
764*0Sstevel@tonic-gate 			}
765*0Sstevel@tonic-gate 
766*0Sstevel@tonic-gate 		}
767*0Sstevel@tonic-gate 		cl_set_setkey(NULL);
768*0Sstevel@tonic-gate 	}
769*0Sstevel@tonic-gate 
770*0Sstevel@tonic-gate 	return (rval);
771*0Sstevel@tonic-gate }
772*0Sstevel@tonic-gate 
773*0Sstevel@tonic-gate int
774*0Sstevel@tonic-gate meta_db_addsidenms(
775*0Sstevel@tonic-gate 	mdsetname_t	*sp,
776*0Sstevel@tonic-gate 	mdname_t	*np,
777*0Sstevel@tonic-gate 	daddr_t		blkno,
778*0Sstevel@tonic-gate 	int		bcast,
779*0Sstevel@tonic-gate 	md_error_t	*ep
780*0Sstevel@tonic-gate )
781*0Sstevel@tonic-gate {
782*0Sstevel@tonic-gate 	side_t		sideno;
783*0Sstevel@tonic-gate 	char		*bname = NULL;
784*0Sstevel@tonic-gate 	char		*dname = NULL;
785*0Sstevel@tonic-gate 	minor_t		mnum;
786*0Sstevel@tonic-gate 	mddb_config_t	c;
787*0Sstevel@tonic-gate 	int		done;
788*0Sstevel@tonic-gate 	int		rval = 0;
789*0Sstevel@tonic-gate 	md_set_desc	*sd;
790*0Sstevel@tonic-gate 
791*0Sstevel@tonic-gate 	sideno = MD_SIDEWILD;
792*0Sstevel@tonic-gate 	/*CONSTCOND*/
793*0Sstevel@tonic-gate 	while (1) {
794*0Sstevel@tonic-gate 		if (bname != NULL) {
795*0Sstevel@tonic-gate 			Free(bname);
796*0Sstevel@tonic-gate 			bname = NULL;
797*0Sstevel@tonic-gate 		}
798*0Sstevel@tonic-gate 		if (dname != NULL) {
799*0Sstevel@tonic-gate 			Free(dname);
800*0Sstevel@tonic-gate 			dname = NULL;
801*0Sstevel@tonic-gate 		}
802*0Sstevel@tonic-gate 		if ((done = meta_getnextside_devinfo(sp, np->bname,
803*0Sstevel@tonic-gate 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
804*0Sstevel@tonic-gate 			rval = -1;
805*0Sstevel@tonic-gate 			break;
806*0Sstevel@tonic-gate 		}
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate 		if (done == 0)
809*0Sstevel@tonic-gate 			break;
810*0Sstevel@tonic-gate 
811*0Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
812*0Sstevel@tonic-gate 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
813*0Sstevel@tonic-gate 				rval = -1;
814*0Sstevel@tonic-gate 				break;
815*0Sstevel@tonic-gate 			}
816*0Sstevel@tonic-gate 		}
817*0Sstevel@tonic-gate 
818*0Sstevel@tonic-gate 		/*
819*0Sstevel@tonic-gate 		 * Send addsidenms to all nodes using rpc.mdcommd if
820*0Sstevel@tonic-gate 		 * sidename is being added to MN diskset.
821*0Sstevel@tonic-gate 		 *
822*0Sstevel@tonic-gate 		 *   It's ok to broadcast this call to other nodes.
823*0Sstevel@tonic-gate 		 *
824*0Sstevel@tonic-gate 		 *   Note: The broadcast to other nodes isn't needed during
825*0Sstevel@tonic-gate 		 *   the addition of the first mddbs to the set since the
826*0Sstevel@tonic-gate 		 *   other nodes haven't been joined to the set yet.  All
827*0Sstevel@tonic-gate 		 *   nodes in a MN diskset are (implicitly) joined to the set
828*0Sstevel@tonic-gate 		 *   on the addition of the first mddb.
829*0Sstevel@tonic-gate 		 */
830*0Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
831*0Sstevel@tonic-gate 		    (bcast == DB_ADDSIDENMS_BCAST)) {
832*0Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
833*0Sstevel@tonic-gate 			md_mn_msg_meta_db_newside_t	db_ns;
834*0Sstevel@tonic-gate 			int				send_rval;
835*0Sstevel@tonic-gate 
836*0Sstevel@tonic-gate 			db_ns.msg_l_dev = np->dev;
837*0Sstevel@tonic-gate 			db_ns.msg_sideno = sideno;
838*0Sstevel@tonic-gate 			db_ns.msg_blkno = blkno;
839*0Sstevel@tonic-gate 			(void) strncpy(db_ns.msg_dname, dname,
840*0Sstevel@tonic-gate 			    sizeof (db_ns.msg_dname));
841*0Sstevel@tonic-gate 			(void) splitname(np->bname, &db_ns.msg_splitname);
842*0Sstevel@tonic-gate 			db_ns.msg_mnum = mnum;
843*0Sstevel@tonic-gate 
844*0Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
845*0Sstevel@tonic-gate 			db_ns.msg_devid[0] = NULL;
846*0Sstevel@tonic-gate 
847*0Sstevel@tonic-gate 			/*
848*0Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
849*0Sstevel@tonic-gate 			 * stuck in in the return step until this command has
850*0Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
851*0Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
852*0Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
853*0Sstevel@tonic-gate 			 * cycle to proceed.
854*0Sstevel@tonic-gate 			 */
855*0Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
856*0Sstevel@tonic-gate 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
857*0Sstevel@tonic-gate 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ns,
858*0Sstevel@tonic-gate 			    sizeof (md_mn_msg_meta_db_newside_t),
859*0Sstevel@tonic-gate 			    &resultp, ep);
860*0Sstevel@tonic-gate 			if (send_rval != 0) {
861*0Sstevel@tonic-gate 				rval = -1;
862*0Sstevel@tonic-gate 				if (resultp == NULL)
863*0Sstevel@tonic-gate 					(void) mddserror(ep,
864*0Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
865*0Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
866*0Sstevel@tonic-gate 					    sp->setname);
867*0Sstevel@tonic-gate 				else {
868*0Sstevel@tonic-gate 					(void) mdstealerror(ep,
869*0Sstevel@tonic-gate 					    &(resultp->mmr_ep));
870*0Sstevel@tonic-gate 					if (mdisok(ep)) {
871*0Sstevel@tonic-gate 						(void) mddserror(ep,
872*0Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
873*0Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
874*0Sstevel@tonic-gate 						    sp->setname);
875*0Sstevel@tonic-gate 					}
876*0Sstevel@tonic-gate 					free_result(resultp);
877*0Sstevel@tonic-gate 				}
878*0Sstevel@tonic-gate 				break;
879*0Sstevel@tonic-gate 			}
880*0Sstevel@tonic-gate 			if (resultp)
881*0Sstevel@tonic-gate 				free_result(resultp);
882*0Sstevel@tonic-gate 		} else {
883*0Sstevel@tonic-gate 			/*
884*0Sstevel@tonic-gate 			 * Let this side's  device name, minor # and driver name
885*0Sstevel@tonic-gate 			 * be known to the database replica.
886*0Sstevel@tonic-gate 			 */
887*0Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
888*0Sstevel@tonic-gate 
889*0Sstevel@tonic-gate 			/* Fill in device/replica info */
890*0Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
891*0Sstevel@tonic-gate 			c.c_locator.l_blkno = blkno;
892*0Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, dname,
893*0Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
894*0Sstevel@tonic-gate 			(void) splitname(bname, &c.c_devname);
895*0Sstevel@tonic-gate 			c.c_locator.l_mnum = mnum;
896*0Sstevel@tonic-gate 
897*0Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
898*0Sstevel@tonic-gate 			c.c_setno = sp->setno;
899*0Sstevel@tonic-gate 			(void) strncpy(c.c_setname, sp->setname,
900*0Sstevel@tonic-gate 				sizeof (c.c_setname));
901*0Sstevel@tonic-gate 			c.c_sideno = sideno;
902*0Sstevel@tonic-gate 
903*0Sstevel@tonic-gate 			/*
904*0Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
905*0Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
906*0Sstevel@tonic-gate 			 * is just what this code would do.
907*0Sstevel@tonic-gate 			 */
908*0Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
909*0Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
910*0Sstevel@tonic-gate 
911*0Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
912*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
913*0Sstevel@tonic-gate 				break;
914*0Sstevel@tonic-gate 			}
915*0Sstevel@tonic-gate 		}
916*0Sstevel@tonic-gate 	}
917*0Sstevel@tonic-gate 
918*0Sstevel@tonic-gate 	/* cleanup, return success */
919*0Sstevel@tonic-gate 	if (bname != NULL) {
920*0Sstevel@tonic-gate 		Free(bname);
921*0Sstevel@tonic-gate 		bname = NULL;
922*0Sstevel@tonic-gate 	}
923*0Sstevel@tonic-gate 	if (dname != NULL) {
924*0Sstevel@tonic-gate 		Free(dname);
925*0Sstevel@tonic-gate 		dname = NULL;
926*0Sstevel@tonic-gate 	}
927*0Sstevel@tonic-gate 	return (rval);
928*0Sstevel@tonic-gate }
929*0Sstevel@tonic-gate 
930*0Sstevel@tonic-gate 
931*0Sstevel@tonic-gate int
932*0Sstevel@tonic-gate meta_db_delsidenm(
933*0Sstevel@tonic-gate 	mdsetname_t	*sp,
934*0Sstevel@tonic-gate 	side_t		sideno,
935*0Sstevel@tonic-gate 	mdname_t	*np,
936*0Sstevel@tonic-gate 	daddr_t		blkno,
937*0Sstevel@tonic-gate 	md_error_t	*ep
938*0Sstevel@tonic-gate )
939*0Sstevel@tonic-gate {
940*0Sstevel@tonic-gate 	mddb_config_t	c;
941*0Sstevel@tonic-gate 	md_set_desc	*sd;
942*0Sstevel@tonic-gate 
943*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
944*0Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
945*0Sstevel@tonic-gate 			return (-1);
946*0Sstevel@tonic-gate 	}
947*0Sstevel@tonic-gate 	/* Use rpc.mdcommd to delete mddb side from all nodes */
948*0Sstevel@tonic-gate 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
949*0Sstevel@tonic-gate 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
950*0Sstevel@tonic-gate 		md_mn_result_t			*resultp = NULL;
951*0Sstevel@tonic-gate 		md_mn_msg_meta_db_delside_t	db_ds;
952*0Sstevel@tonic-gate 		int				send_rval;
953*0Sstevel@tonic-gate 
954*0Sstevel@tonic-gate 		db_ds.msg_l_dev = np->dev;
955*0Sstevel@tonic-gate 		db_ds.msg_blkno = blkno;
956*0Sstevel@tonic-gate 		db_ds.msg_sideno = sideno;
957*0Sstevel@tonic-gate 
958*0Sstevel@tonic-gate 		/* Set devid to NULL until devids are supported */
959*0Sstevel@tonic-gate 		db_ds.msg_devid[0] = NULL;
960*0Sstevel@tonic-gate 
961*0Sstevel@tonic-gate 		/*
962*0Sstevel@tonic-gate 		 * If reconfig cycle has been started, this node is
963*0Sstevel@tonic-gate 		 * stuck in in the return step until this command has
964*0Sstevel@tonic-gate 		 * completed.  If mdcommd is suspended, ask
965*0Sstevel@tonic-gate 		 * send_message to fail (instead of retrying)
966*0Sstevel@tonic-gate 		 * so that metaset can finish allowing the reconfig
967*0Sstevel@tonic-gate 		 * cycle to proceed.
968*0Sstevel@tonic-gate 		 */
969*0Sstevel@tonic-gate 		send_rval = mdmn_send_message(sp->setno,
970*0Sstevel@tonic-gate 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
971*0Sstevel@tonic-gate 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, (char *)&db_ds,
972*0Sstevel@tonic-gate 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
973*0Sstevel@tonic-gate 		if (send_rval != 0) {
974*0Sstevel@tonic-gate 			if (resultp == NULL)
975*0Sstevel@tonic-gate 				(void) mddserror(ep,
976*0Sstevel@tonic-gate 				    MDE_DS_COMMD_SEND_FAIL,
977*0Sstevel@tonic-gate 				    sp->setno, NULL, NULL,
978*0Sstevel@tonic-gate 				    sp->setname);
979*0Sstevel@tonic-gate 			else {
980*0Sstevel@tonic-gate 				(void) mdstealerror(ep, &(resultp->mmr_ep));
981*0Sstevel@tonic-gate 				if (mdisok(ep)) {
982*0Sstevel@tonic-gate 					(void) mddserror(ep,
983*0Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
984*0Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
985*0Sstevel@tonic-gate 					    sp->setname);
986*0Sstevel@tonic-gate 				}
987*0Sstevel@tonic-gate 				free_result(resultp);
988*0Sstevel@tonic-gate 			}
989*0Sstevel@tonic-gate 			return (-1);
990*0Sstevel@tonic-gate 		}
991*0Sstevel@tonic-gate 		if (resultp)
992*0Sstevel@tonic-gate 			free_result(resultp);
993*0Sstevel@tonic-gate 
994*0Sstevel@tonic-gate 	} else {
995*0Sstevel@tonic-gate 		/*
996*0Sstevel@tonic-gate 		 * Let this side's  device name, minor # and driver name
997*0Sstevel@tonic-gate 		 * be known to the database replica.
998*0Sstevel@tonic-gate 		 */
999*0Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
1000*0Sstevel@tonic-gate 
1001*0Sstevel@tonic-gate 		/* Fill in device/replica info */
1002*0Sstevel@tonic-gate 		c.c_locator.l_dev = meta_cmpldev(np->dev);
1003*0Sstevel@tonic-gate 		c.c_locator.l_blkno = blkno;
1004*0Sstevel@tonic-gate 
1005*0Sstevel@tonic-gate 		/* Fill in setno, setname, and sideno */
1006*0Sstevel@tonic-gate 		c.c_setno = sp->setno;
1007*0Sstevel@tonic-gate 		(void) strcpy(c.c_setname, sp->setname);
1008*0Sstevel@tonic-gate 		c.c_sideno = sideno;
1009*0Sstevel@tonic-gate 
1010*0Sstevel@tonic-gate 		/*
1011*0Sstevel@tonic-gate 		 * Don't need device id information from this ioctl
1012*0Sstevel@tonic-gate 		 * Kernel determines device id from dev_t, which
1013*0Sstevel@tonic-gate 		 * is just what this code would do.
1014*0Sstevel@tonic-gate 		 */
1015*0Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
1016*0Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
1017*0Sstevel@tonic-gate 
1018*0Sstevel@tonic-gate 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
1019*0Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
1020*0Sstevel@tonic-gate 	}
1021*0Sstevel@tonic-gate 	return (0);
1022*0Sstevel@tonic-gate }
1023*0Sstevel@tonic-gate 
1024*0Sstevel@tonic-gate 
1025*0Sstevel@tonic-gate static int
1026*0Sstevel@tonic-gate mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
1027*0Sstevel@tonic-gate {
1028*0Sstevel@tonic-gate 	mdnamelist_t		*dnp1, *dnp2;
1029*0Sstevel@tonic-gate 
1030*0Sstevel@tonic-gate 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
1031*0Sstevel@tonic-gate 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
1032*0Sstevel@tonic-gate 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
1033*0Sstevel@tonic-gate 				return (mderror(ep, MDE_DUPDRIVE,
1034*0Sstevel@tonic-gate 				    dnp1->namep->cname));
1035*0Sstevel@tonic-gate 		}
1036*0Sstevel@tonic-gate 	}
1037*0Sstevel@tonic-gate 	return (0);
1038*0Sstevel@tonic-gate }
1039*0Sstevel@tonic-gate 
1040*0Sstevel@tonic-gate 
/*
 * Read exactly `len' bytes from the start of file `fname' into `buf'.
 * read() is retried until the requested amount has arrived, so a short
 * read does not cut the data off.
 *
 * Return 0 on success, -1 if the file cannot be opened or supplies
 * fewer than `len' bytes.
 */
static int
filediff_load(char *fname, char *buf, size_t len)
{
	size_t	got = 0;
	int	fd;

	if ((fd = open(fname, O_RDONLY)) == -1)
		return (-1);

	while (got < len) {
		ssize_t	cnt = read(fd, buf + got, len - got);

		/* error or premature end of file */
		if (cnt <= 0)
			break;
		got += (size_t)cnt;
	}
	(void) close(fd);

	return ((got == len) ? 0 : -1);
}

/*
 * Return 1 if files are different, else return 0
 *
 * The files count as different when either cannot be stat'ed, when
 * their sizes differ, when memory for the comparison cannot be
 * allocated, or when either cannot be read completely.  Two empty
 * files are identical.
 */
static int
filediff(char *tsname, char *sname)
{
	int		ret = 1;
	size_t		tsz, sz;
	struct stat	sbuf;
	char		*tbuf = NULL;
	char		*buf = NULL;

	if (stat(tsname, &sbuf) != 0)
		return (1);
	tsz = (size_t)sbuf.st_size;
	if (stat(sname, &sbuf) != 0)
		return (1);
	sz = (size_t)sbuf.st_size;
	if (tsz != sz)
		return (1);

	/*
	 * Nothing to compare.  Returning here also keeps the result
	 * independent of whether malloc(0) yields NULL.
	 */
	if (tsz == 0)
		return (0);

	/* allocate memory and read both files into buffer */
	tbuf = malloc(tsz);
	buf = malloc(sz);
	if (tbuf == NULL || buf == NULL)
		goto out;

	if (filediff_load(tsname, tbuf, tsz) != 0)
		goto out;
	if (filediff_load(sname, buf, sz) != 0)
		goto out;

	/* compare content; normalize to the documented 0/1 result */
	ret = (memcmp(tbuf, buf, tsz) != 0) ? 1 : 0;
out:
	free(tbuf);
	free(buf);
	return (ret);
}
1092*0Sstevel@tonic-gate 
1093*0Sstevel@tonic-gate /*
1094*0Sstevel@tonic-gate  * patch md.conf file with mddb locations
1095*0Sstevel@tonic-gate  */
1096*0Sstevel@tonic-gate int
1097*0Sstevel@tonic-gate meta_db_patch(
1098*0Sstevel@tonic-gate 	char		*sname,		/* system file name */
1099*0Sstevel@tonic-gate 	char		*cname,		/* mddb.cf file name */
1100*0Sstevel@tonic-gate 	int		patch,		/* patching locally */
1101*0Sstevel@tonic-gate 	md_error_t	*ep
1102*0Sstevel@tonic-gate )
1103*0Sstevel@tonic-gate {
1104*0Sstevel@tonic-gate 	char		*tsname = NULL;
1105*0Sstevel@tonic-gate 	char		line[MDDB_BOOTLIST_MAX_LEN];
1106*0Sstevel@tonic-gate 	FILE		*tsfp = NULL;
1107*0Sstevel@tonic-gate 	FILE		*mfp = NULL;
1108*0Sstevel@tonic-gate 	int		rval = -1;
1109*0Sstevel@tonic-gate 
1110*0Sstevel@tonic-gate 	/* check names */
1111*0Sstevel@tonic-gate 	if (sname == NULL) {
1112*0Sstevel@tonic-gate 		if (patch)
1113*0Sstevel@tonic-gate 			sname = "md.conf";
1114*0Sstevel@tonic-gate 		else
1115*0Sstevel@tonic-gate 			sname = "/kernel/drv/md.conf";
1116*0Sstevel@tonic-gate 	}
1117*0Sstevel@tonic-gate 	if (cname == NULL)
1118*0Sstevel@tonic-gate 		cname = META_DBCONF;
1119*0Sstevel@tonic-gate 
1120*0Sstevel@tonic-gate 	/*
1121*0Sstevel@tonic-gate 	 * edit file
1122*0Sstevel@tonic-gate 	 */
1123*0Sstevel@tonic-gate 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
1124*0Sstevel@tonic-gate 		if (mdissyserror(ep, EROFS)) {
1125*0Sstevel@tonic-gate 			/*
1126*0Sstevel@tonic-gate 			 * If we are booted on a read-only root because
1127*0Sstevel@tonic-gate 			 * of mddb quorum problems we don't want to emit
1128*0Sstevel@tonic-gate 			 * any scary error messages.
1129*0Sstevel@tonic-gate 			 */
1130*0Sstevel@tonic-gate 			mdclrerror(ep);
1131*0Sstevel@tonic-gate 			rval = 0;
1132*0Sstevel@tonic-gate 		}
1133*0Sstevel@tonic-gate 		goto out;
1134*0Sstevel@tonic-gate 	}
1135*0Sstevel@tonic-gate 
1136*0Sstevel@tonic-gate 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0,
1137*0Sstevel@tonic-gate 	    ep) != 0)
1138*0Sstevel@tonic-gate 		goto out;
1139*0Sstevel@tonic-gate 
1140*0Sstevel@tonic-gate 	/* if file content is identical, skip rename */
1141*0Sstevel@tonic-gate 	if (filediff(tsname, sname) == 0) {
1142*0Sstevel@tonic-gate 		rval = 0;
1143*0Sstevel@tonic-gate 		goto out;
1144*0Sstevel@tonic-gate 	}
1145*0Sstevel@tonic-gate 
1146*0Sstevel@tonic-gate 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
1147*0Sstevel@tonic-gate 					    (fclose(tsfp) != 0)) {
1148*0Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, tsname);
1149*0Sstevel@tonic-gate 		goto out;
1150*0Sstevel@tonic-gate 	}
1151*0Sstevel@tonic-gate 
1152*0Sstevel@tonic-gate 	tsfp = NULL;
1153*0Sstevel@tonic-gate 
1154*0Sstevel@tonic-gate 	/*
1155*0Sstevel@tonic-gate 	 * rename file. If we get a Cross Device error then it
1156*0Sstevel@tonic-gate 	 * is because we are in the miniroot.
1157*0Sstevel@tonic-gate 	 */
1158*0Sstevel@tonic-gate 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
1159*0Sstevel@tonic-gate 		(void) mdsyserror(ep, errno, sname);
1160*0Sstevel@tonic-gate 		goto out;
1161*0Sstevel@tonic-gate 	}
1162*0Sstevel@tonic-gate 
1163*0Sstevel@tonic-gate 	if (errno == EXDEV) {
1164*0Sstevel@tonic-gate 		if ((tsfp = fopen(tsname, "r")) == NULL)
1165*0Sstevel@tonic-gate 			goto out;
1166*0Sstevel@tonic-gate 		if ((mfp = fopen(sname, "w+")) == NULL)
1167*0Sstevel@tonic-gate 			goto out;
1168*0Sstevel@tonic-gate 		while (fgets(line, sizeof (line), tsfp) != NULL) {
1169*0Sstevel@tonic-gate 			if (fputs(line, mfp) == NULL)
1170*0Sstevel@tonic-gate 				goto out;
1171*0Sstevel@tonic-gate 		}
1172*0Sstevel@tonic-gate 		(void) fclose(tsfp);
1173*0Sstevel@tonic-gate 		tsfp = NULL;
1174*0Sstevel@tonic-gate 		if (fflush(mfp) != 0)
1175*0Sstevel@tonic-gate 			goto out;
1176*0Sstevel@tonic-gate 		if (fsync(fileno(mfp)) != 0)
1177*0Sstevel@tonic-gate 			goto out;
1178*0Sstevel@tonic-gate 		if (fclose(mfp) != 0) {
1179*0Sstevel@tonic-gate 			mfp = NULL;
1180*0Sstevel@tonic-gate 			goto out;
1181*0Sstevel@tonic-gate 		}
1182*0Sstevel@tonic-gate 	}
1183*0Sstevel@tonic-gate 
1184*0Sstevel@tonic-gate 	Free(tsname);
1185*0Sstevel@tonic-gate 	tsname = NULL;
1186*0Sstevel@tonic-gate 	rval = 0;
1187*0Sstevel@tonic-gate 
1188*0Sstevel@tonic-gate 	/* cleanup, return error */
1189*0Sstevel@tonic-gate out:
1190*0Sstevel@tonic-gate 	if (tsfp != NULL)
1191*0Sstevel@tonic-gate 		(void) fclose(tsfp);
1192*0Sstevel@tonic-gate 	if (tsname != NULL) {
1193*0Sstevel@tonic-gate 		(void) unlink(tsname);
1194*0Sstevel@tonic-gate 		Free(tsname);
1195*0Sstevel@tonic-gate 	}
1196*0Sstevel@tonic-gate 	return (rval);
1197*0Sstevel@tonic-gate }
1198*0Sstevel@tonic-gate 
1199*0Sstevel@tonic-gate /*
1200*0Sstevel@tonic-gate  * Add replicas to set.  This happens as a result of:
1201*0Sstevel@tonic-gate  *	- metadb [-s set_name] -a
1202*0Sstevel@tonic-gate  *	- metaset -s set_name -a disk
1203*0Sstevel@tonic-gate  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
1204*0Sstevel@tonic-gate  *	- metaset -s set_name -b
1205*0Sstevel@tonic-gate  *
1206*0Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
1207*0Sstevel@tonic-gate  *
1208*0Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
1209*0Sstevel@tonic-gate  * is running the metaset command.
1210*0Sstevel@tonic-gate  *
1211*0Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
1212*0Sstevel@tonic-gate  * running the metaset command.  If this is the first mddb added to
1213*0Sstevel@tonic-gate  * the MN diskset, then no communication is made to other nodes via commd
1214*0Sstevel@tonic-gate  * since the other nodes will be in-sync with respect to the mddbs when
1215*0Sstevel@tonic-gate  * those other nodes join the set and snarf in the newly created mddb.
1216*0Sstevel@tonic-gate  * If this is not the first mddb added to the MN diskset, then this
1217*0Sstevel@tonic-gate  * attach command is sent to all of the nodes using commd.  This keeps
1218*0Sstevel@tonic-gate  * the nodes in-sync.
1219*0Sstevel@tonic-gate  */
1220*0Sstevel@tonic-gate int
1221*0Sstevel@tonic-gate meta_db_attach(
1222*0Sstevel@tonic-gate 	mdsetname_t		*sp,
1223*0Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
1224*0Sstevel@tonic-gate 	mdchkopts_t		options,
1225*0Sstevel@tonic-gate 	md_timeval32_t		*timeval,
1226*0Sstevel@tonic-gate 	int			dbcnt,
1227*0Sstevel@tonic-gate 	int			dbsize,
1228*0Sstevel@tonic-gate 	char			*sysfilename,
1229*0Sstevel@tonic-gate 	md_error_t		*ep
1230*0Sstevel@tonic-gate )
1231*0Sstevel@tonic-gate {
1232*0Sstevel@tonic-gate 	struct mddb_config	c;
1233*0Sstevel@tonic-gate 	mdnamelist_t		*nlp;
1234*0Sstevel@tonic-gate 	mdname_t		*np;
1235*0Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
1236*0Sstevel@tonic-gate 	md_drive_desc		*p;
1237*0Sstevel@tonic-gate 	int			i;
1238*0Sstevel@tonic-gate 	int			fd;
1239*0Sstevel@tonic-gate 	side_t			sideno;
1240*0Sstevel@tonic-gate 	daddr_t			blkno;
1241*0Sstevel@tonic-gate 	int			replicacount = 0;
1242*0Sstevel@tonic-gate 	int			start_mdmonitord = 0;
1243*0Sstevel@tonic-gate 	int			rval = 0;
1244*0Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
1245*0Sstevel@tonic-gate 	md_set_desc		*sd;
1246*0Sstevel@tonic-gate 	int			stale_bool = FALSE;
1247*0Sstevel@tonic-gate 	int			flags;
1248*0Sstevel@tonic-gate 	int			firstmddb = 1;
1249*0Sstevel@tonic-gate 	md_timeval32_t		inittime = {0, 0};
1250*0Sstevel@tonic-gate 
1251*0Sstevel@tonic-gate 	/*
1252*0Sstevel@tonic-gate 	 * Error if we don't get some work to do.
1253*0Sstevel@tonic-gate 	 */
1254*0Sstevel@tonic-gate 	if (db_nlp == NULL)
1255*0Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
1256*0Sstevel@tonic-gate 
1257*0Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
1258*0Sstevel@tonic-gate 		return (-1);
1259*0Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
1260*0Sstevel@tonic-gate 	c.c_id = 0;
1261*0Sstevel@tonic-gate 	c.c_setno = sp->setno;
1262*0Sstevel@tonic-gate 
1263*0Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
1264*0Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
1265*0Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
1266*0Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
1267*0Sstevel@tonic-gate 		if (metaislocalset(sp)) {
1268*0Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
1269*0Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
1270*0Sstevel@tonic-gate 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
1271*0Sstevel@tonic-gate 			    (! (options & MDCHK_ALLOW_NODBS)))
1272*0Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
1273*0Sstevel@tonic-gate 		} else {
1274*0Sstevel@tonic-gate 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
1275*0Sstevel@tonic-gate 				return (mdstealerror(ep, &c.c_mde));
1276*0Sstevel@tonic-gate 		}
1277*0Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
1278*0Sstevel@tonic-gate 	}
1279*0Sstevel@tonic-gate 	/*
1280*0Sstevel@tonic-gate 	 * Is current set STALE?
1281*0Sstevel@tonic-gate 	 */
1282*0Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
1283*0Sstevel@tonic-gate 		stale_bool = TRUE;
1284*0Sstevel@tonic-gate 	}
1285*0Sstevel@tonic-gate 
1286*0Sstevel@tonic-gate 	assert(db_nlp != NULL);
1287*0Sstevel@tonic-gate 
1288*0Sstevel@tonic-gate 	/* if creating the metadbs for the first time start mdmonitord */
1289*0Sstevel@tonic-gate 	if (c.c_dbcnt == 0)
1290*0Sstevel@tonic-gate 		start_mdmonitord = 1;
1291*0Sstevel@tonic-gate 
1292*0Sstevel@tonic-gate 	/*
1293*0Sstevel@tonic-gate 	 * check to see if we will go over the total possible number
1294*0Sstevel@tonic-gate 	 * of data bases
1295*0Sstevel@tonic-gate 	 */
1296*0Sstevel@tonic-gate 	nlp = db_nlp;
1297*0Sstevel@tonic-gate 	while (nlp) {
1298*0Sstevel@tonic-gate 		replicacount += dbcnt;
1299*0Sstevel@tonic-gate 		nlp = nlp->next;
1300*0Sstevel@tonic-gate 	}
1301*0Sstevel@tonic-gate 
1302*0Sstevel@tonic-gate 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
1303*0Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
1304*0Sstevel@tonic-gate 		    sp->setno, c.c_dbcnt + replicacount, NULL));
1305*0Sstevel@tonic-gate 
1306*0Sstevel@tonic-gate 	/*
1307*0Sstevel@tonic-gate 	 * go through and check to make sure all locations specified
1308*0Sstevel@tonic-gate 	 * are legal also pick out driver name;
1309*0Sstevel@tonic-gate 	 */
1310*0Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1311*0Sstevel@tonic-gate 		diskaddr_t devsize;
1312*0Sstevel@tonic-gate 
1313*0Sstevel@tonic-gate 		np = nlp->namep;
1314*0Sstevel@tonic-gate 
1315*0Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
1316*0Sstevel@tonic-gate 			uint_t	partno;
1317*0Sstevel@tonic-gate 			uint_t	rep_partno;
1318*0Sstevel@tonic-gate 			mddrivename_t	*dnp = np->drivenamep;
1319*0Sstevel@tonic-gate 
1320*0Sstevel@tonic-gate 			/*
1321*0Sstevel@tonic-gate 			 * make sure that non-local database replicas
1322*0Sstevel@tonic-gate 			 * are always on the replica slice.
1323*0Sstevel@tonic-gate 			 */
1324*0Sstevel@tonic-gate 			if (meta_replicaslice(dnp,
1325*0Sstevel@tonic-gate 			    &rep_partno, ep) != 0)
1326*0Sstevel@tonic-gate 				return (-1);
1327*0Sstevel@tonic-gate 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
1328*0Sstevel@tonic-gate 				return (-1);
1329*0Sstevel@tonic-gate 			if (partno != rep_partno)
1330*0Sstevel@tonic-gate 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
1331*0Sstevel@tonic-gate 				    np->dev, sp->setname));
1332*0Sstevel@tonic-gate 		}
1333*0Sstevel@tonic-gate 
1334*0Sstevel@tonic-gate 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
1335*0Sstevel@tonic-gate 		    ep)) {
1336*0Sstevel@tonic-gate 			return (-1);
1337*0Sstevel@tonic-gate 		}
1338*0Sstevel@tonic-gate 
1339*0Sstevel@tonic-gate 		if ((devsize = metagetsize(np, ep)) == -1)
1340*0Sstevel@tonic-gate 			return (-1);
1341*0Sstevel@tonic-gate 
1342*0Sstevel@tonic-gate 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
1343*0Sstevel@tonic-gate 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
1344*0Sstevel@tonic-gate 			    meta_getminor(np->dev), sp->setno, devsize,
1345*0Sstevel@tonic-gate 			    np->cname));
1346*0Sstevel@tonic-gate 	}
1347*0Sstevel@tonic-gate 
1348*0Sstevel@tonic-gate 	/*
1349*0Sstevel@tonic-gate 	 * If first disk in set we don't have lb_inittime yet for use as
1350*0Sstevel@tonic-gate 	 * mb_setcreatetime so don't go looking for it. WE'll come back
1351*0Sstevel@tonic-gate 	 * later and update after the locator block has been created.
1352*0Sstevel@tonic-gate 	 * If this isn't the first disk in the set, we have a locator
1353*0Sstevel@tonic-gate 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
1354*0Sstevel@tonic-gate 	 * lb_inittime.
1355*0Sstevel@tonic-gate 	 */
1356*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1357*0Sstevel@tonic-gate 		if (c.c_dbcnt != 0) {
1358*0Sstevel@tonic-gate 			firstmddb = 0;
1359*0Sstevel@tonic-gate 			inittime = meta_get_lb_inittime(sp, ep);
1360*0Sstevel@tonic-gate 		}
1361*0Sstevel@tonic-gate 	}
1362*0Sstevel@tonic-gate 
1363*0Sstevel@tonic-gate 	/*
1364*0Sstevel@tonic-gate 	 * go through and write all master blocks
1365*0Sstevel@tonic-gate 	 */
1366*0Sstevel@tonic-gate 
1367*0Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1368*0Sstevel@tonic-gate 		np = nlp->namep;
1369*0Sstevel@tonic-gate 
1370*0Sstevel@tonic-gate 		if ((fd = open(np->rname, O_RDWR)) < 0)
1371*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, np->rname));
1372*0Sstevel@tonic-gate 
1373*0Sstevel@tonic-gate 		for (i = 0; i < dbcnt; i++) {
1374*0Sstevel@tonic-gate 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
1375*0Sstevel@tonic-gate 			    inittime, ep)) {
1376*0Sstevel@tonic-gate 				(void) close(fd);
1377*0Sstevel@tonic-gate 				return (-1);
1378*0Sstevel@tonic-gate 			}
1379*0Sstevel@tonic-gate 		}
1380*0Sstevel@tonic-gate 		(void) close(fd);
1381*0Sstevel@tonic-gate 	}
1382*0Sstevel@tonic-gate 
1383*0Sstevel@tonic-gate 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
1384*0Sstevel@tonic-gate 		return (-1);
1385*0Sstevel@tonic-gate 
1386*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1387*0Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
1388*0Sstevel@tonic-gate 		if (! mdisok(ep))
1389*0Sstevel@tonic-gate 			return (-1);
1390*0Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1391*0Sstevel@tonic-gate 			return (-1);
1392*0Sstevel@tonic-gate 
1393*0Sstevel@tonic-gate 	}
1394*0Sstevel@tonic-gate 
1395*0Sstevel@tonic-gate 	/*
1396*0Sstevel@tonic-gate 	 * go through and tell kernel to add them
1397*0Sstevel@tonic-gate 	 */
1398*0Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1399*0Sstevel@tonic-gate 		mdcinfo_t	*cinfo;
1400*0Sstevel@tonic-gate 
1401*0Sstevel@tonic-gate 		np = nlp->namep;
1402*0Sstevel@tonic-gate 
1403*0Sstevel@tonic-gate 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
1404*0Sstevel@tonic-gate 			rval = -1;
1405*0Sstevel@tonic-gate 			goto out;
1406*0Sstevel@tonic-gate 		}
1407*0Sstevel@tonic-gate 
1408*0Sstevel@tonic-gate 		/*
1409*0Sstevel@tonic-gate 		 * If mddb is being added to MN diskset and there already
1410*0Sstevel@tonic-gate 		 * exists a valid mddb in the set (which equates to this
1411*0Sstevel@tonic-gate 		 * node being an owner of the set) then use rpc.mdcommd
1412*0Sstevel@tonic-gate 		 * mechanism to add mddb(s) so that all nodes stay in sync.
1413*0Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
1414*0Sstevel@tonic-gate 		 * can't write the message to the mddb.
1415*0Sstevel@tonic-gate 		 *
1416*0Sstevel@tonic-gate 		 * Otherwise, just add mddb to this node.
1417*0Sstevel@tonic-gate 		 */
1418*0Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
1419*0Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
1420*0Sstevel@tonic-gate 			md_mn_result_t			*resultp = NULL;
1421*0Sstevel@tonic-gate 			md_mn_msg_meta_db_attach_t	attach;
1422*0Sstevel@tonic-gate 			int 				send_rval;
1423*0Sstevel@tonic-gate 
1424*0Sstevel@tonic-gate 			/*
1425*0Sstevel@tonic-gate 			 * In a scenario where new replicas had been added on
1426*0Sstevel@tonic-gate 			 * the master, and then all of the old replicas failed
1427*0Sstevel@tonic-gate 			 * before the slaves had knowledge of the new replicas,
1428*0Sstevel@tonic-gate 			 * the slaves are unable to re-parse in the mddb
1429*0Sstevel@tonic-gate 			 * from the new replicas since the slaves have no
1430*0Sstevel@tonic-gate 			 * knowledge of the new replicas.  The following
1431*0Sstevel@tonic-gate 			 * algorithm solves this problem:
1432*0Sstevel@tonic-gate 			 * 	- META_DB_ATTACH message generates submsgs
1433*0Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
1434*0Sstevel@tonic-gate 			 * 		- MDDB_ATTACH new replicas
1435*0Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
1436*0Sstevel@tonic-gate 			 *		information to be sent from master
1437*0Sstevel@tonic-gate 			 *		to slaves at a higher class than the
1438*0Sstevel@tonic-gate 			 *		unblock so the parse message will
1439*0Sstevel@tonic-gate 			 *		reach slaves before unblock message.
1440*0Sstevel@tonic-gate 			 */
1441*0Sstevel@tonic-gate 			attach.msg_l_dev = np->dev;
1442*0Sstevel@tonic-gate 			attach.msg_cnt = dbcnt;
1443*0Sstevel@tonic-gate 			attach.msg_dbsize = dbsize;
1444*0Sstevel@tonic-gate 			(void) strncpy(attach.msg_dname, cinfo->dname,
1445*0Sstevel@tonic-gate 			    sizeof (attach.msg_dname));
1446*0Sstevel@tonic-gate 			(void) splitname(np->bname, &attach.msg_splitname);
1447*0Sstevel@tonic-gate 			attach.msg_options = options;
1448*0Sstevel@tonic-gate 
1449*0Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
1450*0Sstevel@tonic-gate 			attach.msg_devid[0] = NULL;
1451*0Sstevel@tonic-gate 
1452*0Sstevel@tonic-gate 			/*
1453*0Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
1454*0Sstevel@tonic-gate 			 * stuck in the return step until this command has
1455*0Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
1456*0Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
1457*0Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
1458*0Sstevel@tonic-gate 			 * cycle to proceed.
1459*0Sstevel@tonic-gate 			 */
1460*0Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
1461*0Sstevel@tonic-gate 			if (stale_bool == TRUE)
1462*0Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
1463*0Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
1464*0Sstevel@tonic-gate 				MD_MN_MSG_META_DB_ATTACH,
1465*0Sstevel@tonic-gate 				flags, (char *)&attach,
1466*0Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_attach_t),
1467*0Sstevel@tonic-gate 				&resultp, ep);
1468*0Sstevel@tonic-gate 			if (send_rval != 0) {
1469*0Sstevel@tonic-gate 				rval = -1;
1470*0Sstevel@tonic-gate 				if (resultp == NULL)
1471*0Sstevel@tonic-gate 					(void) mddserror(ep,
1472*0Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
1473*0Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
1474*0Sstevel@tonic-gate 					    sp->setname);
1475*0Sstevel@tonic-gate 				else {
1476*0Sstevel@tonic-gate 					(void) mdstealerror(ep,
1477*0Sstevel@tonic-gate 					    &(resultp->mmr_ep));
1478*0Sstevel@tonic-gate 					if (mdisok(ep)) {
1479*0Sstevel@tonic-gate 						(void) mddserror(ep,
1480*0Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
1481*0Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
1482*0Sstevel@tonic-gate 						    sp->setname);
1483*0Sstevel@tonic-gate 					}
1484*0Sstevel@tonic-gate 					free_result(resultp);
1485*0Sstevel@tonic-gate 				}
1486*0Sstevel@tonic-gate 				goto out;
1487*0Sstevel@tonic-gate 			}
1488*0Sstevel@tonic-gate 			if (resultp)
1489*0Sstevel@tonic-gate 				free_result(resultp);
1490*0Sstevel@tonic-gate 		} else {
1491*0Sstevel@tonic-gate 		    /* Adding mddb(s) to just this node */
1492*0Sstevel@tonic-gate 		    for (i = 0; i < dbcnt; i++) {
1493*0Sstevel@tonic-gate 			(void) memset(&c, 0, sizeof (c));
1494*0Sstevel@tonic-gate 			/* Fill in device/replica info */
1495*0Sstevel@tonic-gate 			c.c_locator.l_dev = meta_cmpldev(np->dev);
1496*0Sstevel@tonic-gate 			c.c_locator.l_blkno = i * dbsize + 16;
1497*0Sstevel@tonic-gate 			blkno = c.c_locator.l_blkno;
1498*0Sstevel@tonic-gate 			(void) strncpy(c.c_locator.l_driver, cinfo->dname,
1499*0Sstevel@tonic-gate 			    sizeof (c.c_locator.l_driver));
1500*0Sstevel@tonic-gate 			(void) splitname(np->bname, &c.c_devname);
1501*0Sstevel@tonic-gate 			c.c_locator.l_mnum = meta_getminor(np->dev);
1502*0Sstevel@tonic-gate 
1503*0Sstevel@tonic-gate 			/* Fill in setno, setname, and sideno */
1504*0Sstevel@tonic-gate 			c.c_setno = sp->setno;
1505*0Sstevel@tonic-gate 			if (! metaislocalset(sp)) {
1506*0Sstevel@tonic-gate 				if (MD_MNSET_DESC(sd)) {
1507*0Sstevel@tonic-gate 					c.c_multi_node = 1;
1508*0Sstevel@tonic-gate 				}
1509*0Sstevel@tonic-gate 			}
1510*0Sstevel@tonic-gate 			(void) strcpy(c.c_setname, sp->setname);
1511*0Sstevel@tonic-gate 			c.c_sideno = sideno;
1512*0Sstevel@tonic-gate 
1513*0Sstevel@tonic-gate 			/*
1514*0Sstevel@tonic-gate 			 * Don't need device id information from this ioctl
1515*0Sstevel@tonic-gate 			 * Kernel determines device id from dev_t, which
1516*0Sstevel@tonic-gate 			 * is just what this code would do.
1517*0Sstevel@tonic-gate 			 */
1518*0Sstevel@tonic-gate 			c.c_locator.l_devid = (uint64_t)0;
1519*0Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
1520*0Sstevel@tonic-gate 
1521*0Sstevel@tonic-gate 			if (timeval != NULL)
1522*0Sstevel@tonic-gate 				c.c_timestamp = *timeval;
1523*0Sstevel@tonic-gate 
1524*0Sstevel@tonic-gate 			if (setup_med_cfg(sp, &c, (options & MDCHK_SET_FORCE),
1525*0Sstevel@tonic-gate 			    ep)) {
1526*0Sstevel@tonic-gate 				rval = -1;
1527*0Sstevel@tonic-gate 				goto out;
1528*0Sstevel@tonic-gate 			}
1529*0Sstevel@tonic-gate 
1530*0Sstevel@tonic-gate 			if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde, NULL) != 0) {
1531*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &c.c_mde);
1532*0Sstevel@tonic-gate 				goto out;
1533*0Sstevel@tonic-gate 			}
1534*0Sstevel@tonic-gate 			/*
1535*0Sstevel@tonic-gate 			 * This is either a traditional diskset OR this
1536*0Sstevel@tonic-gate 			 * is the first replica added to a MN diskset.
1537*0Sstevel@tonic-gate 			 * In either case, set broadcast to NO_BCAST so
1538*0Sstevel@tonic-gate 			 * that message won't go through rpc.mdcommd.
1539*0Sstevel@tonic-gate 			 * If this is a traditional diskset, the bcast
1540*0Sstevel@tonic-gate 			 * flag is ignored since traditional disksets
1541*0Sstevel@tonic-gate 			 * don't use the rpc.mdcommd.
1542*0Sstevel@tonic-gate 			 */
1543*0Sstevel@tonic-gate 			if (meta_db_addsidenms(sp, np, blkno,
1544*0Sstevel@tonic-gate 			    DB_ADDSIDENMS_NO_BCAST, ep))
1545*0Sstevel@tonic-gate 				goto out;
1546*0Sstevel@tonic-gate 		    }
1547*0Sstevel@tonic-gate 		}
1548*0Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
1549*0Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
1550*0Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next)
1551*0Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
1552*0Sstevel@tonic-gate 					p->dd_dbcnt = dbcnt;
1553*0Sstevel@tonic-gate 					p->dd_dbsize  = dbsize;
1554*0Sstevel@tonic-gate 					break;
1555*0Sstevel@tonic-gate 				}
1556*0Sstevel@tonic-gate 		}
1557*0Sstevel@tonic-gate 
1558*0Sstevel@tonic-gate 		/*
1559*0Sstevel@tonic-gate 		 * If this was the first addition of disks to the
1560*0Sstevel@tonic-gate 		 * diskset you now need to update the mb_setcreatetime
1561*0Sstevel@tonic-gate 		 * which needed lb_inittime which wasn't there until now.
1562*0Sstevel@tonic-gate 		 */
1563*0Sstevel@tonic-gate 		if (firstmddb) {
1564*0Sstevel@tonic-gate 			if (meta_update_mb(sp, dd, ep) != 0) {
1565*0Sstevel@tonic-gate 				return (-1);
1566*0Sstevel@tonic-gate 			}
1567*0Sstevel@tonic-gate 		}
1568*0Sstevel@tonic-gate 		(void) close(fd);
1569*0Sstevel@tonic-gate 	}
1570*0Sstevel@tonic-gate 
1571*0Sstevel@tonic-gate out:
1572*0Sstevel@tonic-gate 	if (metaislocalset(sp)) {
1573*0Sstevel@tonic-gate 
1574*0Sstevel@tonic-gate 		/* everything looks fine. Start mdmonitord */
1575*0Sstevel@tonic-gate 		/* Note: popen/pclose is the MT-safe replacement for system */
1576*0Sstevel@tonic-gate 		if (rval == 0 && start_mdmonitord  == 1) {
1577*0Sstevel@tonic-gate 			if (pclose(popen(MDMONITORD, "w")) == -1)
1578*0Sstevel@tonic-gate 				md_perror(MDMONITORD);
1579*0Sstevel@tonic-gate 
1580*0Sstevel@tonic-gate 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
1581*0Sstevel@tonic-gate 				mde_perror(&status, "");
1582*0Sstevel@tonic-gate 				mdclrerror(&status);
1583*0Sstevel@tonic-gate 			}
1584*0Sstevel@tonic-gate 		}
1585*0Sstevel@tonic-gate 
1586*0Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
1587*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
1588*0Sstevel@tonic-gate 			if (rval == 0)
1589*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
1590*0Sstevel@tonic-gate 			return (rval);
1591*0Sstevel@tonic-gate 		}
1592*0Sstevel@tonic-gate 
1593*0Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
1594*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
1595*0Sstevel@tonic-gate 			if (rval == 0)
1596*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
1597*0Sstevel@tonic-gate 		}
1598*0Sstevel@tonic-gate 	} else {
1599*0Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
1600*0Sstevel@tonic-gate 		    (options & MDCHK_SET_LOCKED),
1601*0Sstevel@tonic-gate 		    (options & MDCHK_SET_FORCE),
1602*0Sstevel@tonic-gate 		    &status)) {
1603*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
1604*0Sstevel@tonic-gate 			if (rval == 0)
1605*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
1606*0Sstevel@tonic-gate 			else
1607*0Sstevel@tonic-gate 				mdclrerror(&status);
1608*0Sstevel@tonic-gate 		}
1609*0Sstevel@tonic-gate 		metafreedrivedesc(&dd);
1610*0Sstevel@tonic-gate 	}
1611*0Sstevel@tonic-gate 	/*
1612*0Sstevel@tonic-gate 	 * For MN disksets that already had nodes joined
1613*0Sstevel@tonic-gate 	 * before the attach of this mddb(s), the name invalidation is
1614*0Sstevel@tonic-gate 	 * done by the commd handler routine.  Otherwise, if this
1615*0Sstevel@tonic-gate 	 * is the first attach of a MN diskset mddb, the invalidation
1616*0Sstevel@tonic-gate 	 * must be done here since the first attach cannot be sent
1617*0Sstevel@tonic-gate 	 * via the commd since there are no nodes joined to the set yet.
1618*0Sstevel@tonic-gate 	 */
1619*0Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
1620*0Sstevel@tonic-gate 	    (MD_MNSET_DESC(sd) &&
1621*0Sstevel@tonic-gate 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
1622*0Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
1623*0Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
1624*0Sstevel@tonic-gate 		}
1625*0Sstevel@tonic-gate 	}
1626*0Sstevel@tonic-gate 	return (rval);
1627*0Sstevel@tonic-gate }
1628*0Sstevel@tonic-gate 
1629*0Sstevel@tonic-gate /*
1630*0Sstevel@tonic-gate  * deletelist_length
1631*0Sstevel@tonic-gate  *
1632*0Sstevel@tonic-gate  *	count the slices named for deletion on the metadb command line,
1633*0Sstevel@tonic-gate  *	i.e. one per entry of db_nlp.  The result is not a replica count,
1634*0Sstevel@tonic-gate  *	because a single slice may hold multiple replicas.
1635*0Sstevel@tonic-gate  */
1636*0Sstevel@tonic-gate static int
1637*0Sstevel@tonic-gate deletelist_length(mdnamelist_t *db_nlp)
1638*0Sstevel@tonic-gate {
1639*0Sstevel@tonic-gate 	mdnamelist_t		*cur = db_nlp;
1640*0Sstevel@tonic-gate 	int			nslices = 0;
1641*0Sstevel@tonic-gate 
1642*0Sstevel@tonic-gate 	/* walk the list to its NULL terminator, one count per node */
1643*0Sstevel@tonic-gate 	while (cur != NULL) {
1644*0Sstevel@tonic-gate 		nslices++;
1645*0Sstevel@tonic-gate 		cur = cur->next;
1646*0Sstevel@tonic-gate 	}
1647*0Sstevel@tonic-gate 	return (nslices);
1648*0Sstevel@tonic-gate }
1649*0Sstevel@tonic-gate 
1650*0Sstevel@tonic-gate /*
1651*0Sstevel@tonic-gate  * in_deletelist
1652*0Sstevel@tonic-gate  *
1653*0Sstevel@tonic-gate  *	return the zero-based position of the first db_nlp entry whose
1654*0Sstevel@tonic-gate  *	bname equals devname, or -1 when no entry matches.
1655*0Sstevel@tonic-gate  */
1656*0Sstevel@tonic-gate static int
1657*0Sstevel@tonic-gate in_deletelist(char *devname, mdnamelist_t *db_nlp)
1658*0Sstevel@tonic-gate {
1659*0Sstevel@tonic-gate 	mdnamelist_t		*cur;
1660*0Sstevel@tonic-gate 	int			pos = 0;
1661*0Sstevel@tonic-gate 
1662*0Sstevel@tonic-gate 	for (cur = db_nlp; cur != NULL; cur = cur->next, pos++) {
1663*0Sstevel@tonic-gate 		if (strcmp(devname, cur->namep->bname) == 0)
1664*0Sstevel@tonic-gate 			return (pos);
1665*0Sstevel@tonic-gate 	}
1666*0Sstevel@tonic-gate 	return (-1);
1667*0Sstevel@tonic-gate }
1668*0Sstevel@tonic-gate 
1669*0Sstevel@tonic-gate /*
1670*0Sstevel@tonic-gate  * Delete replicas from set.  This happens as a result of:
1671*0Sstevel@tonic-gate  *	- metadb [-s set_name] -d
1672*0Sstevel@tonic-gate  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
1673*0Sstevel@tonic-gate  *	- metaset -s set_name -d disk
1674*0Sstevel@tonic-gate  *	- metaset -s set_name -b
1675*0Sstevel@tonic-gate  *
1676*0Sstevel@tonic-gate  * For a local set, this routine is run on the local set host.
1677*0Sstevel@tonic-gate  *
1678*0Sstevel@tonic-gate  * For a traditional diskset, this routine is run on the node that
1679*0Sstevel@tonic-gate  * is running the metaset command.
1680*0Sstevel@tonic-gate  *
1681*0Sstevel@tonic-gate  * For a multinode diskset, this routine is run by the node that is
1682*0Sstevel@tonic-gate  * running the metaset command.  This detach routine is sent to all
1683*0Sstevel@tonic-gate  * of the joined nodes in the diskset using commd.  This keeps
1684*0Sstevel@tonic-gate  * the nodes in-sync.
1685*0Sstevel@tonic-gate  */
1686*0Sstevel@tonic-gate int
1687*0Sstevel@tonic-gate meta_db_detach(
1688*0Sstevel@tonic-gate 	mdsetname_t		*sp,
1689*0Sstevel@tonic-gate 	mdnamelist_t		*db_nlp,
1690*0Sstevel@tonic-gate 	mdforceopts_t		force_option,
1691*0Sstevel@tonic-gate 	char			*sysfilename,
1692*0Sstevel@tonic-gate 	md_error_t		*ep
1693*0Sstevel@tonic-gate )
1694*0Sstevel@tonic-gate {
1695*0Sstevel@tonic-gate 	struct mddb_config	c;
1696*0Sstevel@tonic-gate 	mdnamelist_t		*nlp;
1697*0Sstevel@tonic-gate 	mdname_t		*np;
1698*0Sstevel@tonic-gate 	md_drive_desc		*dd = NULL;
1699*0Sstevel@tonic-gate 	md_drive_desc		*p;
1700*0Sstevel@tonic-gate 	int			replicacount;
1701*0Sstevel@tonic-gate 	int			replica_delete_count;
1702*0Sstevel@tonic-gate 	int			nr_replica_slices;
1703*0Sstevel@tonic-gate 	int			i;
1704*0Sstevel@tonic-gate 	int			stop_svmdaemons = 0;
1705*0Sstevel@tonic-gate 	int			rval = 0;
1706*0Sstevel@tonic-gate 	int			index;
1707*0Sstevel@tonic-gate 	int			valid_replicas_nottodelete = 0;
1708*0Sstevel@tonic-gate 	int			invalid_replicas_nottodelete = 0;
1709*0Sstevel@tonic-gate 	int			invalid_replicas_todelete = 0;
1710*0Sstevel@tonic-gate 	int			errored = 0;
1711*0Sstevel@tonic-gate 	int			*tag_array;
1712*0Sstevel@tonic-gate 	int			fd = -1;
1713*0Sstevel@tonic-gate 	md_error_t		status = mdnullerror;
1714*0Sstevel@tonic-gate 	md_set_desc		*sd;
1715*0Sstevel@tonic-gate 	int			stale_bool = FALSE;
1716*0Sstevel@tonic-gate 	int			flags;
1717*0Sstevel@tonic-gate 
1718*0Sstevel@tonic-gate 	/*
1719*0Sstevel@tonic-gate 	 * Error if we don't get some work to do.
1720*0Sstevel@tonic-gate 	 */
1721*0Sstevel@tonic-gate 	if (db_nlp == NULL)
1722*0Sstevel@tonic-gate 		return (mdsyserror(ep, EINVAL, NULL));
1723*0Sstevel@tonic-gate 
1724*0Sstevel@tonic-gate 	if (mdnamesareunique(db_nlp, ep) != 0)
1725*0Sstevel@tonic-gate 		return (-1);
1726*0Sstevel@tonic-gate 
1727*0Sstevel@tonic-gate 	(void) memset(&c, 0, sizeof (c));
1728*0Sstevel@tonic-gate 	c.c_id = 0;
1729*0Sstevel@tonic-gate 	c.c_setno = sp->setno;
1730*0Sstevel@tonic-gate 
1731*0Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
1732*0Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
1733*0Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
1734*0Sstevel@tonic-gate 
1735*0Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1736*0Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
1737*0Sstevel@tonic-gate 
1738*0Sstevel@tonic-gate 	/*
1739*0Sstevel@tonic-gate 	 * Is current set STALE?
1740*0Sstevel@tonic-gate 	 */
1741*0Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE) {
1742*0Sstevel@tonic-gate 		stale_bool = TRUE;
1743*0Sstevel@tonic-gate 	}
1744*0Sstevel@tonic-gate 
1745*0Sstevel@tonic-gate 	replicacount = c.c_dbcnt;
1746*0Sstevel@tonic-gate 
1747*0Sstevel@tonic-gate 	assert(db_nlp != NULL);
1748*0Sstevel@tonic-gate 
1749*0Sstevel@tonic-gate 	/*
1750*0Sstevel@tonic-gate 	 * go through and gather how many data bases are on each
1751*0Sstevel@tonic-gate 	 * device specified.
1752*0Sstevel@tonic-gate 	 */
1753*0Sstevel@tonic-gate 
1754*0Sstevel@tonic-gate 	nr_replica_slices = deletelist_length(db_nlp);
1755*0Sstevel@tonic-gate 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
1756*0Sstevel@tonic-gate 
1757*0Sstevel@tonic-gate 	replica_delete_count = 0;
1758*0Sstevel@tonic-gate 	for (i = 0; i < replicacount; i++) {
1759*0Sstevel@tonic-gate 		char	*devname;
1760*0Sstevel@tonic-gate 		int	found = 0;
1761*0Sstevel@tonic-gate 
1762*0Sstevel@tonic-gate 		c.c_id = i;
1763*0Sstevel@tonic-gate 
1764*0Sstevel@tonic-gate 		/* Don't need device id information from this ioctl */
1765*0Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
1766*0Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
1767*0Sstevel@tonic-gate 
1768*0Sstevel@tonic-gate 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1769*0Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
1770*0Sstevel@tonic-gate 
1771*0Sstevel@tonic-gate 		devname = splicename(&c.c_devname);
1772*0Sstevel@tonic-gate 
1773*0Sstevel@tonic-gate 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
1774*0Sstevel@tonic-gate 			found = 1;
1775*0Sstevel@tonic-gate 			tag_array[index] = 1;
1776*0Sstevel@tonic-gate 			replica_delete_count++;
1777*0Sstevel@tonic-gate 		}
1778*0Sstevel@tonic-gate 
1779*0Sstevel@tonic-gate 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
1780*0Sstevel@tonic-gate 				MDDB_F_EWRITE | MDDB_F_TOOSMALL |
1781*0Sstevel@tonic-gate 				MDDB_F_EFMT | MDDB_F_EDATA |
1782*0Sstevel@tonic-gate 				MDDB_F_EMASTER);
1783*0Sstevel@tonic-gate 
1784*0Sstevel@tonic-gate 		/*
1785*0Sstevel@tonic-gate 		 * There are four combinations of "errored" and "found"
1786*0Sstevel@tonic-gate 		 * and they are used to find the number of
1787*0Sstevel@tonic-gate 		 * (a) valid/invalid replicas that are not in the delete
1788*0Sstevel@tonic-gate 		 * list and are available in the system.
1789*0Sstevel@tonic-gate 		 * (b) valid/invalid replicas that are to be deleted.
1790*0Sstevel@tonic-gate 		 */
1791*0Sstevel@tonic-gate 
1792*0Sstevel@tonic-gate 		if (errored && !found)		/* errored and !found */
1793*0Sstevel@tonic-gate 			invalid_replicas_nottodelete++;
1794*0Sstevel@tonic-gate 		else if (!found)		/* !errored and !found */
1795*0Sstevel@tonic-gate 			valid_replicas_nottodelete++;
1796*0Sstevel@tonic-gate 		else if (errored)		/* errored and found */
1797*0Sstevel@tonic-gate 			invalid_replicas_todelete++;
1798*0Sstevel@tonic-gate 		/*
1799*0Sstevel@tonic-gate 		 * else it is !errored and found. This means
1800*0Sstevel@tonic-gate 		 * valid_replicas_todelete++; But this variable will not
1801*0Sstevel@tonic-gate 		 * be used anywhere
1802*0Sstevel@tonic-gate 		 */
1803*0Sstevel@tonic-gate 
1804*0Sstevel@tonic-gate 		Free(devname);
1805*0Sstevel@tonic-gate 	}
1806*0Sstevel@tonic-gate 
1807*0Sstevel@tonic-gate 	index = 0;
1808*0Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1809*0Sstevel@tonic-gate 		np = nlp->namep;
1810*0Sstevel@tonic-gate 		if (tag_array[index++] != 1) {
1811*0Sstevel@tonic-gate 			Free(tag_array);
1812*0Sstevel@tonic-gate 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
1813*0Sstevel@tonic-gate 		}
1814*0Sstevel@tonic-gate 	}
1815*0Sstevel@tonic-gate 
1816*0Sstevel@tonic-gate 	Free(tag_array);
1817*0Sstevel@tonic-gate 
1818*0Sstevel@tonic-gate 
1819*0Sstevel@tonic-gate 	/* if all replicas are deleted stop mdmonitord */
1820*0Sstevel@tonic-gate 	if ((replicacount - replica_delete_count) == 0)
1821*0Sstevel@tonic-gate 		stop_svmdaemons = 1;
1822*0Sstevel@tonic-gate 
1823*0Sstevel@tonic-gate 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
1824*0Sstevel@tonic-gate 		if (force_option & MDFORCE_NONE)
1825*0Sstevel@tonic-gate 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
1826*0Sstevel@tonic-gate 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
1827*0Sstevel@tonic-gate 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
1828*0Sstevel@tonic-gate 	}
1829*0Sstevel@tonic-gate 
1830*0Sstevel@tonic-gate 	/*
1831*0Sstevel@tonic-gate 	 * The following algorithms are followed to check for deletion:
1832*0Sstevel@tonic-gate 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
1833*0Sstevel@tonic-gate 	 * replicas, then deletion should be allowed.
1834*0Sstevel@tonic-gate 	 * (b) Deletion should be allowed only if valid replicas that are "not"
1835*0Sstevel@tonic-gate 	 * to be deleted is always greater than the invalid replicas that
1836*0Sstevel@tonic-gate 	 * are "not" to be deleted.
1837*0Sstevel@tonic-gate 	 * (c) If the user uses -f option, then deletion should be allowed.
1838*0Sstevel@tonic-gate 	 */
1839*0Sstevel@tonic-gate 
1840*0Sstevel@tonic-gate 	if ((invalid_replicas_todelete != replica_delete_count) &&
1841*0Sstevel@tonic-gate 		(invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
1842*0Sstevel@tonic-gate 				(force_option != MDFORCE_LOCAL))
1843*0Sstevel@tonic-gate 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
1844*0Sstevel@tonic-gate 
1845*0Sstevel@tonic-gate 	/*
1846*0Sstevel@tonic-gate 	 * go through and tell kernel to delete them
1847*0Sstevel@tonic-gate 	 */
1848*0Sstevel@tonic-gate 
1849*0Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
1850*0Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
1851*0Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
1852*0Sstevel@tonic-gate 
1853*0Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
1854*0Sstevel@tonic-gate 		return (mdstealerror(ep, &c.c_mde));
1855*0Sstevel@tonic-gate 
1856*0Sstevel@tonic-gate 	if (! metaislocalset(sp)) {
1857*0Sstevel@tonic-gate 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
1858*0Sstevel@tonic-gate 		if (! mdisok(ep))
1859*0Sstevel@tonic-gate 			return (-1);
1860*0Sstevel@tonic-gate 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1861*0Sstevel@tonic-gate 			return (-1);
1862*0Sstevel@tonic-gate 	}
1863*0Sstevel@tonic-gate 
1864*0Sstevel@tonic-gate 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
1865*0Sstevel@tonic-gate 		np = nlp->namep;
1866*0Sstevel@tonic-gate 
1867*0Sstevel@tonic-gate 		/*
1868*0Sstevel@tonic-gate 		 * If mddb is being deleted from MN diskset and node is
1869*0Sstevel@tonic-gate 		 * an owner of the diskset then use rpc.mdcommd
1870*0Sstevel@tonic-gate 		 * mechanism to delete mddb(s) so that all nodes stay in sync.
1871*0Sstevel@tonic-gate 		 * If set is stale, don't log the message since rpc.mdcommd
1872*0Sstevel@tonic-gate 		 * can't write the message to the mddb.
1873*0Sstevel@tonic-gate 		 *
1874*0Sstevel@tonic-gate 		 * When mddbs are first being added to set, a detach can
1875*0Sstevel@tonic-gate 		 * be called before any node has joined the diskset, so
1876*0Sstevel@tonic-gate 		 * must check to see if node is an owner of the diskset.
1877*0Sstevel@tonic-gate 		 *
1878*0Sstevel@tonic-gate 		 * Otherwise, just delete mddb from this node.
1879*0Sstevel@tonic-gate 		 */
1880*0Sstevel@tonic-gate 
1881*0Sstevel@tonic-gate 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
1882*0Sstevel@tonic-gate 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
1883*0Sstevel@tonic-gate 			md_mn_result_t			*resultp;
1884*0Sstevel@tonic-gate 			md_mn_msg_meta_db_detach_t	detach;
1885*0Sstevel@tonic-gate 			int				send_rval;
1886*0Sstevel@tonic-gate 
1887*0Sstevel@tonic-gate 			/*
1888*0Sstevel@tonic-gate 			 * The following algorithm is used to detach replicas.
1889*0Sstevel@tonic-gate 			 * 	- META_DB_DETACH message generates submsgs
1890*0Sstevel@tonic-gate 			 * 		- BLOCK parse (master)
1891*0Sstevel@tonic-gate 			 * 		- MDDB_DETACH replicas
1892*0Sstevel@tonic-gate 			 * 		- UNBLOCK parse (master) causing parse
1893*0Sstevel@tonic-gate 			 *		information to be sent from master
1894*0Sstevel@tonic-gate 			 *		to slaves at a higher class than the
1895*0Sstevel@tonic-gate 			 *		unblock so the parse message will
1896*0Sstevel@tonic-gate 			 *		reach slaves before unblock message.
1897*0Sstevel@tonic-gate 			 */
1898*0Sstevel@tonic-gate 			(void) splitname(np->bname, &detach.msg_splitname);
1899*0Sstevel@tonic-gate 
1900*0Sstevel@tonic-gate 			/* Set devid to NULL until devids are supported */
1901*0Sstevel@tonic-gate 			detach.msg_devid[0] = NULL;
1902*0Sstevel@tonic-gate 
1903*0Sstevel@tonic-gate 			/*
1904*0Sstevel@tonic-gate 			 * If reconfig cycle has been started, this node is
1905*0Sstevel@tonic-gate 			 * stuck in the return step until this command has
1906*0Sstevel@tonic-gate 			 * completed.  If mdcommd is suspended, ask
1907*0Sstevel@tonic-gate 			 * send_message to fail (instead of retrying)
1908*0Sstevel@tonic-gate 			 * so that metaset can finish allowing the reconfig
1909*0Sstevel@tonic-gate 			 * cycle to proceed.
1910*0Sstevel@tonic-gate 			 */
1911*0Sstevel@tonic-gate 			flags = MD_MSGF_FAIL_ON_SUSPEND;
1912*0Sstevel@tonic-gate 			if (stale_bool == TRUE)
1913*0Sstevel@tonic-gate 				flags |= MD_MSGF_NO_LOG;
1914*0Sstevel@tonic-gate 			send_rval = mdmn_send_message(sp->setno,
1915*0Sstevel@tonic-gate 				MD_MN_MSG_META_DB_DETACH,
1916*0Sstevel@tonic-gate 				flags, (char *)&detach,
1917*0Sstevel@tonic-gate 				sizeof (md_mn_msg_meta_db_detach_t),
1918*0Sstevel@tonic-gate 				&resultp, ep);
1919*0Sstevel@tonic-gate 			if (send_rval != 0) {
1920*0Sstevel@tonic-gate 				rval = -1;
1921*0Sstevel@tonic-gate 				if (resultp == NULL)
1922*0Sstevel@tonic-gate 					(void) mddserror(ep,
1923*0Sstevel@tonic-gate 					    MDE_DS_COMMD_SEND_FAIL,
1924*0Sstevel@tonic-gate 					    sp->setno, NULL, NULL,
1925*0Sstevel@tonic-gate 					    sp->setname);
1926*0Sstevel@tonic-gate 				else {
1927*0Sstevel@tonic-gate 					(void) mdstealerror(ep,
1928*0Sstevel@tonic-gate 					    &(resultp->mmr_ep));
1929*0Sstevel@tonic-gate 					if (mdisok(ep)) {
1930*0Sstevel@tonic-gate 						(void) mddserror(ep,
1931*0Sstevel@tonic-gate 						    MDE_DS_COMMD_SEND_FAIL,
1932*0Sstevel@tonic-gate 						    sp->setno, NULL, NULL,
1933*0Sstevel@tonic-gate 						    sp->setname);
1934*0Sstevel@tonic-gate 					}
1935*0Sstevel@tonic-gate 					free_result(resultp);
1936*0Sstevel@tonic-gate 				}
1937*0Sstevel@tonic-gate 				goto out;
1938*0Sstevel@tonic-gate 			}
1939*0Sstevel@tonic-gate 			if (resultp)
1940*0Sstevel@tonic-gate 				free_result(resultp);
1941*0Sstevel@tonic-gate 		} else {
1942*0Sstevel@tonic-gate 			i = 0;
1943*0Sstevel@tonic-gate 			while (i < c.c_dbcnt) {
1944*0Sstevel@tonic-gate 				char	*devname;
1945*0Sstevel@tonic-gate 
1946*0Sstevel@tonic-gate 				c.c_id = i;
1947*0Sstevel@tonic-gate 
1948*0Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
1949*0Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
1950*0Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
1951*0Sstevel@tonic-gate 
1952*0Sstevel@tonic-gate 				if (metaioctl(MD_DB_GETDEV, &c,
1953*0Sstevel@tonic-gate 				    &c.c_mde, NULL)) {
1954*0Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
1955*0Sstevel@tonic-gate 					goto out;
1956*0Sstevel@tonic-gate 				}
1957*0Sstevel@tonic-gate 
1958*0Sstevel@tonic-gate 				devname = splicename(&c.c_devname);
1959*0Sstevel@tonic-gate 				if (strcmp(devname, np->bname) != 0) {
1960*0Sstevel@tonic-gate 					Free(devname);
1961*0Sstevel@tonic-gate 					i++;
1962*0Sstevel@tonic-gate 					continue;
1963*0Sstevel@tonic-gate 				}
1964*0Sstevel@tonic-gate 				Free(devname);
1965*0Sstevel@tonic-gate 
1966*0Sstevel@tonic-gate 				/* Don't need devid info from this ioctl */
1967*0Sstevel@tonic-gate 				c.c_locator.l_devid = (uint64_t)0;
1968*0Sstevel@tonic-gate 				c.c_locator.l_devid_flags = 0;
1969*0Sstevel@tonic-gate 
1970*0Sstevel@tonic-gate 				if (metaioctl(MD_DB_DELDEV, &c,
1971*0Sstevel@tonic-gate 				    &c.c_mde, NULL) != 0) {
1972*0Sstevel@tonic-gate 					rval = mdstealerror(ep, &c.c_mde);
1973*0Sstevel@tonic-gate 					goto out;
1974*0Sstevel@tonic-gate 				}
1975*0Sstevel@tonic-gate 
1976*0Sstevel@tonic-gate 				/* Not incrementing "i" intentionally */
1977*0Sstevel@tonic-gate 			}
1978*0Sstevel@tonic-gate 		}
1979*0Sstevel@tonic-gate 		if (! metaislocalset(sp)) {
1980*0Sstevel@tonic-gate 			/* update the dbcnt and size in dd */
1981*0Sstevel@tonic-gate 			for (p = dd; p != NULL; p = p->dd_next) {
1982*0Sstevel@tonic-gate 				if (p->dd_dnp == np->drivenamep) {
1983*0Sstevel@tonic-gate 					p->dd_dbcnt = 0;
1984*0Sstevel@tonic-gate 					p->dd_dbsize  = 0;
1985*0Sstevel@tonic-gate 					break;
1986*0Sstevel@tonic-gate 				}
1987*0Sstevel@tonic-gate 			}
1988*0Sstevel@tonic-gate 
1989*0Sstevel@tonic-gate 			/*
1990*0Sstevel@tonic-gate 			 * Slam a dummy master block and make it self
1991*0Sstevel@tonic-gate 			 * identifying
1992*0Sstevel@tonic-gate 			 */
1993*0Sstevel@tonic-gate 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
1994*0Sstevel@tonic-gate 				meta_mkdummymaster(sp, fd, 16);
1995*0Sstevel@tonic-gate 				(void) close(fd);
1996*0Sstevel@tonic-gate 			}
1997*0Sstevel@tonic-gate 		}
1998*0Sstevel@tonic-gate 	}
1999*0Sstevel@tonic-gate out:
2000*0Sstevel@tonic-gate 	if (metaislocalset(sp)) {
2001*0Sstevel@tonic-gate 		/*
2002*0Sstevel@tonic-gate 		 * Stop all the daemons if there are
2003*0Sstevel@tonic-gate 		 * no more replicas so that the module can be
2004*0Sstevel@tonic-gate 		 * unloaded.
2005*0Sstevel@tonic-gate 		 */
2006*0Sstevel@tonic-gate 		if (rval == 0 && stop_svmdaemons == 1) {
2007*0Sstevel@tonic-gate 			char buf[MAXPATHLEN];
2008*0Sstevel@tonic-gate 			int i;
2009*0Sstevel@tonic-gate 
2010*0Sstevel@tonic-gate 			for (i = 0; i < DAEMON_COUNT; i++) {
2011*0Sstevel@tonic-gate 				(void) snprintf(buf, MAXPATHLEN,
2012*0Sstevel@tonic-gate 					"/usr/bin/pkill -%s -x %s",
2013*0Sstevel@tonic-gate 					svmd_kill_list[i].svmd_kill_val,
2014*0Sstevel@tonic-gate 					svmd_kill_list[i].svmd_name);
2015*0Sstevel@tonic-gate 				if (pclose(popen(buf, "w")) == -1)
2016*0Sstevel@tonic-gate 					md_perror(buf);
2017*0Sstevel@tonic-gate 			}
2018*0Sstevel@tonic-gate 
2019*0Sstevel@tonic-gate 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
2020*0Sstevel@tonic-gate 				mde_perror(&status, "");
2021*0Sstevel@tonic-gate 				mdclrerror(&status);
2022*0Sstevel@tonic-gate 			}
2023*0Sstevel@tonic-gate 		}
2024*0Sstevel@tonic-gate 		if (buildconf(sp, &status)) {
2025*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
2026*0Sstevel@tonic-gate 			if (rval == 0)
2027*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
2028*0Sstevel@tonic-gate 			else
2029*0Sstevel@tonic-gate 				mdclrerror(&status);
2030*0Sstevel@tonic-gate 			return (rval);
2031*0Sstevel@tonic-gate 		}
2032*0Sstevel@tonic-gate 
2033*0Sstevel@tonic-gate 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
2034*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
2035*0Sstevel@tonic-gate 			if (rval == 0)
2036*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
2037*0Sstevel@tonic-gate 			else
2038*0Sstevel@tonic-gate 				mdclrerror(&status);
2039*0Sstevel@tonic-gate 		}
2040*0Sstevel@tonic-gate 	} else {
2041*0Sstevel@tonic-gate 		if (update_dbinfo_on_drives(sp, dd,
2042*0Sstevel@tonic-gate 		    (force_option & MDFORCE_SET_LOCKED),
2043*0Sstevel@tonic-gate 		    ((force_option & MDFORCE_LOCAL) |
2044*0Sstevel@tonic-gate 		    (force_option & MDFORCE_DS)), &status)) {
2045*0Sstevel@tonic-gate 			/* Don't mask any previous errors */
2046*0Sstevel@tonic-gate 			if (rval == 0)
2047*0Sstevel@tonic-gate 				rval = mdstealerror(ep, &status);
2048*0Sstevel@tonic-gate 			else
2049*0Sstevel@tonic-gate 				mdclrerror(&status);
2050*0Sstevel@tonic-gate 		}
2051*0Sstevel@tonic-gate 		metafreedrivedesc(&dd);
2052*0Sstevel@tonic-gate 	}
2053*0Sstevel@tonic-gate 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
2054*0Sstevel@tonic-gate 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
2055*0Sstevel@tonic-gate 			meta_invalidate_name(nlp->namep);
2056*0Sstevel@tonic-gate 		}
2057*0Sstevel@tonic-gate 	}
2058*0Sstevel@tonic-gate 	return (rval);
2059*0Sstevel@tonic-gate }
2060*0Sstevel@tonic-gate 
2061*0Sstevel@tonic-gate static md_replica_t *
2062*0Sstevel@tonic-gate metareplicaname(
2063*0Sstevel@tonic-gate 	mdsetname_t		*sp,
2064*0Sstevel@tonic-gate 	int			flags,
2065*0Sstevel@tonic-gate 	struct mddb_config	*c,
2066*0Sstevel@tonic-gate 	md_error_t		*ep
2067*0Sstevel@tonic-gate )
2068*0Sstevel@tonic-gate {
2069*0Sstevel@tonic-gate 	md_replica_t	*rp;
2070*0Sstevel@tonic-gate 	char		*devname;
2071*0Sstevel@tonic-gate 	size_t		sz;
2072*0Sstevel@tonic-gate 
2073*0Sstevel@tonic-gate 	/* allocate replicaname */
2074*0Sstevel@tonic-gate 	rp = Zalloc(sizeof (*rp));
2075*0Sstevel@tonic-gate 
2076*0Sstevel@tonic-gate 	/* get device name */
2077*0Sstevel@tonic-gate 	devname = splicename(&c->c_devname);
2078*0Sstevel@tonic-gate 	if (flags & PRINT_FAST) {
2079*0Sstevel@tonic-gate 		if ((rp->r_namep = metaname_fast(&sp, devname, ep)) == NULL) {
2080*0Sstevel@tonic-gate 			Free(devname);
2081*0Sstevel@tonic-gate 			Free(rp);
2082*0Sstevel@tonic-gate 			return (NULL);
2083*0Sstevel@tonic-gate 		}
2084*0Sstevel@tonic-gate 	} else {
2085*0Sstevel@tonic-gate 		if ((rp->r_namep = metaname(&sp, devname, ep)) == NULL) {
2086*0Sstevel@tonic-gate 			Free(devname);
2087*0Sstevel@tonic-gate 			Free(rp);
2088*0Sstevel@tonic-gate 			return (NULL);
2089*0Sstevel@tonic-gate 		}
2090*0Sstevel@tonic-gate 	}
2091*0Sstevel@tonic-gate 	Free(devname);
2092*0Sstevel@tonic-gate 
2093*0Sstevel@tonic-gate 	/* make sure it's OK */
2094*0Sstevel@tonic-gate 	if ((! (flags & MD_BASICNAME_OK)) &&
2095*0Sstevel@tonic-gate 	    (metachkcomp(rp->r_namep, ep) != 0)) {
2096*0Sstevel@tonic-gate 		Free(rp);
2097*0Sstevel@tonic-gate 		return (NULL);
2098*0Sstevel@tonic-gate 	}
2099*0Sstevel@tonic-gate 
2100*0Sstevel@tonic-gate 	rp->r_blkno = MD_DISKADDR_ERROR;
2101*0Sstevel@tonic-gate 	rp->r_nblk = MD_DISKADDR_ERROR;
2102*0Sstevel@tonic-gate 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
2103*0Sstevel@tonic-gate 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
2104*0Sstevel@tonic-gate 		sz = devid_sizeof((ddi_devid_t)(c->c_locator.l_devid));
2105*0Sstevel@tonic-gate 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
2106*0Sstevel@tonic-gate 		    (ddi_devid_t)NULL) {
2107*0Sstevel@tonic-gate 			Free(rp);
2108*0Sstevel@tonic-gate 			return (NULL);
2109*0Sstevel@tonic-gate 		}
2110*0Sstevel@tonic-gate 		(void) memcpy((void *)rp->r_devid,
2111*0Sstevel@tonic-gate 		    (void *)c->c_locator.l_devid, sz);
2112*0Sstevel@tonic-gate 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
2113*0Sstevel@tonic-gate 		rp->r_flags &= ~MDDB_F_NODEVID;
2114*0Sstevel@tonic-gate 		/* Overwrite dev derived from name with dev from devid */
2115*0Sstevel@tonic-gate 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
2116*0Sstevel@tonic-gate 	}
2117*0Sstevel@tonic-gate 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
2118*0Sstevel@tonic-gate 
2119*0Sstevel@tonic-gate 	rp->r_blkno = c->c_locator.l_blkno;
2120*0Sstevel@tonic-gate 	if (c->c_dbend != 0)
2121*0Sstevel@tonic-gate 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
2122*0Sstevel@tonic-gate 
2123*0Sstevel@tonic-gate 	/* return replica */
2124*0Sstevel@tonic-gate 	return (rp);
2125*0Sstevel@tonic-gate }
2126*0Sstevel@tonic-gate 
2127*0Sstevel@tonic-gate /*
2128*0Sstevel@tonic-gate  * free replica list
2129*0Sstevel@tonic-gate  */
2130*0Sstevel@tonic-gate void
2131*0Sstevel@tonic-gate metafreereplicalist(
2132*0Sstevel@tonic-gate 	md_replicalist_t	*rlp
2133*0Sstevel@tonic-gate )
2134*0Sstevel@tonic-gate {
2135*0Sstevel@tonic-gate 	md_replicalist_t	*rl = NULL;
2136*0Sstevel@tonic-gate 
2137*0Sstevel@tonic-gate 	for (/* void */; (rlp != NULL); rlp = rl) {
2138*0Sstevel@tonic-gate 		rl = rlp->rl_next;
2139*0Sstevel@tonic-gate 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
2140*0Sstevel@tonic-gate 			free(rlp->rl_repp->r_devid);
2141*0Sstevel@tonic-gate 		}
2142*0Sstevel@tonic-gate 		Free(rlp->rl_repp);
2143*0Sstevel@tonic-gate 		Free(rlp);
2144*0Sstevel@tonic-gate 	}
2145*0Sstevel@tonic-gate }
2146*0Sstevel@tonic-gate 
2147*0Sstevel@tonic-gate /*
2148*0Sstevel@tonic-gate  * return list of all replicas in set
2149*0Sstevel@tonic-gate  */
2150*0Sstevel@tonic-gate int
2151*0Sstevel@tonic-gate metareplicalist(
2152*0Sstevel@tonic-gate 	mdsetname_t		*sp,
2153*0Sstevel@tonic-gate 	int			flags,
2154*0Sstevel@tonic-gate 	md_replicalist_t	**rlpp,
2155*0Sstevel@tonic-gate 	md_error_t		*ep
2156*0Sstevel@tonic-gate )
2157*0Sstevel@tonic-gate {
2158*0Sstevel@tonic-gate 	md_replicalist_t	**tail = rlpp;
2159*0Sstevel@tonic-gate 	int			count = 0;
2160*0Sstevel@tonic-gate 	struct mddb_config	c;
2161*0Sstevel@tonic-gate 	int			i;
2162*0Sstevel@tonic-gate 	char			*devid;
2163*0Sstevel@tonic-gate 
2164*0Sstevel@tonic-gate 	/* for each replica */
2165*0Sstevel@tonic-gate 	i = 0;
2166*0Sstevel@tonic-gate 	do {
2167*0Sstevel@tonic-gate 		md_replica_t	*rp;
2168*0Sstevel@tonic-gate 
2169*0Sstevel@tonic-gate 		/* get next replica */
2170*0Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
2171*0Sstevel@tonic-gate 		c.c_id = i;
2172*0Sstevel@tonic-gate 		c.c_setno = sp->setno;
2173*0Sstevel@tonic-gate 
2174*0Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
2175*0Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
2176*0Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
2177*0Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
2178*0Sstevel@tonic-gate 				break;	/* handle none at all */
2179*0Sstevel@tonic-gate 			}
2180*0Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
2181*0Sstevel@tonic-gate 			goto out;
2182*0Sstevel@tonic-gate 		}
2183*0Sstevel@tonic-gate 
2184*0Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
2185*0Sstevel@tonic-gate 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
2186*0Sstevel@tonic-gate 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
2187*0Sstevel@tonic-gate 				goto out;
2188*0Sstevel@tonic-gate 			}
2189*0Sstevel@tonic-gate 			c.c_locator.l_devid = (uintptr_t)devid;
2190*0Sstevel@tonic-gate 			/*
2191*0Sstevel@tonic-gate 			 * Turn on space and sz flags since 'sz' amount of
2192*0Sstevel@tonic-gate 			 * space has been alloc'd.
2193*0Sstevel@tonic-gate 			 */
2194*0Sstevel@tonic-gate 			c.c_locator.l_devid_flags =
2195*0Sstevel@tonic-gate 				MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
2196*0Sstevel@tonic-gate 		}
2197*0Sstevel@tonic-gate 
2198*0Sstevel@tonic-gate 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
2199*0Sstevel@tonic-gate 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
2200*0Sstevel@tonic-gate 				mdclrerror(&c.c_mde);
2201*0Sstevel@tonic-gate 				break;	/* handle none at all */
2202*0Sstevel@tonic-gate 			}
2203*0Sstevel@tonic-gate 			(void) mdstealerror(ep, &c.c_mde);
2204*0Sstevel@tonic-gate 			goto out;
2205*0Sstevel@tonic-gate 		}
2206*0Sstevel@tonic-gate 
2207*0Sstevel@tonic-gate 		/*
2208*0Sstevel@tonic-gate 		 * Paranoid check - shouldn't happen, but is left as
2209*0Sstevel@tonic-gate 		 * a place holder for changes that will be needed after
2210*0Sstevel@tonic-gate 		 * dynamic reconfiguration changes are added to SVM (to
2211*0Sstevel@tonic-gate 		 * support movement of disks at any point in time).
2212*0Sstevel@tonic-gate 		 */
2213*0Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
2214*0Sstevel@tonic-gate 			(void) fprintf(stderr,
2215*0Sstevel@tonic-gate 			    dgettext(TEXT_DOMAIN,
2216*0Sstevel@tonic-gate 				"Error: Relocation Information "
2217*0Sstevel@tonic-gate 				"(drvnm=%s, mnum=0x%lx) \n"
2218*0Sstevel@tonic-gate 				"relocation information size changed - \n"
2219*0Sstevel@tonic-gate 				"rerun command\n"),
2220*0Sstevel@tonic-gate 			    c.c_locator.l_driver, c.c_locator.l_mnum);
2221*0Sstevel@tonic-gate 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
2222*0Sstevel@tonic-gate 			goto out;
2223*0Sstevel@tonic-gate 		}
2224*0Sstevel@tonic-gate 
2225*0Sstevel@tonic-gate 		if (c.c_dbcnt == 0)
2226*0Sstevel@tonic-gate 			break;		/* handle none at all */
2227*0Sstevel@tonic-gate 
2228*0Sstevel@tonic-gate 		/* get info */
2229*0Sstevel@tonic-gate 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
2230*0Sstevel@tonic-gate 			goto out;
2231*0Sstevel@tonic-gate 
2232*0Sstevel@tonic-gate 		/* append to list */
2233*0Sstevel@tonic-gate 		*tail = Zalloc(sizeof (**tail));
2234*0Sstevel@tonic-gate 		(*tail)->rl_repp = rp;
2235*0Sstevel@tonic-gate 		tail = &(*tail)->rl_next;
2236*0Sstevel@tonic-gate 		++count;
2237*0Sstevel@tonic-gate 
2238*0Sstevel@tonic-gate 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2239*0Sstevel@tonic-gate 			free(devid);
2240*0Sstevel@tonic-gate 			c.c_locator.l_devid_flags = 0;
2241*0Sstevel@tonic-gate 		}
2242*0Sstevel@tonic-gate 
2243*0Sstevel@tonic-gate 	} while (++i < c.c_dbcnt);
2244*0Sstevel@tonic-gate 
2245*0Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2246*0Sstevel@tonic-gate 		free(devid);
2247*0Sstevel@tonic-gate 	}
2248*0Sstevel@tonic-gate 
2249*0Sstevel@tonic-gate 	/* return count */
2250*0Sstevel@tonic-gate 	return (count);
2251*0Sstevel@tonic-gate 
2252*0Sstevel@tonic-gate 	/* cleanup, return error */
2253*0Sstevel@tonic-gate out:
2254*0Sstevel@tonic-gate 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
2255*0Sstevel@tonic-gate 		free(devid);
2256*0Sstevel@tonic-gate 	}
2257*0Sstevel@tonic-gate 	metafreereplicalist(*rlpp);
2258*0Sstevel@tonic-gate 	*rlpp = NULL;
2259*0Sstevel@tonic-gate 	return (-1);
2260*0Sstevel@tonic-gate }
2261*0Sstevel@tonic-gate 
2262*0Sstevel@tonic-gate /*
2263*0Sstevel@tonic-gate  * meta_sync_db_locations - get list of replicas from kernel and write
2264*0Sstevel@tonic-gate  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
2265*0Sstevel@tonic-gate  * 	the kernel with the replica list in the conf files.
2266*0Sstevel@tonic-gate  *
2267*0Sstevel@tonic-gate  */
2268*0Sstevel@tonic-gate void
2269*0Sstevel@tonic-gate meta_sync_db_locations(
2270*0Sstevel@tonic-gate 	mdsetname_t	*sp,
2271*0Sstevel@tonic-gate 	md_error_t	*ep
2272*0Sstevel@tonic-gate )
2273*0Sstevel@tonic-gate {
2274*0Sstevel@tonic-gate 	char		*sname = 0;		/* system file name */
2275*0Sstevel@tonic-gate 	char 		*cname = 0;		/* config file name */
2276*0Sstevel@tonic-gate 
2277*0Sstevel@tonic-gate 	if (!metaislocalset(sp))
2278*0Sstevel@tonic-gate 		return;
2279*0Sstevel@tonic-gate 
2280*0Sstevel@tonic-gate 	/* Updates backup of configuration file (aka mddb.cf) */
2281*0Sstevel@tonic-gate 	if (buildconf(sp, ep) != 0)
2282*0Sstevel@tonic-gate 		return;
2283*0Sstevel@tonic-gate 
2284*0Sstevel@tonic-gate 	/* Updates system configuration file (aka md.conf) */
2285*0Sstevel@tonic-gate 	(void) meta_db_patch(sname, cname, 0, ep);
2286*0Sstevel@tonic-gate }
2287*0Sstevel@tonic-gate 
2288*0Sstevel@tonic-gate /*
2289*0Sstevel@tonic-gate  * setup_db_locations - parse the mddb.cf file and
2290*0Sstevel@tonic-gate  *			tells the driver which db locations to use.
2291*0Sstevel@tonic-gate  */
2292*0Sstevel@tonic-gate int
2293*0Sstevel@tonic-gate meta_setup_db_locations(
2294*0Sstevel@tonic-gate 	md_error_t	*ep
2295*0Sstevel@tonic-gate )
2296*0Sstevel@tonic-gate {
2297*0Sstevel@tonic-gate 	mddb_config_t	c;
2298*0Sstevel@tonic-gate 	FILE		*fp;
2299*0Sstevel@tonic-gate 	char		inbuff[1024];
2300*0Sstevel@tonic-gate 	char		*buff;
2301*0Sstevel@tonic-gate 	uint_t		i;
2302*0Sstevel@tonic-gate 	size_t		sz;
2303*0Sstevel@tonic-gate 	int		rval = 0;
2304*0Sstevel@tonic-gate 	char		*devidp;
2305*0Sstevel@tonic-gate 	uint_t		devid_size;
2306*0Sstevel@tonic-gate 	char		*minor_name = NULL;
2307*0Sstevel@tonic-gate 	ddi_devid_t	devid_decode;
2308*0Sstevel@tonic-gate 	int		checksum;
2309*0Sstevel@tonic-gate 
2310*0Sstevel@tonic-gate 	/* do mddb.cf file */
2311*0Sstevel@tonic-gate 	(void) memset(&c, '\0', sizeof (c));
2312*0Sstevel@tonic-gate 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
2313*0Sstevel@tonic-gate 		if (errno != ENOENT)
2314*0Sstevel@tonic-gate 			return (mdsyserror(ep, errno, META_DBCONF));
2315*0Sstevel@tonic-gate 	}
2316*0Sstevel@tonic-gate 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
2317*0Sstevel@tonic-gate 	    fp)) != NULL)) {
2318*0Sstevel@tonic-gate 
2319*0Sstevel@tonic-gate 		/* ignore comments */
2320*0Sstevel@tonic-gate 		if (*buff == '#')
2321*0Sstevel@tonic-gate 			continue;
2322*0Sstevel@tonic-gate 
2323*0Sstevel@tonic-gate 		/* parse locator */
2324*0Sstevel@tonic-gate 		(void) memset(&c, 0, sizeof (c));
2325*0Sstevel@tonic-gate 		c.c_setno = MD_LOCAL_SET;
2326*0Sstevel@tonic-gate 		i = strcspn(buff, " \t");
2327*0Sstevel@tonic-gate 		if (i > sizeof (c.c_locator.l_driver))
2328*0Sstevel@tonic-gate 			i = sizeof (c.c_locator.l_driver);
2329*0Sstevel@tonic-gate 		(void) strncpy(c.c_locator.l_driver, buff, i);
2330*0Sstevel@tonic-gate 		buff += i;
2331*0Sstevel@tonic-gate 		c.c_locator.l_dev =
2332*0Sstevel@tonic-gate 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
2333*0Sstevel@tonic-gate 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
2334*0Sstevel@tonic-gate 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
2335*0Sstevel@tonic-gate 
2336*0Sstevel@tonic-gate 		/* parse out devid */
2337*0Sstevel@tonic-gate 		while (isspace((int)(*buff)))
2338*0Sstevel@tonic-gate 			buff += 1;
2339*0Sstevel@tonic-gate 		i = strcspn(buff, " \t");
2340*0Sstevel@tonic-gate 		if ((devidp = (char *)malloc(i+1)) == NULL)
2341*0Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2342*0Sstevel@tonic-gate 
2343*0Sstevel@tonic-gate 		(void) strncpy(devidp, buff, i);
2344*0Sstevel@tonic-gate 		devidp[i] = '\0';
2345*0Sstevel@tonic-gate 		if (devid_str_decode(devidp, &devid_decode,
2346*0Sstevel@tonic-gate 		    &minor_name) == -1) {
2347*0Sstevel@tonic-gate 			free(devidp);
2348*0Sstevel@tonic-gate 			continue;
2349*0Sstevel@tonic-gate 		}
2350*0Sstevel@tonic-gate 
2351*0Sstevel@tonic-gate 		/* Conf file must have minor name associated with devid */
2352*0Sstevel@tonic-gate 		if (minor_name == NULL) {
2353*0Sstevel@tonic-gate 			free(devidp);
2354*0Sstevel@tonic-gate 			devid_free(devid_decode);
2355*0Sstevel@tonic-gate 			continue;
2356*0Sstevel@tonic-gate 		}
2357*0Sstevel@tonic-gate 
2358*0Sstevel@tonic-gate 		sz = devid_sizeof(devid_decode);
2359*0Sstevel@tonic-gate 		/* Copy to devid size buffer that ioctl expects */
2360*0Sstevel@tonic-gate 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
2361*0Sstevel@tonic-gate 			devid_free(devid_decode);
2362*0Sstevel@tonic-gate 			free(minor_name);
2363*0Sstevel@tonic-gate 			free(devidp);
2364*0Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2365*0Sstevel@tonic-gate 		}
2366*0Sstevel@tonic-gate 
2367*0Sstevel@tonic-gate 		(void) memcpy((void *)c.c_locator.l_devid,
2368*0Sstevel@tonic-gate 		    (void *)devid_decode, sz);
2369*0Sstevel@tonic-gate 
2370*0Sstevel@tonic-gate 		devid_free(devid_decode);
2371*0Sstevel@tonic-gate 
2372*0Sstevel@tonic-gate 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
2373*0Sstevel@tonic-gate 			free(minor_name);
2374*0Sstevel@tonic-gate 			free(devidp);
2375*0Sstevel@tonic-gate 			free((void *)c.c_locator.l_devid);
2376*0Sstevel@tonic-gate 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
2377*0Sstevel@tonic-gate 		}
2378*0Sstevel@tonic-gate 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
2379*0Sstevel@tonic-gate 		free(minor_name);
2380*0Sstevel@tonic-gate 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
2381*0Sstevel@tonic-gate 			MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
2382*0Sstevel@tonic-gate 		c.c_locator.l_devid_sz = sz;
2383*0Sstevel@tonic-gate 
2384*0Sstevel@tonic-gate 		devid_size = strlen(devidp);
2385*0Sstevel@tonic-gate 		buff += devid_size;
2386*0Sstevel@tonic-gate 
2387*0Sstevel@tonic-gate 		checksum = strtol(buff, &buff, 10);
2388*0Sstevel@tonic-gate 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
2389*0Sstevel@tonic-gate 			checksum += c.c_locator.l_driver[i];
2390*0Sstevel@tonic-gate 		for (i = 0; i < devid_size; i++) {
2391*0Sstevel@tonic-gate 			checksum += devidp[i];
2392*0Sstevel@tonic-gate 		}
2393*0Sstevel@tonic-gate 		free(devidp);
2394*0Sstevel@tonic-gate 
2395*0Sstevel@tonic-gate 		checksum += minor(c.c_locator.l_dev);
2396*0Sstevel@tonic-gate 		checksum += c.c_locator.l_blkno;
2397*0Sstevel@tonic-gate 		if (checksum != 42) {
2398*0Sstevel@tonic-gate 			/* overwritten later for more serious problems */
2399*0Sstevel@tonic-gate 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
2400*0Sstevel@tonic-gate 			free((void *)c.c_locator.l_devid);
2401*0Sstevel@tonic-gate 			continue;
2402*0Sstevel@tonic-gate 		}
2403*0Sstevel@tonic-gate 		c.c_locator.l_flags = 0;
2404*0Sstevel@tonic-gate 
2405*0Sstevel@tonic-gate 		/* use db location */
2406*0Sstevel@tonic-gate 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
2407*0Sstevel@tonic-gate 			free((void *)c.c_locator.l_devid);
2408*0Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
2409*0Sstevel@tonic-gate 		}
2410*0Sstevel@tonic-gate 
2411*0Sstevel@tonic-gate 		/* free up devid if in use */
2412*0Sstevel@tonic-gate 		free((void *)c.c_locator.l_devid);
2413*0Sstevel@tonic-gate 		c.c_locator.l_devid = (uint64_t)0;
2414*0Sstevel@tonic-gate 		c.c_locator.l_devid_flags = 0;
2415*0Sstevel@tonic-gate 	}
2416*0Sstevel@tonic-gate 	if ((fp) && (fclose(fp) != 0))
2417*0Sstevel@tonic-gate 		return (mdsyserror(ep, errno, META_DBCONF));
2418*0Sstevel@tonic-gate 
2419*0Sstevel@tonic-gate 	/* check for stale database */
2420*0Sstevel@tonic-gate 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
2421*0Sstevel@tonic-gate 	c.c_id = 0;
2422*0Sstevel@tonic-gate 	c.c_setno = MD_LOCAL_SET;
2423*0Sstevel@tonic-gate 
2424*0Sstevel@tonic-gate 	/* Don't need device id information from this ioctl */
2425*0Sstevel@tonic-gate 	c.c_locator.l_devid = (uint64_t)0;
2426*0Sstevel@tonic-gate 	c.c_locator.l_devid_flags = 0;
2427*0Sstevel@tonic-gate 
2428*0Sstevel@tonic-gate 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
2429*0Sstevel@tonic-gate 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
2430*0Sstevel@tonic-gate 			return (mdstealerror(ep, &c.c_mde));
2431*0Sstevel@tonic-gate 		mdclrerror(&c.c_mde);
2432*0Sstevel@tonic-gate 	}
2433*0Sstevel@tonic-gate 
2434*0Sstevel@tonic-gate 	if (c.c_flags & MDDB_C_STALE)
2435*0Sstevel@tonic-gate 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
2436*0Sstevel@tonic-gate 		    0, NULL));
2437*0Sstevel@tonic-gate 
2438*0Sstevel@tonic-gate 	/* success */
2439*0Sstevel@tonic-gate 	return (rval);
2440*0Sstevel@tonic-gate }
2441*0Sstevel@tonic-gate 
2442*0Sstevel@tonic-gate /*
2443*0Sstevel@tonic-gate  * meta_db_minreplica - returns the minimum size replica currently in use.
2444*0Sstevel@tonic-gate  */
2445*0Sstevel@tonic-gate daddr_t
2446*0Sstevel@tonic-gate meta_db_minreplica(
2447*0Sstevel@tonic-gate 	mdsetname_t	*sp,
2448*0Sstevel@tonic-gate 	md_error_t	*ep
2449*0Sstevel@tonic-gate )
2450*0Sstevel@tonic-gate {
2451*0Sstevel@tonic-gate 	md_replica_t		*r;
2452*0Sstevel@tonic-gate 	md_replicalist_t	*rl, *rlp = NULL;
2453*0Sstevel@tonic-gate 	daddr_t			nblks = 0;
2454*0Sstevel@tonic-gate 
2455*0Sstevel@tonic-gate 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
2456*0Sstevel@tonic-gate 		return (-1);
2457*0Sstevel@tonic-gate 
2458*0Sstevel@tonic-gate 	if (rlp == NULL)
2459*0Sstevel@tonic-gate 		return (-1);
2460*0Sstevel@tonic-gate 
2461*0Sstevel@tonic-gate 	/* find the smallest existing replica */
2462*0Sstevel@tonic-gate 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
2463*0Sstevel@tonic-gate 		r = rl->rl_repp;
2464*0Sstevel@tonic-gate 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
2465*0Sstevel@tonic-gate 	}
2466*0Sstevel@tonic-gate 
2467*0Sstevel@tonic-gate 	metafreereplicalist(rlp);
2468*0Sstevel@tonic-gate 	return (nblks);
2469*0Sstevel@tonic-gate }
2470*0Sstevel@tonic-gate 
2471*0Sstevel@tonic-gate /*
2472*0Sstevel@tonic-gate  * meta_get_replica_names
2473*0Sstevel@tonic-gate  *  returns an mdnamelist_t of replica slices
2474*0Sstevel@tonic-gate  */
2475*0Sstevel@tonic-gate /*ARGSUSED*/
2476*0Sstevel@tonic-gate int
2477*0Sstevel@tonic-gate meta_get_replica_names(
2478*0Sstevel@tonic-gate 	mdsetname_t	*sp,
2479*0Sstevel@tonic-gate 	mdnamelist_t	**nlpp,
2480*0Sstevel@tonic-gate 	int		options,
2481*0Sstevel@tonic-gate 	md_error_t	*ep
2482*0Sstevel@tonic-gate )
2483*0Sstevel@tonic-gate {
2484*0Sstevel@tonic-gate 	md_replicalist_t	*rlp = NULL;
2485*0Sstevel@tonic-gate 	md_replicalist_t	*rl;
2486*0Sstevel@tonic-gate 	mdnamelist_t		**tailpp = nlpp;
2487*0Sstevel@tonic-gate 	int			cnt = 0;
2488*0Sstevel@tonic-gate 
2489*0Sstevel@tonic-gate 	assert(nlpp != NULL);
2490*0Sstevel@tonic-gate 
2491*0Sstevel@tonic-gate 	if (!metaislocalset(sp))
2492*0Sstevel@tonic-gate 		goto out;
2493*0Sstevel@tonic-gate 
2494*0Sstevel@tonic-gate 	/* get replicas */
2495*0Sstevel@tonic-gate 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
2496*0Sstevel@tonic-gate 		cnt = -1;
2497*0Sstevel@tonic-gate 		goto out;
2498*0Sstevel@tonic-gate 	}
2499*0Sstevel@tonic-gate 
2500*0Sstevel@tonic-gate 	/* build name list */
2501*0Sstevel@tonic-gate 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
2502*0Sstevel@tonic-gate 		/*
2503*0Sstevel@tonic-gate 		 * Add the name struct to the end of the
2504*0Sstevel@tonic-gate 		 * namelist but keep a pointer to the last
2505*0Sstevel@tonic-gate 		 * element so that we don't incur the overhead
2506*0Sstevel@tonic-gate 		 * of traversing the list each time
2507*0Sstevel@tonic-gate 		 */
2508*0Sstevel@tonic-gate 		tailpp = meta_namelist_append_wrapper(
2509*0Sstevel@tonic-gate 			tailpp, rl->rl_repp->r_namep);
2510*0Sstevel@tonic-gate 		++cnt;
2511*0Sstevel@tonic-gate 	}
2512*0Sstevel@tonic-gate 
2513*0Sstevel@tonic-gate 	/* cleanup, return count or error */
2514*0Sstevel@tonic-gate out:
2515*0Sstevel@tonic-gate 	metafreereplicalist(rlp);
2516*0Sstevel@tonic-gate 	return (cnt);
2517*0Sstevel@tonic-gate }
2518