xref: /onnv-gate/usr/src/lib/lvm/libmeta/common/meta_metad_subr.c (revision 3165:63d5bf0b6167)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Just in case we're not in a build environment, make sure that
30  * TEXT_DOMAIN gets set to something.
31  */
32 #if !defined(TEXT_DOMAIN)
33 #define	TEXT_DOMAIN "SYS_TEST"
34 #endif
35 
36 /*
37  * interface between user land and the set records
38  */
39 
40 #include <meta.h>
41 #include <metad.h>
42 #include <sdssc.h>
43 #include <syslog.h>
44 #include <sys/cladm.h>
45 #include "meta_set_prv.h"
46 
47 #include <sys/sysevent/eventdefs.h>
48 #include <sys/sysevent/svm.h>
49 
/* Head of the in-memory cache of set records (singly linked via sr_next). */
static	md_set_record	*setrecords = NULL;

/*
 * Snarf state, as assigned in this file:
 *	0 - nothing snarfed (also reset when the cache becomes empty or a
 *	    snarf fails)
 *	1 - set_snarf() is in progress
 *	2 - set_snarf() completed
 *	3 - sr_validate() has checked the snarfed records
 */
static	int		setsnarfdone = 0;

/* One side/key pair; used to build the list of sidenames still in use. */
typedef struct key_lst_t {
	side_t			kl_side;
	mdkey_t			kl_key;
	struct key_lst_t	*kl_next;
} key_lst_t;

/* One database record id in a singly linked list of record ids. */
typedef struct ur_recid_lst {
	mddb_recid_t		url_recid;
	struct	ur_recid_lst	*url_nx;
} ur_recid_lst_t;

/* Record ids seen in use while snarfing (filled in by set_snarf()). */
static ur_recid_lst_t		*url_used = NULL;
/* Record ids queued for deletion at the end of set_snarf(). */
static ur_recid_lst_t		*url_tode = NULL;
66 
67 static void
url_addl(ur_recid_lst_t ** urlpp,mddb_recid_t recid)68 url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
69 {
70 	/* Run to the end of the list */
71 	for (/* void */; (*urlpp != NULL); urlpp = &(*urlpp)->url_nx)
72 		if ((*urlpp)->url_recid == recid)
73 			return;
74 
75 	/* Add the new member */
76 	*urlpp = Zalloc(sizeof (**urlpp));
77 	if (*urlpp == NULL)
78 		return;
79 
80 	(*urlpp)->url_recid = recid;
81 }
82 
83 static int
url_findl(ur_recid_lst_t * urlp,mddb_recid_t recid)84 url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
85 {
86 	while (urlp != NULL) {
87 		if (urlp->url_recid == recid)
88 			return (1);
89 		urlp = urlp->url_nx;
90 	}
91 	return (0);
92 }
93 
94 static void
url_freel(ur_recid_lst_t ** urlpp)95 url_freel(ur_recid_lst_t **urlpp)
96 {
97 	ur_recid_lst_t	*urlp;
98 	ur_recid_lst_t	*turlp;
99 
100 	for (turlp = *urlpp; turlp != NULL; turlp = urlp) {
101 		urlp = turlp->url_nx;
102 		Free(turlp);
103 	}
104 	*urlpp = (ur_recid_lst_t *)NULL;
105 }
106 
/*
 * Check the size of a set record that was just read into reqp->ur_data
 * and, if it does not match the current md_set_record, bring it up to
 * date.
 *
 * reqp	request describing the record; ur_data, ur_size and (after a
 *	daemon conversion) ur_recid may be replaced.
 * ep	receives the ioctl error when a conversion step fails.
 *
 * Outside the daemon a short record is only grown and zero-padded in
 * memory.  Inside the daemon a new database record of the current size
 * is created, the old record is marked MD_SR_CVT, both are committed and
 * the old record id is queued on url_tode for deletion.
 *
 * Returns 0 when nothing had to be done or the conversion succeeded;
 * otherwise the value returned by mdstealerror().
 */
static int
ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
{
	mddb_userreq_t	req;
	md_set_record	*sr;
	int		recs[3];

	/* Already the current size: nothing to convert. */
	if (reqp->ur_size == sizeof (*sr))
		return (0);

	if (! md_in_daemon) {
		/* Larger records are left untouched. */
		if (reqp->ur_size >= sizeof (*sr))
			return (0);

		/* Grow the in-memory copy and zero the added tail. */
		reqp->ur_data = (uintptr_t)Realloc((void *)(uintptr_t)
		    reqp->ur_data, sizeof (*sr));
		(void) memset(
		    ((char *)(uintptr_t)reqp->ur_data) + reqp->ur_size,
		    '\0', sizeof (*sr) - reqp->ur_size);
		reqp->ur_size = sizeof (*sr);
		return (0);
	}

	/*
	 * If here, then the daemon is calling, and so the automatic
	 * conversion will be performed.
	 */

	/* shorthand */
	req = *reqp;			/* structure assignment */
	sr = (md_set_record *)(uintptr_t)req.ur_data;

	/* A record already flagged as converted is not converted again. */
	if (sr->sr_flags & MD_SR_CVT)
		return (0);

	/* Leave multi-node set records alone */
	if (MD_MNSET_REC(sr)) {
		return (0);
	}

	/* Mark the old record as converted */
	sr->sr_flags |= MD_SR_CVT;

	/*
	 * Write the flagged old record back.  The METAD_SETUP_* macros are
	 * defined elsewhere; presumably they fill in the local "req".
	 */
	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)

	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
		return (mdstealerror(ep, &req.ur_mde));

	/* Create space for the new record */
	METAD_SETUP_SR(MD_DB_CREATE, 0);
	req.ur_size = sizeof (*sr);

	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
		return (mdstealerror(ep, &req.ur_mde));

	/* Allocate the new record */
	sr = Zalloc(sizeof (*sr));

	/*
	 * copy all the data from the record being converted
	 *
	 * NOTE(review): the copy length is the old record's size and is not
	 * bounded by sizeof (*sr); a non-multi-node record larger than the
	 * current structure would overrun the new buffer - confirm such
	 * records cannot reach this point.
	 */
	(void) memmove(sr, (void *)(uintptr_t)reqp->ur_data, reqp->ur_size);
	/* The copy inherited the flag set above; the new record is live. */
	sr->sr_flags &= ~MD_SR_CVT;

	/* adjust the selfid to point to the new record */
	sr->sr_selfid = req.ur_recid;

	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
	req.ur_size = sizeof (*sr);
	req.ur_data = (uintptr_t)sr;

	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
		Free(sr);
		return (mdstealerror(ep, &req.ur_mde));
	}

	/* Commit the old and the new */
	recs[0] = ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid;
	recs[1] = sr->sr_selfid;
	recs[2] = 0;	/* the id list is passed with a trailing 0 entry */

	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
	req.ur_size = sizeof (recs);
	req.ur_data = (uintptr_t)recs;

	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
		Free(sr);
		return (mdstealerror(ep, &req.ur_mde));
	}

	/* Add the old record to the list of records to delete */
	url_addl(&url_tode,
	    ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid);

	/* Free the old records space */
	Free((void *)(uintptr_t)reqp->ur_data);

	/* Adjust the reqp structure to point to the new record and size */
	reqp->ur_recid = sr->sr_selfid;
	reqp->ur_size = sizeof (*sr);
	reqp->ur_data = (uintptr_t)sr;

	return (0);
}
209 
210 mddb_userreq_t *
get_db_rec(md_ur_get_cmd_t cmd,set_t setno,mddb_type_t type,uint_t type2,mddb_recid_t * idp,md_error_t * ep)211 get_db_rec(
212 	md_ur_get_cmd_t	cmd,
213 	set_t		setno,
214 	mddb_type_t	type,
215 	uint_t		type2,
216 	mddb_recid_t	*idp,
217 	md_error_t	*ep
218 )
219 {
220 	mddb_userreq_t	*reqp = Zalloc(sizeof (*reqp));
221 	mdsetname_t	*sp;
222 	md_set_desc	*sd;
223 	int		ureq;
224 
225 	if ((sp = metasetnosetname(setno, ep)) == NULL) {
226 		Free(reqp);
227 		return (NULL);
228 	}
229 
230 	if (metaislocalset(sp)) {
231 		ureq = MD_DB_USERREQ;
232 	} else {
233 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
234 			Free(reqp);
235 			return (NULL);
236 		}
237 		ureq = MD_MNSET_DESC(sd) ? MD_MN_DB_USERREQ : MD_DB_USERREQ;
238 	}
239 
240 	reqp->ur_setno = setno;
241 	reqp->ur_type = type;
242 	reqp->ur_type2 = type2;
243 
244 	switch (cmd) {
245 	    case MD_UR_GET_NEXT:
246 		    reqp->ur_cmd = MD_DB_GETNEXTREC;
247 		    reqp->ur_recid = *idp;
248 		    if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL)
249 			!= 0) {
250 			    (void) mdstealerror(ep, &reqp->ur_mde);
251 			    Free(reqp);
252 			    return (NULL);
253 		    }
254 		    *idp = reqp->ur_recid;
255 		    break;
256 	    case MD_UR_GET_WKEY:
257 		    reqp->ur_recid = *idp;
258 		    break;
259 	}
260 
261 	if (*idp <= 0) {
262 		Free(reqp);
263 		return (NULL);
264 	}
265 
266 	reqp->ur_cmd = MD_DB_GETSIZE;
267 	if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
268 		(void) mdstealerror(ep, &reqp->ur_mde);
269 		Free(reqp);
270 
271 		*idp = 0;
272 		return (NULL);
273 	}
274 
275 	reqp->ur_cmd = MD_DB_GETDATA;
276 	reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
277 	if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
278 		(void) mdstealerror(ep, &reqp->ur_mde);
279 		Free((void *)(uintptr_t)reqp->ur_data);
280 		Free(reqp);
281 		*idp = 0;
282 		return (NULL);
283 	}
284 
285 	switch (reqp->ur_type) {
286 	    case MDDB_USER:
287 		    switch (reqp->ur_type2) {
288 			case MDDB_UR_SR:
289 				if (ckncvt_set_record(reqp, ep)) {
290 					Free((void *)(uintptr_t)reqp->ur_data);
291 					Free(reqp);
292 					return (NULL);
293 				}
294 				break;
295 		    }
296 		    break;
297 	}
298 
299 	return (reqp);
300 }
301 
302 void *
get_ur_rec(set_t setno,md_ur_get_cmd_t cmd,uint_t type2,mddb_recid_t * idp,md_error_t * ep)303 get_ur_rec(
304 	set_t		setno,
305 	md_ur_get_cmd_t	cmd,
306 	uint_t		type2,
307 	mddb_recid_t	*idp,
308 	md_error_t	*ep
309 )
310 {
311 	mddb_userreq_t	*reqp = NULL;
312 	void		*ret_val;
313 
314 	assert(idp != NULL);
315 
316 	reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
317 	if (reqp == NULL)
318 		return (NULL);
319 
320 	ret_val = (void *)(uintptr_t)reqp->ur_data;
321 	Free(reqp);
322 	return (ret_val);
323 }
324 
325 /*
326  * Called by rpc.metad on startup of disksets to cleanup
327  * the host entries associated with a diskset.  This is needed if
328  * a node failed or the metaset command was killed during the addition
329  * of a node to a diskset.
330  *
331  * This is called for all traditional disksets.
332  * This is only called for MNdisksets when in there is only one node
333  * in all of the MN disksets and this node is not running SunCluster.
334  * (Otherwise, the cleanup of the host entries is handled by a
335  * reconfig cycle that the SunCluster software calls).
336  */
337 static int
sr_hosts(md_set_record * sr)338 sr_hosts(md_set_record *sr)
339 {
340 	int		i,
341 			nid = 0,
342 			self_in_set = FALSE;
343 	md_error_t	xep = mdnullerror;
344 	md_mnnode_record	*nr;
345 	md_mnset_record		*mnsr;
346 
347 	if (MD_MNSET_REC(sr)) {
348 		mnsr = (struct md_mnset_record *)sr;
349 		nr = mnsr->sr_nodechain;
350 		/*
351 		 * Already guaranteed to be only 1 node in set which
352 		 * is mynode (done in sr_validate).
353 		 * Now, check if node is in the OK state.  If not in
354 		 * the OK state, leave self_in_set FALSE so that
355 		 * set will be removed.
356 		 */
357 		if (nr->nr_flags & MD_MN_NODE_OK)
358 			self_in_set = TRUE;
359 	} else {
360 		for (i = 0; i < MD_MAXSIDES; i++) {
361 			/* Skip empty slots */
362 			if (sr->sr_nodes[i][0] == '\0')
363 				continue;
364 
365 			/* Make sure we are in the set and skip this node */
366 			if (strcmp(sr->sr_nodes[i], mynode()) == 0) {
367 				self_in_set = TRUE;
368 				break;
369 			}
370 		}
371 	}
372 
373 	if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr)))) {
374 		/*
375 		 * Under some circumstances (/etc/cluster/nodeid file is
376 		 * missing) it is possible for the call to _cladm() to
377 		 * return 0 and a nid of 0. In this instance do not remove
378 		 * the set as it is Sun Cluster error that needs to be fixed.
379 		 */
380 		if (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0 && nid > 0) {
381 
382 			/*
383 			 * See if we've got a node which has been booted in
384 			 * non-cluster mode. If true the nodeid will match
385 			 * one of the sr_nodes values because the conversion
386 			 * from nodeid to hostname failed to occur.
387 			 */
388 			for (i = 0; i < MD_MAXSIDES; i++) {
389 				if (sr->sr_nodes[i][0] == 0)
390 					continue;
391 				if (atoi(sr->sr_nodes[i]) == nid)
392 					self_in_set = TRUE;
393 			}
394 
395 			/* If we aren't in the set, delete the set */
396 			if (self_in_set == FALSE) {
397 				syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
398 				    "Removing set %s from database\n"),
399 				    sr->sr_setname);
400 				s_delset(sr->sr_setname, &xep);
401 				if (! mdisok(&xep))
402 					mdclrerror(&xep);
403 				return (1);
404 			}
405 		} else {
406 			/*
407 			 * Send a message to syslog and return without
408 			 * deleting any sets
409 			 */
410 			syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
411 			    "Call to _cladm failed for set %s nodeid %d\n"),
412 			    sr->sr_setname, nid);
413 			return (1);
414 		}
415 	}
416 	return (0);
417 }
418 
419 void
sr_del_drv(md_set_record * sr,mddb_recid_t recid)420 sr_del_drv(md_set_record *sr, mddb_recid_t recid)
421 {
422 	mddb_userreq_t		req;
423 	md_error_t		xep = mdnullerror;
424 
425 	if (!s_ownset(sr->sr_setno, &xep)) {
426 		if (! mdisok(&xep))
427 			mdclrerror(&xep);
428 		goto skip;
429 	}
430 
431 	/* delete the replicas? */
432 	/* release ownership of the drive? */
433 	/* NOTE: We may not have a name, so both of the above are ugly! */
434 
435 skip:
436 	(void) memset(&req, 0, sizeof (req));
437 	METAD_SETUP_DR(MD_DB_DELETE, recid)
438 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
439 		mdclrerror(&req.ur_mde);
440 
441 	dr_cache_del(sr, recid);
442 }
443 
444 static void
sr_drvs(md_set_record * sr)445 sr_drvs(md_set_record *sr)
446 {
447 	md_drive_record		*dr;
448 	int			i;
449 	int			modified = 0;
450 	int			sidesok;
451 	mdnm_params_t		nm;
452 	static	char		device_name[MAXPATHLEN];
453 	md_error_t		xep = mdnullerror;
454 	md_mnnode_record	*nr;
455 	md_mnset_record		*mnsr;
456 
457 	for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
458 		/* If we were mid-add, cleanup */
459 		if ((dr->dr_flags & MD_DR_ADD)) {
460 			sr_del_drv(sr, dr->dr_selfid);
461 			modified++;
462 			continue;
463 		}
464 
465 		sidesok = TRUE;
466 		if (MD_MNSET_REC(sr)) {
467 			mnsr = (md_mnset_record *)sr;
468 			nr = mnsr->sr_nodechain;
469 			/*
470 			 * MultiNode disksets only have entries for
471 			 * their side in the local set.  Verify
472 			 * that drive has a name associated with
473 			 * this node's side.
474 			 */
475 			while (nr) {
476 				/* Find my node */
477 				if (strcmp(mynode(), nr->nr_nodename) != 0) {
478 					nr = nr->nr_next;
479 					continue;
480 				}
481 
482 				(void) memset(&nm, '\0', sizeof (nm));
483 				nm.setno = MD_LOCAL_SET;
484 				nm.side = nr->nr_nodeid;
485 				nm.key = dr->dr_key;
486 				nm.devname = (uint64_t)device_name;
487 
488 				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
489 				    NULL) != 0) {
490 					if (! mdissyserror(&nm.mde, ENOENT)) {
491 						mdclrerror(&nm.mde);
492 						return;
493 					}
494 				}
495 
496 				/*
497 				 * If entry is found for this node, then
498 				 * break out of loop walking through
499 				 * node list.  For a multi-node diskset,
500 				 * there should only be an entry for
501 				 * this node.
502 				 */
503 				if (nm.key != MD_KEYWILD &&
504 				    ! mdissyserror(&nm.mde, ENOENT)) {
505 					break;
506 				}
507 
508 				/*
509 				 * If entry is not found for this node,
510 				 * then delete the drive.  No need to
511 				 * continue through the node loop since
512 				 * our node has already been found.
513 				 */
514 				sidesok = FALSE;
515 				mdclrerror(&nm.mde);
516 
517 				/* If we are missing a sidename, cleanup */
518 				sr_del_drv(sr, dr->dr_selfid);
519 				modified++;
520 
521 				break;
522 			}
523 		} else  {
524 			for (i = 0; i < MD_MAXSIDES; i++) {
525 				/* Skip empty slots */
526 				if (sr->sr_nodes[i][0] == '\0')
527 					continue;
528 
529 				(void) memset(&nm, '\0', sizeof (nm));
530 				nm.setno = MD_LOCAL_SET;
531 				nm.side = i + SKEW;
532 				nm.key = dr->dr_key;
533 				nm.devname = (uint64_t)device_name;
534 
535 				if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
536 				    NULL) != 0) {
537 					if (! mdissyserror(&nm.mde, ENOENT)) {
538 						mdclrerror(&nm.mde);
539 						return;
540 					}
541 				}
542 
543 				if (nm.key != MD_KEYWILD &&
544 				    ! mdissyserror(&nm.mde, ENOENT))
545 					continue;
546 
547 				sidesok = FALSE;
548 				mdclrerror(&nm.mde);
549 
550 				/* If we are missing a sidename, cleanup */
551 				sr_del_drv(sr, dr->dr_selfid);
552 				modified++;
553 
554 				break;
555 			}
556 		}
557 
558 		if (sidesok == FALSE)
559 			continue;
560 
561 		/*
562 		 * If we got this far, the drive record is either in the OK
563 		 * or DEL state, if it is in the DEL state and the sidenames
564 		 * all checked out, then we will make it OK.
565 		 */
566 		if ((dr->dr_flags & MD_DR_OK))
567 			continue;
568 
569 		dr->dr_flags = MD_DR_OK;
570 
571 		modified++;
572 	}
573 
574 	if (modified) {
575 		commitset(sr, FALSE, &xep);
576 		if (! mdisok(&xep))
577 			mdclrerror(&xep);
578 	}
579 }
580 
581 static void
add_key_to_lst(key_lst_t ** klpp,side_t side,mdkey_t key)582 add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
583 {
584 	key_lst_t	*klp;
585 
586 	assert(klpp != NULL);
587 
588 	for (/* void */; *klpp != NULL; klpp = &(*klpp)->kl_next)
589 		/* void */;
590 
591 	/* allocate new list element */
592 	klp = *klpp = Zalloc(sizeof (*klp));
593 
594 	klp->kl_side = side;
595 	klp->kl_key  = key;
596 }
597 
#ifdef DUMPKEYLST
/*
 * Debug aid: print "tag" followed by every side/key pair in the list.
 * Only compiled when DUMPKEYLST is defined.
 */
static void
pr_key_lst(char *tag, key_lst_t *klp)
{
	md_eprintf("Tag=%s\n", tag);
	while (klp != NULL) {
		md_eprintf("side=%d, key=%lu\n", klp->kl_side, klp->kl_key);
		klp = klp->kl_next;
	}
}
#endif	/* DUMPKEYLST */
609 
610 static int
key_in_key_lst(key_lst_t * klp,side_t side,mdkey_t key)611 key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
612 {
613 	key_lst_t	*tklp;
614 
615 	for (tklp = klp; tklp != NULL; tklp = tklp->kl_next)
616 		if (tklp->kl_side == side && tklp->kl_key == key)
617 			return (1);
618 
619 	return (0);
620 }
621 
622 static void
destroy_key_lst(key_lst_t ** klpp)623 destroy_key_lst(key_lst_t **klpp)
624 {
625 	key_lst_t	*tklp, *klp;
626 
627 	assert(klpp != NULL);
628 
629 	tklp = klp = *klpp;
630 	while (klp != NULL) {
631 		tklp = klp;
632 		klp = klp->kl_next;
633 		Free(tklp);
634 	}
635 	*klpp = NULL;
636 }
637 
638 static void
sr_sidenms(void)639 sr_sidenms(void)
640 {
641 	md_drive_record		*dr;
642 	md_set_record		*sr;
643 	key_lst_t		*use = NULL;
644 	mdnm_params_t		nm;
645 	int			i;
646 	md_mnset_record		*mnsr;
647 	md_mnnode_record	*nr;
648 	side_t			myside = 0;
649 
650 	/*
651 	 * We now go through the list of set and drive records collecting
652 	 * the key/side pairs that are being used.
653 	 */
654 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
655 		/*
656 		 * To handle the multi-node diskset case, get the sideno
657 		 * associated with this node.  This sideno will be the
658 		 * same across all multi-node disksets.
659 		 */
660 		if ((myside == 0) && (MD_MNSET_REC(sr))) {
661 			mnsr = (struct md_mnset_record *)sr;
662 			nr = mnsr->sr_nodechain;
663 			while (nr) {
664 				if (strcmp(mynode(), nr->nr_nodename) == 0) {
665 					myside = nr->nr_nodeid;
666 					break;
667 				}
668 				nr = nr->nr_next;
669 			}
670 			/*
671 			 * If this node is not in this MNset -
672 			 * then skip this set.
673 			 */
674 			if (!nr) {
675 				continue;
676 			}
677 		}
678 
679 		for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
680 			if (MD_MNSET_REC(sr)) {
681 				/*
682 				 * There are no non-local sidenames in the
683 				 * local set for a multi-node diskset.
684 				 */
685 				add_key_to_lst(&use, myside, dr->dr_key);
686 			} else {
687 				for (i = 0; i < MD_MAXSIDES; i++) {
688 					/* Skip empty slots */
689 					if (sr->sr_nodes[i][0] == '\0')
690 						continue;
691 
692 					add_key_to_lst(&use, i + SKEW,
693 						dr->dr_key);
694 				}
695 			}
696 		}
697 	}
698 
699 #ifdef DUMPKEYLST
700 	pr_key_lst("use", use);
701 #endif	/* DUMPKEYLST */
702 
703 	/*
704 	 * We take the list above and get all non-local sidenames, checking
705 	 * each to see if they are in use, if they are not used, we delete them.
706 	 * Do the check for myside to cover multinode disksets.
707 	 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
708 	 * If any multi-node disksets were present, myside would be non-zero.
709 	 * myside is the same for all multi-node disksets for this node.
710 	 */
711 	if (myside) {
712 		(void) memset(&nm, '\0', sizeof (nm));
713 		nm.setno = MD_LOCAL_SET;
714 		nm.side = myside;
715 		nm.key = MD_KEYWILD;
716 
717 		/*CONSTCOND*/
718 		while (1) {
719 			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
720 			    NULL) != 0) {
721 				mdclrerror(&nm.mde);
722 				break;
723 			}
724 
725 			if (nm.key == MD_KEYWILD)
726 				break;
727 
728 			if (! key_in_key_lst(use, nm.side, nm.key)) {
729 				if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
730 				    NULL) != 0) {
731 					mdclrerror(&nm.mde);
732 					continue;
733 				}
734 			}
735 		}
736 	}
737 	/* Now handle the non-multinode disksets */
738 	for (i = 0; i < MD_MAXSIDES; i++) {
739 		(void) memset(&nm, '\0', sizeof (nm));
740 		nm.setno = MD_LOCAL_SET;
741 		nm.side = i + SKEW;
742 		nm.key = MD_KEYWILD;
743 
744 		/*CONSTCOND*/
745 		while (1) {
746 			if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
747 			    NULL) != 0) {
748 				mdclrerror(&nm.mde);
749 				break;
750 			}
751 
752 			if (nm.key == MD_KEYWILD)
753 				break;
754 
755 			if (! key_in_key_lst(use, nm.side, nm.key)) {
756 				if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
757 				    NULL) != 0) {
758 					mdclrerror(&nm.mde);
759 					continue;
760 				}
761 			}
762 		}
763 	}
764 
765 	/* Cleanup */
766 	destroy_key_lst(&use);
767 }
768 
769 void
sr_validate(void)770 sr_validate(void)
771 {
772 	md_set_record			*sr;
773 	md_error_t			xep = mdnullerror;
774 	int				mnset_single_node;
775 	md_mnnode_record		*nr;
776 	md_mnset_record			*mnsr;
777 
778 	assert(setsnarfdone != 0);
779 
780 	/* We have validated the records already */
781 	if (setsnarfdone == 3)
782 		return;
783 
784 	/*
785 	 * Check if we are in a single node non-SC3.x environmemnt
786 	 */
787 	mnset_single_node = meta_mn_singlenode();
788 	/*
789 	 * If a possible single_node situation, verify that all
790 	 * MN disksets have only one node (which is mynode()).
791 	 */
792 	if (mnset_single_node) {
793 		for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
794 			if (MD_MNSET_REC(sr)) {
795 				mnsr = (struct md_mnset_record *)sr;
796 				nr = mnsr->sr_nodechain;
797 				/*
798 				 * If next pointer is non-null (more than
799 				 * one node in list) or if the single node
800 				 * isn't my node - reset single node flag.
801 				 */
802 				if ((nr->nr_next) ||
803 				    (strcmp(nr->nr_nodename, mynode()) != 0)) {
804 					mnset_single_node = 0;
805 					break;
806 				}
807 			}
808 		}
809 	}
810 
811 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
812 		/*
813 		 * If a MN diskset and not in the single node
814 		 * situation, then don't validate the MN set.
815 		 * This is done during a reconfig cycle since all
816 		 * nodes must take the same action.
817 		 */
818 		if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
819 			continue;
820 
821 		/* Since we do "partial" snarf's, we only check new entries */
822 		if (! (sr->sr_flags & MD_SR_CHECK))
823 			continue;
824 
825 		/* If we were mid-add, cleanup */
826 		if ((sr->sr_flags & MD_SR_ADD)) {
827 			s_delset(sr->sr_setname, &xep);
828 			if (! mdisok(&xep))
829 				mdclrerror(&xep);
830 			continue;
831 		}
832 
833 		/* Make sure we are in the set. */
834 		if (sr_hosts(sr))
835 			continue;
836 
837 		/* Check has been done, clear the flag */
838 		if ((sr->sr_flags & MD_SR_CHECK))
839 			sr->sr_flags &= ~MD_SR_CHECK;
840 
841 		/*
842 		 * If we got here, we are in the set, make sure the flags make
843 		 * sense.
844 		 */
845 		if (! (sr->sr_flags & MD_SR_OK)) {
846 			sr->sr_flags &= ~MD_SR_STATE_FLAGS;
847 			sr->sr_flags |= MD_SR_OK;
848 			commitset(sr, FALSE, &xep);
849 			if (! mdisok(&xep))
850 				mdclrerror(&xep);
851 		}
852 
853 		/* Make sure all the drives are in a stable state. */
854 		sr_drvs(sr);
855 	}
856 
857 	/* Cleanup any stray sidenames */
858 	sr_sidenms();
859 
860 	setsnarfdone = 3;
861 }
862 
863 static md_set_record *
sr_in_cache(mddb_recid_t recid)864 sr_in_cache(mddb_recid_t recid)
865 {
866 	md_set_record *tsr;
867 
868 	for (tsr = setrecords; tsr != NULL; tsr = tsr->sr_next)
869 		if (tsr->sr_selfid == recid)
870 			return (tsr);
871 	return ((md_set_record *)NULL);
872 }
873 
874 int
set_snarf(md_error_t * ep)875 set_snarf(md_error_t *ep)
876 {
877 	md_set_record			*sr;
878 	md_mnset_record			*mnsr;
879 	md_set_record			*tsr;
880 	md_drive_record			*dr;
881 	mddb_userreq_t			*reqp;
882 	ur_recid_lst_t			*urlp;
883 	mddb_recid_t			id;
884 	mddb_recid_t			*p;
885 	md_error_t			xep = mdnullerror;
886 	md_mnnode_record		*nr;
887 	mddb_set_node_params_t		snp;
888 	int				nodecnt;
889 	mndiskset_membershiplist_t	 *nl, *nl2;
890 
891 	/* We have done the snarf call */
892 	if (setsnarfdone != 0)
893 		return (0);
894 
895 	if (meta_setup_db_locations(ep) != 0) {
896 		if (! mdismddberror(ep, MDE_DB_STALE))
897 			return (-1);
898 		mdclrerror(ep);
899 	}
900 
901 	/*
902 	 * Get membershiplist from API routine.
903 	 * If there's an error, just use a NULL
904 	 * nodelist.
905 	 */
906 	if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
907 		nodecnt = 0;  /* no nodes are alive */
908 		nl = NULL;
909 		mdclrerror(ep);
910 	}
911 
912 	/* Let sr_cache_add and dr_cache_add know we are doing the snarf */
913 	setsnarfdone = 1;
914 
915 	/* Go get the set records */
916 	id = 0;
917 	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
918 							&id, ep)) != NULL) {
919 		sr->sr_next = NULL;
920 		sr->sr_drivechain = NULL;
921 
922 		/*
923 		 * Cluster nodename support
924 		 * Convert nodeid -> nodename
925 		 * Don't do this for MN disksets since we've already stored
926 		 * both the nodeid and name.
927 		 */
928 		if (!(MD_MNSET_REC(sr)))
929 			sdssc_cm_sr_nid2nm(sr);
930 
931 		/* If we were mid-cvt, cleanup */
932 		if (sr->sr_flags & MD_SR_CVT) {
933 			/* If the daemon is calling, cleanup */
934 			if (md_in_daemon)
935 				url_addl(&url_tode, sr->sr_selfid);
936 			continue;
937 		}
938 
939 		if (md_in_daemon)
940 			url_addl(&url_used, sr->sr_selfid);
941 
942 		/* Skip cached records */
943 		tsr = sr_in_cache(sr->sr_selfid);
944 		if (tsr != (md_set_record *)NULL) {
945 			if (MD_MNSET_REC(sr)) {
946 				mnsr = (struct md_mnset_record *)sr;
947 				Free(mnsr);
948 			} else {
949 				Free(sr);
950 			}
951 			if (md_in_daemon)
952 				for (dr = tsr->sr_drivechain;
953 				    dr != (md_drive_record *)NULL;
954 				    dr = dr->dr_next)
955 					url_addl(&url_used, dr->dr_selfid);
956 			continue;
957 		}
958 
959 		/* Mark the record as one to be checked */
960 		sr->sr_flags |= MD_SR_CHECK;
961 
962 		sr_cache_add(sr);
963 
964 		/* If MNdiskset, go get the node records */
965 		if (MD_MNSET_REC(sr)) {
966 			mnsr = (struct md_mnset_record *)sr;
967 			mnsr->sr_nodechain = NULL;
968 			p = &mnsr->sr_noderec;
969 			while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
970 					MDDB_UR_NR, p, ep)) != NULL) {
971 				nr->nr_next = NULL;
972 
973 				if (md_in_daemon)
974 					url_addl(&url_used, nr->nr_selfid);
975 
976 				/*
977 				 * Turn off ALIVE node flag based on member
978 				 * list.
979 				 * If ALIVE flag is not set, reset OWN flag.
980 				 * If this node is mynode, set the OWN flag
981 				 * to match the ownership of the diskset.
982 				 */
983 				if (md_in_daemon) {
984 					nr->nr_flags &= ~MD_MN_NODE_ALIVE;
985 					nl2 = nl;
986 					while (nl2) {
987 						/*
988 						 * If in member list,
989 						 * set alive.
990 						 */
991 						if (nl2->msl_node_id ==
992 						    nr->nr_nodeid) {
993 							nr->nr_flags |=
994 							    MD_MN_NODE_ALIVE;
995 							break;
996 						}
997 						nl2 = nl2->next;
998 					}
999 					/*
1000 					 * If mynode is in member list, then
1001 					 * check to see if set is snarfed.
1002 					 * If set snarfed, set own flag;
1003 					 * otherwise reset it.
1004 					 * Don't change master even if
1005 					 * node isn't an owner node, since
1006 					 * node may be master, but hasn't
1007 					 * joined the set yet.
1008 					 */
1009 					if (nr->nr_flags & MD_MN_NODE_ALIVE) {
1010 					    if (strcmp(nr->nr_nodename,
1011 						mynode()) == 0) {
1012 						    if (s_ownset(
1013 							mnsr->sr_setno, ep)) {
1014 							nr->nr_flags |=
1015 							    MD_MN_NODE_OWN;
1016 						    } else {
1017 							nr->nr_flags &=
1018 							    ~MD_MN_NODE_OWN;
1019 						    }
1020 					    }
1021 					} else {
1022 					    if (strcmp(nr->nr_nodename,
1023 						mynode()) == 0) {
1024 						/*
1025 						 * If my node isn't in member
1026 						 * list then reset master.
1027 						 */
1028 						mnsr = (struct
1029 						    md_mnset_record *)sr;
1030 						mnsr->sr_master_nodeid =
1031 							MD_MN_INVALID_NID;
1032 						mnsr->sr_master_nodenm[0] =
1033 							'\0';
1034 					    }
1035 					    nr->nr_flags &= ~MD_MN_NODE_OWN;
1036 					}
1037 				}
1038 
1039 				/*
1040 				 * Must grab nr_nextrec now since
1041 				 * mnnr_cache_add may change it
1042 				 * (mnnr_cache_add is storing the nodes in
1043 				 * an ascending nodeid order list in order
1044 				 * to support reconfig).
1045 				 */
1046 				if (nr->nr_nextrec != 0)
1047 					p = &nr->nr_nextrec;
1048 				else
1049 					p = NULL;
1050 
1051 				mnnr_cache_add((struct md_mnset_record *)sr,
1052 					nr);
1053 
1054 				if ((md_in_daemon) &&
1055 				    (strcmp(nr->nr_nodename, mynode()) == 0)) {
1056 					(void) memset(&snp, 0, sizeof (snp));
1057 					snp.sn_nodeid = nr->nr_nodeid;
1058 					snp.sn_setno = mnsr->sr_setno;
1059 					if (metaioctl(MD_MN_SET_NODEID, &snp,
1060 					    &snp.sn_mde, NULL) != 0) {
1061 						(void) mdstealerror(ep,
1062 							&snp.sn_mde);
1063 					}
1064 				}
1065 
1066 				if (p == NULL)
1067 					break;
1068 			}
1069 			if (! mdisok(ep)) {
1070 				if (! mdissyserror(ep, ENOENT))
1071 					goto out;
1072 				mdclrerror(ep);
1073 			}
1074 		}
1075 
1076 		if (sr->sr_driverec == 0)
1077 			continue;
1078 
1079 		/* Go get the drive records */
1080 		p = &sr->sr_driverec;
1081 		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
1082 				MDDB_UR_DR, p, ep)) != NULL) {
1083 			dr->dr_next = NULL;
1084 
1085 			if (md_in_daemon)
1086 				url_addl(&url_used, dr->dr_selfid);
1087 
1088 			dr_cache_add(sr, dr);
1089 
1090 			if (dr->dr_nextrec == 0)
1091 				break;
1092 
1093 			p = &dr->dr_nextrec;
1094 		}
1095 		if (! mdisok(ep)) {
1096 			if (! mdissyserror(ep, ENOENT))
1097 				goto out;
1098 			mdclrerror(ep);
1099 			/*
1100 			 * If dr_nextrec was not valid, or we had some
1101 			 * problem getting the record, we end up here.
1102 			 * get_ur_rec() zeroes the recid we passed in,
1103 			 * if we had a failure getting a record using a key,
1104 			 * so we simply commit the set record and valid
1105 			 * drive records, if this fails, we hand an error
1106 			 * back to the caller.
1107 			 */
1108 			commitset(sr, FALSE, ep);
1109 			if (! mdisok(ep))
1110 				goto out;
1111 		}
1112 	}
1113 	if (! mdisok(ep)) {
1114 		if (! mdissyserror(ep, ENOENT))
1115 			goto out;
1116 		mdclrerror(ep);
1117 	}
1118 
1119 	/*
1120 	 * If the daemon called, go through the USER records and cleanup
1121 	 * any that are not used by valid sets.
1122 	 */
1123 	if (md_in_daemon) {
1124 		id = 0;
1125 		/* Make a list of records to delete */
1126 		while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
1127 		    MDDB_USER, 0, &id, ep)) != NULL) {
1128 			if (reqp->ur_type2 != MDDB_UR_SR &&
1129 			    reqp->ur_type2 != MDDB_UR_DR) {
1130 				Free((void *)(uintptr_t)reqp->ur_data);
1131 				Free(reqp);
1132 				continue;
1133 			}
1134 			if (! url_findl(url_used, reqp->ur_recid))
1135 				url_addl(&url_tode, reqp->ur_recid);
1136 			Free((void *)(uintptr_t)reqp->ur_data);
1137 			Free(reqp);
1138 		}
1139 		if (! mdisok(ep)) {
1140 			if (! mdissyserror(ep, ENOENT))
1141 				goto out;
1142 			mdclrerror(ep);
1143 		}
1144 
1145 		/* Delete all the delete listed records */
1146 		for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
1147 			s_delrec(urlp->url_recid, &xep);
1148 			if (! mdisok(&xep))
1149 				mdclrerror(&xep);
1150 		}
1151 	}
1152 
1153 	url_freel(&url_used);
1154 	url_freel(&url_tode);
1155 
1156 	if (nodecnt)
1157 		meta_free_nodelist(nl);
1158 
1159 	/* Mark the snarf complete */
1160 	setsnarfdone = 2;
1161 	return (0);
1162 
1163 out:
1164 	url_freel(&url_used);
1165 	url_freel(&url_tode);
1166 
1167 	sr_cache_flush(1);
1168 
1169 	if (nodecnt)
1170 		meta_free_nodelist(nl);
1171 
1172 	/* Snarf failed, reset state */
1173 	setsnarfdone = 0;
1174 
1175 	return (-1);
1176 }
1177 
1178 void
sr_cache_add(md_set_record * sr)1179 sr_cache_add(md_set_record *sr)
1180 {
1181 	md_set_record *tsr;
1182 
1183 	assert(setsnarfdone != 0);
1184 
1185 	if (setrecords == NULL) {
1186 		setrecords = sr;
1187 		return;
1188 	}
1189 
1190 	for (tsr = setrecords; tsr->sr_next != NULL; tsr = tsr->sr_next)
1191 		/* void */;
1192 	tsr->sr_next = sr;
1193 }
1194 
1195 void
sr_cache_del(mddb_recid_t recid)1196 sr_cache_del(mddb_recid_t recid)
1197 {
1198 	md_set_record	*sr, *tsr;
1199 	md_mnset_record	*mnsr;
1200 
1201 	assert(setsnarfdone != 0);
1202 
1203 	for (sr = tsr = setrecords; sr != NULL; tsr = sr, sr = sr->sr_next) {
1204 		if (sr->sr_selfid != recid)
1205 			continue;
1206 		if (sr == setrecords)
1207 			setrecords = sr->sr_next;
1208 		else
1209 			tsr->sr_next = sr->sr_next;
1210 		if (MD_MNSET_REC(sr)) {
1211 			mnsr = (struct md_mnset_record *)sr;
1212 			Free(mnsr);
1213 		} else {
1214 			Free(sr);
1215 		}
1216 		break;
1217 	}
1218 	if (setrecords == NULL)
1219 		setsnarfdone = 0;
1220 }
1221 
1222 void
dr_cache_add(md_set_record * sr,md_drive_record * dr)1223 dr_cache_add(md_set_record *sr, md_drive_record *dr)
1224 {
1225 	md_drive_record	*tdr;
1226 
1227 	assert(setsnarfdone != 0);
1228 
1229 	assert(sr != NULL);
1230 
1231 	if (sr->sr_drivechain == NULL) {
1232 		sr->sr_drivechain = dr;
1233 		sr->sr_driverec = dr->dr_selfid;
1234 		return;
1235 	}
1236 
1237 	for (tdr = sr->sr_drivechain; tdr->dr_next != NULL; tdr = tdr->dr_next)
1238 		/* void */;
1239 
1240 	tdr->dr_next = dr;
1241 	tdr->dr_nextrec = dr->dr_selfid;
1242 }
1243 
1244 void
dr_cache_del(md_set_record * sr,mddb_recid_t recid)1245 dr_cache_del(md_set_record *sr, mddb_recid_t recid)
1246 {
1247 	md_drive_record *dr;
1248 	md_drive_record *tdr;
1249 
1250 	assert(setsnarfdone != 0);
1251 
1252 	assert(sr != NULL);
1253 
1254 	for (dr = tdr = sr->sr_drivechain; dr != NULL;
1255 	    tdr = dr, dr = dr->dr_next) {
1256 		if (dr->dr_selfid != recid)
1257 			continue;
1258 
1259 		if (dr == sr->sr_drivechain) {
1260 			sr->sr_drivechain = dr->dr_next;
1261 			sr->sr_driverec = dr->dr_nextrec;
1262 		} else {
1263 			tdr->dr_next = dr->dr_next;
1264 			tdr->dr_nextrec = dr->dr_nextrec;
1265 		}
1266 		Free(dr);
1267 		break;
1268 	}
1269 }
1270 
1271 /*
1272  * Nodes must be kept in ascending node id order in order to
1273  * support reconfig.
1274  *
1275  * This routine may change nr->nr_next and nr->nr_nextrec.
1276  */
1277 void
mnnr_cache_add(md_mnset_record * mnsr,md_mnnode_record * nr)1278 mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
1279 {
1280 	md_mnnode_record	*tnr, *tnr_prev;
1281 
1282 	assert(mnsr != NULL);
1283 
1284 	if (mnsr->sr_nodechain == NULL) {
1285 		mnsr->sr_nodechain = nr;
1286 		mnsr->sr_noderec = nr->nr_selfid;
1287 		return;
1288 	}
1289 
1290 	/*
1291 	 * If new_record->nodeid < first_record->nodeid,
1292 	 * put new_record at beginning of list.
1293 	 */
1294 	if (nr->nr_nodeid < mnsr->sr_nodechain->nr_nodeid) {
1295 		nr->nr_next = mnsr->sr_nodechain;
1296 		nr->nr_nextrec = mnsr->sr_noderec;
1297 		mnsr->sr_nodechain = nr;
1298 		mnsr->sr_noderec = nr->nr_selfid;
1299 		return;
1300 	}
1301 
1302 	/*
1303 	 * Walk list looking for place to insert record.
1304 	 */
1305 
1306 	tnr_prev = mnsr->sr_nodechain;
1307 	tnr = tnr_prev->nr_next;
1308 	while (tnr) {
1309 		/* Insert new record between tnr_prev and tnr */
1310 		if (nr->nr_nodeid < tnr->nr_nodeid) {
1311 			nr->nr_next = tnr;
1312 			nr->nr_nextrec = tnr->nr_selfid; /* tnr's recid */
1313 			tnr_prev->nr_next = nr;
1314 			tnr_prev->nr_nextrec = nr->nr_selfid;
1315 			return;
1316 		}
1317 		tnr_prev = tnr;
1318 		tnr = tnr->nr_next;
1319 	}
1320 
1321 	/*
1322 	 * Add record to end of list.
1323 	 */
1324 	tnr_prev->nr_next = nr;
1325 	tnr_prev->nr_nextrec = nr->nr_selfid;
1326 }
1327 
1328 void
mnnr_cache_del(md_mnset_record * mnsr,mddb_recid_t recid)1329 mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
1330 {
1331 	md_mnnode_record *nr;
1332 	md_mnnode_record *tnr;
1333 
1334 	assert(mnsr != NULL);
1335 
1336 	tnr = 0;
1337 	nr = mnsr->sr_nodechain;
1338 	while (nr) {
1339 		if (nr->nr_selfid != recid) {
1340 			tnr = nr;
1341 			nr = nr->nr_next;
1342 			continue;
1343 		}
1344 
1345 		if (nr == mnsr->sr_nodechain) {
1346 			mnsr->sr_nodechain = nr->nr_next;
1347 			mnsr->sr_noderec = nr->nr_nextrec;
1348 		} else {
1349 			tnr->nr_next = nr->nr_next;
1350 			tnr->nr_nextrec = nr->nr_nextrec;
1351 		}
1352 		Free(nr);
1353 		break;
1354 	}
1355 }
1356 
1357 int
metad_isautotakebyname(char * setname)1358 metad_isautotakebyname(char *setname)
1359 {
1360 	md_error_t	error = mdnullerror;
1361 	md_set_record	*sr;
1362 
1363 	if (md_in_daemon)
1364 	    assert(setsnarfdone != 0);
1365 	else if (set_snarf(&error)) {
1366 	    mdclrerror(&error);
1367 	    return (0);
1368 	}
1369 
1370 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1371 	    if (strcmp(setname, sr->sr_setname) == 0) {
1372 		if (sr->sr_flags & MD_SR_AUTO_TAKE)
1373 		    return (1);
1374 		return (0);
1375 	    }
1376 	}
1377 
1378 	return (0);
1379 }
1380 
1381 int
metad_isautotakebynum(set_t setno)1382 metad_isautotakebynum(set_t setno)
1383 {
1384 	md_error_t	error = mdnullerror;
1385 	md_set_record	*sr;
1386 
1387 	if (md_in_daemon)
1388 	    assert(setsnarfdone != 0);
1389 	else if (set_snarf(&error)) {
1390 	    mdclrerror(&error);
1391 	    return (0);
1392 	}
1393 
1394 	for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1395 	    if (setno == sr->sr_setno) {
1396 		if (sr->sr_flags & MD_SR_AUTO_TAKE)
1397 		    return (1);
1398 		return (0);
1399 	    }
1400 	}
1401 
1402 	return (0);
1403 }
1404 
1405 md_set_record *
metad_getsetbyname(char * setname,md_error_t * ep)1406 metad_getsetbyname(char *setname, md_error_t *ep)
1407 {
1408 	md_set_record	*sr;
1409 	char		buf[100];
1410 
1411 	assert(setsnarfdone != 0);
1412 
1413 	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1414 		if (strcmp(setname, sr->sr_setname) == 0)
1415 			return (sr);
1416 
1417 	(void) snprintf(buf, sizeof (buf), "setname \"%s\"", setname);
1418 	(void) mderror(ep, MDE_NO_SET, buf);
1419 	return (NULL);
1420 }
1421 
1422 md_set_record *
metad_getsetbynum(set_t setno,md_error_t * ep)1423 metad_getsetbynum(set_t setno, md_error_t *ep)
1424 {
1425 	md_set_record	*sr;
1426 	char		buf[100];
1427 
1428 	if (md_in_daemon)
1429 		assert(setsnarfdone != 0);
1430 	else if (set_snarf(ep))		/* BYPASS DAEMON mode */
1431 		return (NULL);
1432 
1433 	for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1434 		if (setno == sr->sr_setno)
1435 			return (sr);
1436 
1437 	(void) sprintf(buf, "setno %u", setno);
1438 	(void) mderror(ep, MDE_NO_SET, buf);
1439 	return (NULL);
1440 }
1441 
1442 
1443 /*
1444  * Commit the set record and all of its associated records
1445  * (drive records, node records for a MNset) to the local mddb.
1446  */
1447 void
commitset(md_set_record * sr,int inc_genid,md_error_t * ep)1448 commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
1449 {
1450 	int		drc, nrc, rc;
1451 	int		*recs;
1452 	uint_t		size;
1453 	md_drive_record	*dr;
1454 	mddb_userreq_t	req;
1455 	md_mnset_record	*mnsr;
1456 	md_mnnode_record	*nr;
1457 
1458 	assert(setsnarfdone != 0);
1459 
1460 	/*
1461 	 * Cluster nodename support
1462 	 * Convert nodename -> nodeid
1463 	 * Don't do this for MN disksets since we've already stored
1464 	 * both the nodeid and name.
1465 	 */
1466 	if (!(MD_MNSET_REC(sr)))
1467 		sdssc_cm_sr_nm2nid(sr);
1468 
1469 	/* Send down to kernel the data in mddb USER set record */
1470 	if (inc_genid)
1471 		sr->sr_genid++;
1472 	(void) memset(&req, 0, sizeof (req));
1473 	METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
1474 	if (MD_MNSET_REC(sr)) {
1475 		req.ur_size = sizeof (*mnsr);
1476 	} else {
1477 		req.ur_size = sizeof (*sr);
1478 	}
1479 	req.ur_data = (uintptr_t)sr;
1480 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1481 		(void) mdstealerror(ep, &req.ur_mde);
1482 		return;
1483 	}
1484 
1485 	/*
1486 	 * Walk through the drive records associated with this set record
1487 	 * and send down to kernel the data in mddb USER drive record.
1488 	 */
1489 	drc = 0;
1490 	dr = sr->sr_drivechain;
1491 	while (dr) {
1492 		if (inc_genid)
1493 			dr->dr_genid++;
1494 		METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
1495 		req.ur_size = sizeof (*dr);
1496 		req.ur_data = (uintptr_t)dr;
1497 		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1498 			(void) mdstealerror(ep, &req.ur_mde);
1499 			return;
1500 		}
1501 		drc++;
1502 		dr = dr->dr_next;
1503 	}
1504 
1505 
1506 	/*
1507 	 * If this set is a multi-node set -
1508 	 * walk through the node records associated with this set record
1509 	 * and send down to kernel the data in mddb USER node record.
1510 	 */
1511 	nrc = 0;
1512 	if (MD_MNSET_REC(sr)) {
1513 		mnsr = (struct md_mnset_record *)sr;
1514 		nr = mnsr->sr_nodechain;
1515 		while (nr) {
1516 			if (inc_genid)
1517 				nr->nr_genid++;
1518 			METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
1519 			req.ur_size = sizeof (*nr);
1520 			req.ur_data = (uint64_t)(uintptr_t)nr;
1521 			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
1522 			    != 0) {
1523 				(void) mdstealerror(ep, &req.ur_mde);
1524 				return;
1525 			}
1526 			nrc++;
1527 			nr = nr->nr_next;
1528 		}
1529 	}
1530 
1531 	/*
1532 	 * Set up list of mddb USER recids containing set and drive records
1533 	 * and node records if a MNset.
1534 	 */
1535 	rc = 0;
1536 	size = (nrc + drc + 2) * sizeof (int);
1537 	recs = Zalloc(size);
1538 	/* First recid in list is the set record's id */
1539 	recs[rc] = sr->sr_selfid;
1540 	rc++;
1541 	dr = sr->sr_drivechain;
1542 	while (dr) {
1543 		/* Now, fill in the drive record ids */
1544 		recs[rc] = dr->dr_selfid;
1545 		dr = dr->dr_next;
1546 		rc++;
1547 	}
1548 	if (MD_MNSET_REC(sr)) {
1549 		nr = mnsr->sr_nodechain;
1550 		while (nr) {
1551 			/* If a MNset, fill in the node record ids */
1552 			recs[rc] = nr->nr_selfid;
1553 			nr = nr->nr_next;
1554 			rc++;
1555 		}
1556 	}
1557 	/* Set last record to null recid */
1558 	recs[rc] = 0;
1559 
1560 	/* Write out the set and drive and node records to the local mddb */
1561 	METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
1562 	req.ur_size = size;
1563 	req.ur_data = (uintptr_t)recs;
1564 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1565 		(void) mdstealerror(ep, &req.ur_mde);
1566 		return;
1567 	}
1568 
1569 	/*
1570 	 * Cluster nodename support
1571 	 * Convert nodeid -> nodename
1572 	 * Don't do this for MN disksets since we've already stored
1573 	 * both the nodeid and name.
1574 	 */
1575 	if (!(MD_MNSET_REC(sr)))
1576 		sdssc_cm_sr_nid2nm(sr);
1577 
1578 	Free(recs);
1579 }
1580 
1581 /*
1582  * This routine only handles returns a md_set_record structure even
1583  * if the set record describes a MN set.  This will allow pre-MN
1584  * SVM RPC code to access a MN set record and to display it.
1585  *
1586  * The MN SVM RPC code detects if the set record returned describes
1587  * a MN set and then will copy it using mnsetdup.
1588  */
1589 md_set_record *
setdup(md_set_record * sr)1590 setdup(md_set_record *sr)
1591 {
1592 	md_set_record		*tsr = NULL;
1593 	md_drive_record		**tdrpp = NULL;
1594 
1595 	if (sr && (tsr = Malloc(sizeof (*sr))) != NULL) {
1596 		(void) memmove(tsr, sr, sizeof (*sr));
1597 		tsr->sr_next = NULL;
1598 		tdrpp = &tsr->sr_drivechain;
1599 		while (*tdrpp) {
1600 			*tdrpp = drdup(*tdrpp);
1601 			tdrpp = &(*tdrpp)->dr_next;
1602 		}
1603 	}
1604 	return (tsr);
1605 }
1606 
1607 /*
1608  * This routine only copies MN set records.   If a non-MN set
1609  * record was passed in NULL pointer will be returned.
1610  */
1611 md_mnset_record *
mnsetdup(md_mnset_record * mnsr)1612 mnsetdup(md_mnset_record *mnsr)
1613 {
1614 	md_mnset_record		*tmnsr = NULL;
1615 	md_drive_record		**tdrpp = NULL;
1616 	md_mnnode_record	**tnrpp = NULL;
1617 
1618 	if (!MD_MNSET_REC(mnsr)) {
1619 		return (NULL);
1620 	}
1621 
1622 	if (mnsr && (tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
1623 		(void) memmove(tmnsr, mnsr, sizeof (*mnsr));
1624 		tmnsr->sr_next = NULL;
1625 		tdrpp = &tmnsr->sr_drivechain;
1626 		while (*tdrpp) {
1627 			*tdrpp = drdup(*tdrpp);
1628 			tdrpp = &(*tdrpp)->dr_next;
1629 		}
1630 		tnrpp = &tmnsr->sr_nodechain;
1631 		while (*tnrpp) {
1632 			*tnrpp = nrdup(*tnrpp);
1633 			tnrpp = &(*tnrpp)->nr_next;
1634 		}
1635 	}
1636 	return (tmnsr);
1637 }
1638 
1639 md_drive_record *
drdup(md_drive_record * dr)1640 drdup(md_drive_record *dr)
1641 {
1642 	md_drive_record		*tdr = NULL;
1643 
1644 	if (dr && (tdr = Malloc(sizeof (*dr))) != NULL)
1645 		(void) memmove(tdr, dr, sizeof (*dr));
1646 	return (tdr);
1647 }
1648 
1649 md_mnnode_record *
nrdup(md_mnnode_record * nr)1650 nrdup(md_mnnode_record *nr)
1651 {
1652 	md_mnnode_record	*tnr = NULL;
1653 
1654 	if (nr && (tnr = Malloc(sizeof (*nr))) != NULL)
1655 		(void) memmove(tnr, nr, sizeof (*nr));
1656 	return (tnr);
1657 }
1658 
1659 /*
1660  * Duplicate parts of the drive decriptor list for this node.
1661  * Only duplicate the drive name string in the mddrivename structure, don't
1662  * need to copy any other pointers since only interested in the flags and
1663  * the drive name (i.e. other pointers will be set to NULL).
1664  *	Returns NULL if failure due to Malloc failure.
1665  *	Returns pointer (non-NULL) to dup'd list if successful.
1666  */
1667 md_drive_desc *
dd_list_dup(md_drive_desc * dd)1668 dd_list_dup(md_drive_desc *dd)
1669 {
1670 	md_drive_desc	*orig_dd;
1671 	md_drive_desc	*copy_dd = NULL, *copy_dd_prev = NULL;
1672 	md_drive_desc	*copy_dd_head = NULL;
1673 	mddrivename_t	*copy_dnp;
1674 	char		*copy_cname;
1675 	char		*copy_devid;
1676 
1677 	if (dd == NULL)
1678 		return (NULL);
1679 
1680 	orig_dd = dd;
1681 
1682 	while (orig_dd) {
1683 		copy_dd = Zalloc(sizeof (*copy_dd));
1684 		copy_dnp = Zalloc(sizeof (mddrivename_t));
1685 		copy_cname = Zalloc(sizeof (orig_dd->dd_dnp->cname));
1686 		if (orig_dd->dd_dnp->devid) {
1687 			copy_devid = Zalloc(sizeof (orig_dd->dd_dnp->devid));
1688 		} else {
1689 			copy_devid = NULL;
1690 		}
1691 		copy_dd->dd_next = NULL;
1692 		if ((copy_dd == NULL) || (copy_dnp == NULL) ||
1693 		    (copy_cname == NULL)) {
1694 			while (copy_dd_head) {
1695 				copy_dd = copy_dd_head->dd_next;
1696 				Free(copy_dd_head);
1697 				copy_dd_head = copy_dd;
1698 			}
1699 			if (copy_dnp)
1700 				Free(copy_dnp);
1701 			if (copy_dd)
1702 				Free(copy_dd);
1703 			if (copy_cname)
1704 				Free(copy_cname);
1705 			if (copy_devid)
1706 				Free(copy_devid);
1707 			return (NULL);
1708 		}
1709 		(void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
1710 		(void) strlcpy(copy_cname, orig_dd->dd_dnp->cname,
1711 		    sizeof (orig_dd->dd_dnp->cname));
1712 		copy_dd->dd_next = NULL;
1713 		copy_dd->dd_dnp = copy_dnp;
1714 		copy_dd->dd_dnp->cname = copy_cname;
1715 		if (copy_devid) {
1716 			(void) strlcpy(copy_devid, orig_dd->dd_dnp->devid,
1717 			    sizeof (orig_dd->dd_dnp->devid));
1718 		}
1719 
1720 		if (copy_dd_prev == NULL) {
1721 			copy_dd_head = copy_dd;
1722 			copy_dd_prev = copy_dd;
1723 		} else {
1724 			copy_dd_prev->dd_next = copy_dd;
1725 			copy_dd_prev = copy_dd;
1726 		}
1727 		orig_dd = orig_dd->dd_next;
1728 	}
1729 	copy_dd->dd_next = NULL;
1730 	return (copy_dd_head);
1731 }
1732 
1733 void
sr_cache_flush(int flushnames)1734 sr_cache_flush(int flushnames)
1735 {
1736 	md_set_record	*sr, *tsr;
1737 	md_mnset_record	*mnsr;
1738 	md_drive_record *dr, *tdr;
1739 	md_mnnode_record *nr, *tnr;
1740 
1741 	sr = tsr = setrecords;
1742 	while (sr != NULL) {
1743 		dr = tdr = sr->sr_drivechain;
1744 		while (dr != NULL) {
1745 			tdr = dr;
1746 			dr = dr->dr_next;
1747 			Free(tdr);
1748 		}
1749 		tsr = sr;
1750 		sr = sr->sr_next;
1751 		if (MD_MNSET_REC(tsr)) {
1752 			mnsr = (struct md_mnset_record *)tsr;
1753 			nr = tnr = mnsr->sr_nodechain;
1754 			while (nr != NULL) {
1755 				tnr = nr;
1756 				nr = nr->nr_next;
1757 				Free(tnr);
1758 			}
1759 			Free(mnsr);
1760 		} else {
1761 			Free(tsr);
1762 		}
1763 	}
1764 
1765 	setrecords = NULL;
1766 
1767 	setsnarfdone = 0;
1768 
1769 	/* This will cause the other caches to be cleared */
1770 	if (flushnames)
1771 		metaflushnames(0);
1772 }
1773 
1774 void
sr_cache_flush_setno(set_t setno)1775 sr_cache_flush_setno(set_t setno)
1776 {
1777 	md_set_record	*sr, *tsr;
1778 	md_mnset_record	*mnsr;
1779 	md_drive_record *dr, *tdr;
1780 
1781 	assert(setsnarfdone != 0);
1782 
1783 	for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
1784 		if (sr->sr_setno != setno)
1785 			continue;
1786 
1787 		dr = tdr = sr->sr_drivechain;
1788 		while (dr != NULL) {
1789 			tdr = dr;
1790 			dr = dr->dr_next;
1791 			Free(tdr);
1792 		}
1793 		if (sr == setrecords)
1794 			setrecords = sr->sr_next;
1795 		else
1796 			tsr->sr_next = sr->sr_next;
1797 		if (MD_MNSET_REC(sr)) {
1798 			mnsr = (struct md_mnset_record *)sr;
1799 			Free(mnsr);
1800 		} else {
1801 			Free(sr);
1802 		}
1803 		break;
1804 	}
1805 
1806 	setsnarfdone = 0;
1807 
1808 	/* This will cause the other caches to be cleared */
1809 	metaflushnames(0);
1810 }
1811 
1812 int
s_ownset(set_t setno,md_error_t * ep)1813 s_ownset(set_t setno, md_error_t *ep)
1814 {
1815 	mddb_ownset_t		ownset_arg;
1816 
1817 	ownset_arg.setno = setno;
1818 	ownset_arg.owns_set = MD_SETOWNER_NONE;
1819 
1820 	if (metaioctl(MD_DB_OWNSET, &ownset_arg, ep, NULL) != 0)
1821 		return (0);
1822 
1823 	return (ownset_arg.owns_set);
1824 }
1825 
1826 void
s_delset(char * setname,md_error_t * ep)1827 s_delset(char *setname, md_error_t *ep)
1828 {
1829 	md_set_record		*sr;
1830 	md_set_record		*tsr;
1831 	md_drive_record		*dr;
1832 	md_drive_record		*tdr;
1833 	md_mnnode_record	*nr, *tnr;
1834 	mddb_userreq_t		req;
1835 	char			stringbuf[100];
1836 	int			i;
1837 	mdsetname_t		*sp = NULL;
1838 	mddrivename_t		*dn = NULL;
1839 	mdname_t		*np = NULL;
1840 	md_dev64_t		dev;
1841 	side_t			myside = MD_SIDEWILD;
1842 	md_error_t		xep = mdnullerror;
1843 	md_mnset_record		*mnsr;
1844 	int			num_sets = 0;
1845 	int			num_mn_sets = 0;
1846 
1847 	(void) memset(&req, 0, sizeof (mddb_userreq_t));
1848 
1849 	if ((sr = getsetbyname(setname, ep)) == NULL)
1850 		return;
1851 
1852 	sp = metasetnosetname(sr->sr_setno, &xep);
1853 	mdclrerror(&xep);
1854 
1855 	if (MD_MNSET_REC(sr)) {
1856 		/*
1857 		 * If this node is a set owner, halt the set before
1858 		 * deleting the set records.  Ignore any errors since
1859 		 * s_ownset and halt_set could fail if panic had occurred
1860 		 * during the add/delete of a node.
1861 		 */
1862 		if (s_ownset(sr->sr_setno, &xep)) {
1863 			mdclrerror(&xep);
1864 			if (halt_set(sp, &xep))
1865 				mdclrerror(&xep);
1866 		}
1867 	}
1868 
1869 	(void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
1870 	(void) unlink(stringbuf);
1871 	(void) unlink(meta_lock_name(sr->sr_setno));
1872 
1873 	if (MD_MNSET_REC(sr)) {
1874 		mnsr = (struct md_mnset_record *)sr;
1875 		nr = mnsr->sr_nodechain;
1876 		while (nr) {
1877 			/* Setting myside for later use */
1878 			if (strcmp(mynode(), nr->nr_nodename) == 0)
1879 				myside = nr->nr_nodeid;
1880 
1881 			(void) memset(&req, 0, sizeof (req));
1882 			METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
1883 			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
1884 			    NULL) != 0) {
1885 				(void) mdstealerror(ep, &req.ur_mde);
1886 				free_sr(sr);
1887 				return;
1888 			}
1889 			tnr = nr;
1890 			nr = nr->nr_next;
1891 
1892 			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1893 			    sr->sr_setno, tnr->nr_nodeid);
1894 
1895 			mnnr_cache_del((struct md_mnset_record *)sr,
1896 			    tnr->nr_selfid);
1897 		}
1898 	} else {
1899 		for (i = 0; i < MD_MAXSIDES; i++) {
1900 			/* Skip empty slots */
1901 			if (sr->sr_nodes[i][0] == '\0')
1902 				continue;
1903 
1904 			if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
1905 				myside = i;
1906 
1907 			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
1908 			    sr->sr_setno, i);
1909 		}
1910 	}
1911 
1912 	dr = sr->sr_drivechain;
1913 	while (dr) {
1914 		(void) memset(&req, 0, sizeof (req));
1915 		METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
1916 		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1917 			(void) mdstealerror(ep, &req.ur_mde);
1918 			free_sr(sr);
1919 			return;
1920 		}
1921 		tdr = dr;
1922 		dr = dr->dr_next;
1923 
1924 		dev = NODEV64;
1925 		if (myside != MD_SIDEWILD && sp != NULL) {
1926 			dn = metadrivename_withdrkey(sp, myside,
1927 			    tdr->dr_key, MD_BASICNAME_OK, &xep);
1928 			if (dn != NULL) {
1929 				uint_t	rep_slice;
1930 
1931 				np = NULL;
1932 				if (meta_replicaslice(dn, &rep_slice,
1933 				    &xep) == 0) {
1934 					np = metaslicename(dn, rep_slice, &xep);
1935 				}
1936 
1937 				if (np != NULL)
1938 					dev = np->dev;
1939 				else
1940 					mdclrerror(&xep);
1941 			} else
1942 				mdclrerror(&xep);
1943 		} else
1944 			mdclrerror(&xep);
1945 
1946 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
1947 		    sr->sr_setno, dev);
1948 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
1949 		    MD_LOCAL_SET, dev);
1950 
1951 		dr_cache_del(sr, tdr->dr_selfid);
1952 
1953 	}
1954 
1955 	(void) memset(&req, 0, sizeof (req));
1956 	METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
1957 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1958 		(void) mdstealerror(ep, &req.ur_mde);
1959 		free_sr(sr);
1960 		return;
1961 	}
1962 
1963 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
1964 	    NODEV64);
1965 
1966 	for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
1967 		if (tsr == sr)
1968 			continue;
1969 
1970 		num_sets++;
1971 		if (MD_MNSET_REC(tsr))
1972 			num_mn_sets++;
1973 	}
1974 
1975 	if (num_mn_sets == 0)
1976 		(void) meta_smf_disable(META_SMF_MN_DISKSET, NULL);
1977 
1978 	/* The set we just deleted is the only one left */
1979 	if (num_sets == 0)
1980 		(void) meta_smf_disable(META_SMF_DISKSET, NULL);
1981 
1982 	sr_cache_del(sr->sr_selfid);
1983 	free_sr(sr);
1984 
1985 }
1986 
1987 void
s_delrec(mddb_recid_t recid,md_error_t * ep)1988 s_delrec(mddb_recid_t recid, md_error_t *ep)
1989 {
1990 	mddb_userreq_t		req;
1991 
1992 	(void) memset(&req, 0, sizeof (req));
1993 
1994 	METAD_SETUP_SR(MD_DB_DELETE, recid)
1995 
1996 	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
1997 		(void) mdstealerror(ep, &req.ur_mde);
1998 }
1999 
2000 /*
2001  * resnarf the imported set
2002  */
2003 int
resnarf_set(set_t setno,md_error_t * ep)2004 resnarf_set(
2005 	set_t			setno,
2006 	md_error_t		*ep
2007 )
2008 {
2009 	md_set_record	*sr;
2010 	md_drive_record	*dr;
2011 	mddb_recid_t	id, *p;
2012 
2013 	if (meta_setup_db_locations(ep) != 0) {
2014 		if (! mdismddberror(ep, MDE_DB_STALE))
2015 			return (-1);
2016 		mdclrerror(ep);
2017 	}
2018 
2019 	setsnarfdone = 1;
2020 
2021 	id = 0;
2022 	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
2023 	    ep)) != NULL) {
2024 
2025 		if (sr->sr_setno != setno)
2026 			continue;
2027 
2028 		/* Don't allow resnarf of a multi-node diskset */
2029 		if (MD_MNSET_REC(sr))
2030 			goto out;
2031 
2032 		sr->sr_next = NULL;
2033 		sr->sr_drivechain = NULL;
2034 
2035 		if (md_in_daemon)
2036 			url_addl(&url_used, sr->sr_selfid);
2037 
2038 		sr->sr_flags |= MD_SR_CHECK;
2039 
2040 		sr_cache_add(sr);
2041 
2042 		if (sr->sr_driverec == 0)
2043 			break;
2044 
2045 		p = &sr->sr_driverec;
2046 		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
2047 		    MDDB_UR_DR, p, ep)) != NULL) {
2048 			dr->dr_next = NULL;
2049 
2050 			if (md_in_daemon)
2051 				url_addl(&url_used, dr->dr_selfid);
2052 
2053 			dr_cache_add(sr, dr);
2054 
2055 			if (dr->dr_nextrec == 0)
2056 				break;
2057 
2058 			p = &dr->dr_nextrec;
2059 		}
2060 		if (! mdisok(ep)) {
2061 			if (! mdissyserror(ep, ENOENT))
2062 				goto out;
2063 			mdclrerror(ep);
2064 			commitset(sr, FALSE, ep);
2065 			if (! mdisok(ep))
2066 				goto out;
2067 		}
2068 	}
2069 	if (! mdisok(ep)) {
2070 		if (! mdissyserror(ep, ENOENT))
2071 			goto out;
2072 		mdclrerror(ep);
2073 	}
2074 
2075 	setsnarfdone = 2;
2076 
2077 	url_freel(&url_used);
2078 	url_freel(&url_tode);
2079 	return (0);
2080 
2081 out:
2082 	url_freel(&url_used);
2083 	url_freel(&url_tode);
2084 
2085 	sr_cache_flush(1);
2086 
2087 	setsnarfdone = 0;
2088 
2089 	return (-1);
2090 }
2091