1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 /*
29 * Just in case we're not in a build environment, make sure that
30 * TEXT_DOMAIN gets set to something.
31 */
32 #if !defined(TEXT_DOMAIN)
33 #define TEXT_DOMAIN "SYS_TEST"
34 #endif
35
36 /*
37 * interface between user land and the set records
38 */
39
40 #include <meta.h>
41 #include <metad.h>
42 #include <sdssc.h>
43 #include <syslog.h>
44 #include <sys/cladm.h>
45 #include "meta_set_prv.h"
46
47 #include <sys/sysevent/eventdefs.h>
48 #include <sys/sysevent/svm.h>
49
50 static md_set_record *setrecords = NULL; /* head of cache linked list */
51 static int setsnarfdone = 0;
52
53 typedef struct key_lst_t {
54 side_t kl_side;
55 mdkey_t kl_key;
56 struct key_lst_t *kl_next;
57 } key_lst_t;
58
59 typedef struct ur_recid_lst {
60 mddb_recid_t url_recid;
61 struct ur_recid_lst *url_nx;
62 } ur_recid_lst_t;
63
64 static ur_recid_lst_t *url_used = NULL;
65 static ur_recid_lst_t *url_tode = NULL;
66
67 static void
url_addl(ur_recid_lst_t ** urlpp,mddb_recid_t recid)68 url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid)
69 {
70 /* Run to the end of the list */
71 for (/* void */; (*urlpp != NULL); urlpp = &(*urlpp)->url_nx)
72 if ((*urlpp)->url_recid == recid)
73 return;
74
75 /* Add the new member */
76 *urlpp = Zalloc(sizeof (**urlpp));
77 if (*urlpp == NULL)
78 return;
79
80 (*urlpp)->url_recid = recid;
81 }
82
83 static int
url_findl(ur_recid_lst_t * urlp,mddb_recid_t recid)84 url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid)
85 {
86 while (urlp != NULL) {
87 if (urlp->url_recid == recid)
88 return (1);
89 urlp = urlp->url_nx;
90 }
91 return (0);
92 }
93
94 static void
url_freel(ur_recid_lst_t ** urlpp)95 url_freel(ur_recid_lst_t **urlpp)
96 {
97 ur_recid_lst_t *urlp;
98 ur_recid_lst_t *turlp;
99
100 for (turlp = *urlpp; turlp != NULL; turlp = urlp) {
101 urlp = turlp->url_nx;
102 Free(turlp);
103 }
104 *urlpp = (ur_recid_lst_t *)NULL;
105 }
106
107 static int
ckncvt_set_record(mddb_userreq_t * reqp,md_error_t * ep)108 ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep)
109 {
110 mddb_userreq_t req;
111 md_set_record *sr;
112 int recs[3];
113
114 if (reqp->ur_size == sizeof (*sr))
115 return (0);
116
117 if (! md_in_daemon) {
118 if (reqp->ur_size >= sizeof (*sr))
119 return (0);
120
121 reqp->ur_data = (uintptr_t)Realloc((void *)(uintptr_t)
122 reqp->ur_data, sizeof (*sr));
123 (void) memset(
124 ((char *)(uintptr_t)reqp->ur_data) + reqp->ur_size,
125 '\0', sizeof (*sr) - reqp->ur_size);
126 reqp->ur_size = sizeof (*sr);
127 return (0);
128 }
129
130 /*
131 * If here, then the daemon is calling, and so the automatic
132 * conversion will be performed.
133 */
134
135 /* shorthand */
136 req = *reqp; /* structure assignment */
137 sr = (md_set_record *)(uintptr_t)req.ur_data;
138
139 if (sr->sr_flags & MD_SR_CVT)
140 return (0);
141
142 /* Leave multi-node set records alone */
143 if (MD_MNSET_REC(sr)) {
144 return (0);
145 }
146
147 /* Mark the old record as converted */
148 sr->sr_flags |= MD_SR_CVT;
149
150 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
151
152 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
153 return (mdstealerror(ep, &req.ur_mde));
154
155 /* Create space for the new record */
156 METAD_SETUP_SR(MD_DB_CREATE, 0);
157 req.ur_size = sizeof (*sr);
158
159 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
160 return (mdstealerror(ep, &req.ur_mde));
161
162 /* Allocate the new record */
163 sr = Zalloc(sizeof (*sr));
164
165 /* copy all the data from the record being converted */
166 (void) memmove(sr, (void *)(uintptr_t)reqp->ur_data, reqp->ur_size);
167 sr->sr_flags &= ~MD_SR_CVT;
168
169 /* adjust the selfid to point to the new record */
170 sr->sr_selfid = req.ur_recid;
171
172 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
173 req.ur_size = sizeof (*sr);
174 req.ur_data = (uintptr_t)sr;
175
176 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
177 Free(sr);
178 return (mdstealerror(ep, &req.ur_mde));
179 }
180
181 /* Commit the old and the new */
182 recs[0] = ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid;
183 recs[1] = sr->sr_selfid;
184 recs[2] = 0;
185
186 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
187 req.ur_size = sizeof (recs);
188 req.ur_data = (uintptr_t)recs;
189
190 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
191 Free(sr);
192 return (mdstealerror(ep, &req.ur_mde));
193 }
194
195 /* Add the the old record to the list of records to delete */
196 url_addl(&url_tode,
197 ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid);
198
199 /* Free the old records space */
200 Free((void *)(uintptr_t)reqp->ur_data);
201
202 /* Adjust the reqp structure to point to the new record and size */
203 reqp->ur_recid = sr->sr_selfid;
204 reqp->ur_size = sizeof (*sr);
205 reqp->ur_data = (uintptr_t)sr;
206
207 return (0);
208 }
209
210 mddb_userreq_t *
get_db_rec(md_ur_get_cmd_t cmd,set_t setno,mddb_type_t type,uint_t type2,mddb_recid_t * idp,md_error_t * ep)211 get_db_rec(
212 md_ur_get_cmd_t cmd,
213 set_t setno,
214 mddb_type_t type,
215 uint_t type2,
216 mddb_recid_t *idp,
217 md_error_t *ep
218 )
219 {
220 mddb_userreq_t *reqp = Zalloc(sizeof (*reqp));
221 mdsetname_t *sp;
222 md_set_desc *sd;
223 int ureq;
224
225 if ((sp = metasetnosetname(setno, ep)) == NULL) {
226 Free(reqp);
227 return (NULL);
228 }
229
230 if (metaislocalset(sp)) {
231 ureq = MD_DB_USERREQ;
232 } else {
233 if ((sd = metaget_setdesc(sp, ep)) == NULL) {
234 Free(reqp);
235 return (NULL);
236 }
237 ureq = MD_MNSET_DESC(sd) ? MD_MN_DB_USERREQ : MD_DB_USERREQ;
238 }
239
240 reqp->ur_setno = setno;
241 reqp->ur_type = type;
242 reqp->ur_type2 = type2;
243
244 switch (cmd) {
245 case MD_UR_GET_NEXT:
246 reqp->ur_cmd = MD_DB_GETNEXTREC;
247 reqp->ur_recid = *idp;
248 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL)
249 != 0) {
250 (void) mdstealerror(ep, &reqp->ur_mde);
251 Free(reqp);
252 return (NULL);
253 }
254 *idp = reqp->ur_recid;
255 break;
256 case MD_UR_GET_WKEY:
257 reqp->ur_recid = *idp;
258 break;
259 }
260
261 if (*idp <= 0) {
262 Free(reqp);
263 return (NULL);
264 }
265
266 reqp->ur_cmd = MD_DB_GETSIZE;
267 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
268 (void) mdstealerror(ep, &reqp->ur_mde);
269 Free(reqp);
270
271 *idp = 0;
272 return (NULL);
273 }
274
275 reqp->ur_cmd = MD_DB_GETDATA;
276 reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size);
277 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) {
278 (void) mdstealerror(ep, &reqp->ur_mde);
279 Free((void *)(uintptr_t)reqp->ur_data);
280 Free(reqp);
281 *idp = 0;
282 return (NULL);
283 }
284
285 switch (reqp->ur_type) {
286 case MDDB_USER:
287 switch (reqp->ur_type2) {
288 case MDDB_UR_SR:
289 if (ckncvt_set_record(reqp, ep)) {
290 Free((void *)(uintptr_t)reqp->ur_data);
291 Free(reqp);
292 return (NULL);
293 }
294 break;
295 }
296 break;
297 }
298
299 return (reqp);
300 }
301
302 void *
get_ur_rec(set_t setno,md_ur_get_cmd_t cmd,uint_t type2,mddb_recid_t * idp,md_error_t * ep)303 get_ur_rec(
304 set_t setno,
305 md_ur_get_cmd_t cmd,
306 uint_t type2,
307 mddb_recid_t *idp,
308 md_error_t *ep
309 )
310 {
311 mddb_userreq_t *reqp = NULL;
312 void *ret_val;
313
314 assert(idp != NULL);
315
316 reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep);
317 if (reqp == NULL)
318 return (NULL);
319
320 ret_val = (void *)(uintptr_t)reqp->ur_data;
321 Free(reqp);
322 return (ret_val);
323 }
324
325 /*
326 * Called by rpc.metad on startup of disksets to cleanup
327 * the host entries associated with a diskset. This is needed if
328 * a node failed or the metaset command was killed during the addition
329 * of a node to a diskset.
330 *
331 * This is called for all traditional disksets.
332 * This is only called for MNdisksets when in there is only one node
333 * in all of the MN disksets and this node is not running SunCluster.
334 * (Otherwise, the cleanup of the host entries is handled by a
335 * reconfig cycle that the SunCluster software calls).
336 */
337 static int
sr_hosts(md_set_record * sr)338 sr_hosts(md_set_record *sr)
339 {
340 int i,
341 nid = 0,
342 self_in_set = FALSE;
343 md_error_t xep = mdnullerror;
344 md_mnnode_record *nr;
345 md_mnset_record *mnsr;
346
347 if (MD_MNSET_REC(sr)) {
348 mnsr = (struct md_mnset_record *)sr;
349 nr = mnsr->sr_nodechain;
350 /*
351 * Already guaranteed to be only 1 node in set which
352 * is mynode (done in sr_validate).
353 * Now, check if node is in the OK state. If not in
354 * the OK state, leave self_in_set FALSE so that
355 * set will be removed.
356 */
357 if (nr->nr_flags & MD_MN_NODE_OK)
358 self_in_set = TRUE;
359 } else {
360 for (i = 0; i < MD_MAXSIDES; i++) {
361 /* Skip empty slots */
362 if (sr->sr_nodes[i][0] == '\0')
363 continue;
364
365 /* Make sure we are in the set and skip this node */
366 if (strcmp(sr->sr_nodes[i], mynode()) == 0) {
367 self_in_set = TRUE;
368 break;
369 }
370 }
371 }
372
373 if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr)))) {
374 /*
375 * Under some circumstances (/etc/cluster/nodeid file is
376 * missing) it is possible for the call to _cladm() to
377 * return 0 and a nid of 0. In this instance do not remove
378 * the set as it is Sun Cluster error that needs to be fixed.
379 */
380 if (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0 && nid > 0) {
381
382 /*
383 * See if we've got a node which has been booted in
384 * non-cluster mode. If true the nodeid will match
385 * one of the sr_nodes values because the conversion
386 * from nodeid to hostname failed to occur.
387 */
388 for (i = 0; i < MD_MAXSIDES; i++) {
389 if (sr->sr_nodes[i][0] == 0)
390 continue;
391 if (atoi(sr->sr_nodes[i]) == nid)
392 self_in_set = TRUE;
393 }
394
395 /* If we aren't in the set, delete the set */
396 if (self_in_set == FALSE) {
397 syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
398 "Removing set %s from database\n"),
399 sr->sr_setname);
400 s_delset(sr->sr_setname, &xep);
401 if (! mdisok(&xep))
402 mdclrerror(&xep);
403 return (1);
404 }
405 } else {
406 /*
407 * Send a message to syslog and return without
408 * deleting any sets
409 */
410 syslog(LOG_ERR, dgettext(TEXT_DOMAIN,
411 "Call to _cladm failed for set %s nodeid %d\n"),
412 sr->sr_setname, nid);
413 return (1);
414 }
415 }
416 return (0);
417 }
418
419 void
sr_del_drv(md_set_record * sr,mddb_recid_t recid)420 sr_del_drv(md_set_record *sr, mddb_recid_t recid)
421 {
422 mddb_userreq_t req;
423 md_error_t xep = mdnullerror;
424
425 if (!s_ownset(sr->sr_setno, &xep)) {
426 if (! mdisok(&xep))
427 mdclrerror(&xep);
428 goto skip;
429 }
430
431 /* delete the replicas? */
432 /* release ownership of the drive? */
433 /* NOTE: We may not have a name, so both of the above are ugly! */
434
435 skip:
436 (void) memset(&req, 0, sizeof (req));
437 METAD_SETUP_DR(MD_DB_DELETE, recid)
438 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
439 mdclrerror(&req.ur_mde);
440
441 dr_cache_del(sr, recid);
442 }
443
444 static void
sr_drvs(md_set_record * sr)445 sr_drvs(md_set_record *sr)
446 {
447 md_drive_record *dr;
448 int i;
449 int modified = 0;
450 int sidesok;
451 mdnm_params_t nm;
452 static char device_name[MAXPATHLEN];
453 md_error_t xep = mdnullerror;
454 md_mnnode_record *nr;
455 md_mnset_record *mnsr;
456
457 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
458 /* If we were mid-add, cleanup */
459 if ((dr->dr_flags & MD_DR_ADD)) {
460 sr_del_drv(sr, dr->dr_selfid);
461 modified++;
462 continue;
463 }
464
465 sidesok = TRUE;
466 if (MD_MNSET_REC(sr)) {
467 mnsr = (md_mnset_record *)sr;
468 nr = mnsr->sr_nodechain;
469 /*
470 * MultiNode disksets only have entries for
471 * their side in the local set. Verify
472 * that drive has a name associated with
473 * this node's side.
474 */
475 while (nr) {
476 /* Find my node */
477 if (strcmp(mynode(), nr->nr_nodename) != 0) {
478 nr = nr->nr_next;
479 continue;
480 }
481
482 (void) memset(&nm, '\0', sizeof (nm));
483 nm.setno = MD_LOCAL_SET;
484 nm.side = nr->nr_nodeid;
485 nm.key = dr->dr_key;
486 nm.devname = (uint64_t)device_name;
487
488 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
489 NULL) != 0) {
490 if (! mdissyserror(&nm.mde, ENOENT)) {
491 mdclrerror(&nm.mde);
492 return;
493 }
494 }
495
496 /*
497 * If entry is found for this node, then
498 * break out of loop walking through
499 * node list. For a multi-node diskset,
500 * there should only be an entry for
501 * this node.
502 */
503 if (nm.key != MD_KEYWILD &&
504 ! mdissyserror(&nm.mde, ENOENT)) {
505 break;
506 }
507
508 /*
509 * If entry is not found for this node,
510 * then delete the drive. No need to
511 * continue through the node loop since
512 * our node has already been found.
513 */
514 sidesok = FALSE;
515 mdclrerror(&nm.mde);
516
517 /* If we are missing a sidename, cleanup */
518 sr_del_drv(sr, dr->dr_selfid);
519 modified++;
520
521 break;
522 }
523 } else {
524 for (i = 0; i < MD_MAXSIDES; i++) {
525 /* Skip empty slots */
526 if (sr->sr_nodes[i][0] == '\0')
527 continue;
528
529 (void) memset(&nm, '\0', sizeof (nm));
530 nm.setno = MD_LOCAL_SET;
531 nm.side = i + SKEW;
532 nm.key = dr->dr_key;
533 nm.devname = (uint64_t)device_name;
534
535 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde,
536 NULL) != 0) {
537 if (! mdissyserror(&nm.mde, ENOENT)) {
538 mdclrerror(&nm.mde);
539 return;
540 }
541 }
542
543 if (nm.key != MD_KEYWILD &&
544 ! mdissyserror(&nm.mde, ENOENT))
545 continue;
546
547 sidesok = FALSE;
548 mdclrerror(&nm.mde);
549
550 /* If we are missing a sidename, cleanup */
551 sr_del_drv(sr, dr->dr_selfid);
552 modified++;
553
554 break;
555 }
556 }
557
558 if (sidesok == FALSE)
559 continue;
560
561 /*
562 * If we got this far, the drive record is either in the OK
563 * or DEL state, if it is in the DEL state and the sidenames
564 * all checked out, then we will make it OK.
565 */
566 if ((dr->dr_flags & MD_DR_OK))
567 continue;
568
569 dr->dr_flags = MD_DR_OK;
570
571 modified++;
572 }
573
574 if (modified) {
575 commitset(sr, FALSE, &xep);
576 if (! mdisok(&xep))
577 mdclrerror(&xep);
578 }
579 }
580
581 static void
add_key_to_lst(key_lst_t ** klpp,side_t side,mdkey_t key)582 add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key)
583 {
584 key_lst_t *klp;
585
586 assert(klpp != NULL);
587
588 for (/* void */; *klpp != NULL; klpp = &(*klpp)->kl_next)
589 /* void */;
590
591 /* allocate new list element */
592 klp = *klpp = Zalloc(sizeof (*klp));
593
594 klp->kl_side = side;
595 klp->kl_key = key;
596 }
597
#ifdef DUMPKEYLST
/*
 * Debugging aid, only built with DUMPKEYLST defined: print tag followed by
 * one line per (side, key) pair of the list klp through md_eprintf().
 */
static void
pr_key_lst(char *tag, key_lst_t *klp)
{
	md_eprintf("Tag=%s\n", tag);
	while (klp != NULL) {
		md_eprintf("side=%d, key=%lu\n", klp->kl_side, klp->kl_key);
		klp = klp->kl_next;
	}
}
#endif	/* DUMPKEYLST */
609
610 static int
key_in_key_lst(key_lst_t * klp,side_t side,mdkey_t key)611 key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key)
612 {
613 key_lst_t *tklp;
614
615 for (tklp = klp; tklp != NULL; tklp = tklp->kl_next)
616 if (tklp->kl_side == side && tklp->kl_key == key)
617 return (1);
618
619 return (0);
620 }
621
622 static void
destroy_key_lst(key_lst_t ** klpp)623 destroy_key_lst(key_lst_t **klpp)
624 {
625 key_lst_t *tklp, *klp;
626
627 assert(klpp != NULL);
628
629 tklp = klp = *klpp;
630 while (klp != NULL) {
631 tklp = klp;
632 klp = klp->kl_next;
633 Free(tklp);
634 }
635 *klpp = NULL;
636 }
637
638 static void
sr_sidenms(void)639 sr_sidenms(void)
640 {
641 md_drive_record *dr;
642 md_set_record *sr;
643 key_lst_t *use = NULL;
644 mdnm_params_t nm;
645 int i;
646 md_mnset_record *mnsr;
647 md_mnnode_record *nr;
648 side_t myside = 0;
649
650 /*
651 * We now go through the list of set and drive records collecting
652 * the key/side pairs that are being used.
653 */
654 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
655 /*
656 * To handle the multi-node diskset case, get the sideno
657 * associated with this node. This sideno will be the
658 * same across all multi-node disksets.
659 */
660 if ((myside == 0) && (MD_MNSET_REC(sr))) {
661 mnsr = (struct md_mnset_record *)sr;
662 nr = mnsr->sr_nodechain;
663 while (nr) {
664 if (strcmp(mynode(), nr->nr_nodename) == 0) {
665 myside = nr->nr_nodeid;
666 break;
667 }
668 nr = nr->nr_next;
669 }
670 /*
671 * If this node is not in this MNset -
672 * then skip this set.
673 */
674 if (!nr) {
675 continue;
676 }
677 }
678
679 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) {
680 if (MD_MNSET_REC(sr)) {
681 /*
682 * There are no non-local sidenames in the
683 * local set for a multi-node diskset.
684 */
685 add_key_to_lst(&use, myside, dr->dr_key);
686 } else {
687 for (i = 0; i < MD_MAXSIDES; i++) {
688 /* Skip empty slots */
689 if (sr->sr_nodes[i][0] == '\0')
690 continue;
691
692 add_key_to_lst(&use, i + SKEW,
693 dr->dr_key);
694 }
695 }
696 }
697 }
698
699 #ifdef DUMPKEYLST
700 pr_key_lst("use", use);
701 #endif /* DUMPKEYLST */
702
703 /*
704 * We take the list above and get all non-local sidenames, checking
705 * each to see if they are in use, if they are not used, we delete them.
706 * Do the check for myside to cover multinode disksets.
707 * Then do the check for MD_MAXSIDES to cover non-multinode disksets.
708 * If any multi-node disksets were present, myside would be non-zero.
709 * myside is the same for all multi-node disksets for this node.
710 */
711 if (myside) {
712 (void) memset(&nm, '\0', sizeof (nm));
713 nm.setno = MD_LOCAL_SET;
714 nm.side = myside;
715 nm.key = MD_KEYWILD;
716
717 /*CONSTCOND*/
718 while (1) {
719 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
720 NULL) != 0) {
721 mdclrerror(&nm.mde);
722 break;
723 }
724
725 if (nm.key == MD_KEYWILD)
726 break;
727
728 if (! key_in_key_lst(use, nm.side, nm.key)) {
729 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
730 NULL) != 0) {
731 mdclrerror(&nm.mde);
732 continue;
733 }
734 }
735 }
736 }
737 /* Now handle the non-multinode disksets */
738 for (i = 0; i < MD_MAXSIDES; i++) {
739 (void) memset(&nm, '\0', sizeof (nm));
740 nm.setno = MD_LOCAL_SET;
741 nm.side = i + SKEW;
742 nm.key = MD_KEYWILD;
743
744 /*CONSTCOND*/
745 while (1) {
746 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde,
747 NULL) != 0) {
748 mdclrerror(&nm.mde);
749 break;
750 }
751
752 if (nm.key == MD_KEYWILD)
753 break;
754
755 if (! key_in_key_lst(use, nm.side, nm.key)) {
756 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde,
757 NULL) != 0) {
758 mdclrerror(&nm.mde);
759 continue;
760 }
761 }
762 }
763 }
764
765 /* Cleanup */
766 destroy_key_lst(&use);
767 }
768
769 void
sr_validate(void)770 sr_validate(void)
771 {
772 md_set_record *sr;
773 md_error_t xep = mdnullerror;
774 int mnset_single_node;
775 md_mnnode_record *nr;
776 md_mnset_record *mnsr;
777
778 assert(setsnarfdone != 0);
779
780 /* We have validated the records already */
781 if (setsnarfdone == 3)
782 return;
783
784 /*
785 * Check if we are in a single node non-SC3.x environmemnt
786 */
787 mnset_single_node = meta_mn_singlenode();
788 /*
789 * If a possible single_node situation, verify that all
790 * MN disksets have only one node (which is mynode()).
791 */
792 if (mnset_single_node) {
793 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
794 if (MD_MNSET_REC(sr)) {
795 mnsr = (struct md_mnset_record *)sr;
796 nr = mnsr->sr_nodechain;
797 /*
798 * If next pointer is non-null (more than
799 * one node in list) or if the single node
800 * isn't my node - reset single node flag.
801 */
802 if ((nr->nr_next) ||
803 (strcmp(nr->nr_nodename, mynode()) != 0)) {
804 mnset_single_node = 0;
805 break;
806 }
807 }
808 }
809 }
810
811 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
812 /*
813 * If a MN diskset and not in the single node
814 * situation, then don't validate the MN set.
815 * This is done during a reconfig cycle since all
816 * nodes must take the same action.
817 */
818 if (MD_MNSET_REC(sr) && (mnset_single_node == 0))
819 continue;
820
821 /* Since we do "partial" snarf's, we only check new entries */
822 if (! (sr->sr_flags & MD_SR_CHECK))
823 continue;
824
825 /* If we were mid-add, cleanup */
826 if ((sr->sr_flags & MD_SR_ADD)) {
827 s_delset(sr->sr_setname, &xep);
828 if (! mdisok(&xep))
829 mdclrerror(&xep);
830 continue;
831 }
832
833 /* Make sure we are in the set. */
834 if (sr_hosts(sr))
835 continue;
836
837 /* Check has been done, clear the flag */
838 if ((sr->sr_flags & MD_SR_CHECK))
839 sr->sr_flags &= ~MD_SR_CHECK;
840
841 /*
842 * If we got here, we are in the set, make sure the flags make
843 * sense.
844 */
845 if (! (sr->sr_flags & MD_SR_OK)) {
846 sr->sr_flags &= ~MD_SR_STATE_FLAGS;
847 sr->sr_flags |= MD_SR_OK;
848 commitset(sr, FALSE, &xep);
849 if (! mdisok(&xep))
850 mdclrerror(&xep);
851 }
852
853 /* Make sure all the drives are in a stable state. */
854 sr_drvs(sr);
855 }
856
857 /* Cleanup any stray sidenames */
858 sr_sidenms();
859
860 setsnarfdone = 3;
861 }
862
863 static md_set_record *
sr_in_cache(mddb_recid_t recid)864 sr_in_cache(mddb_recid_t recid)
865 {
866 md_set_record *tsr;
867
868 for (tsr = setrecords; tsr != NULL; tsr = tsr->sr_next)
869 if (tsr->sr_selfid == recid)
870 return (tsr);
871 return ((md_set_record *)NULL);
872 }
873
874 int
set_snarf(md_error_t * ep)875 set_snarf(md_error_t *ep)
876 {
877 md_set_record *sr;
878 md_mnset_record *mnsr;
879 md_set_record *tsr;
880 md_drive_record *dr;
881 mddb_userreq_t *reqp;
882 ur_recid_lst_t *urlp;
883 mddb_recid_t id;
884 mddb_recid_t *p;
885 md_error_t xep = mdnullerror;
886 md_mnnode_record *nr;
887 mddb_set_node_params_t snp;
888 int nodecnt;
889 mndiskset_membershiplist_t *nl, *nl2;
890
891 /* We have done the snarf call */
892 if (setsnarfdone != 0)
893 return (0);
894
895 if (meta_setup_db_locations(ep) != 0) {
896 if (! mdismddberror(ep, MDE_DB_STALE))
897 return (-1);
898 mdclrerror(ep);
899 }
900
901 /*
902 * Get membershiplist from API routine.
903 * If there's an error, just use a NULL
904 * nodelist.
905 */
906 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) {
907 nodecnt = 0; /* no nodes are alive */
908 nl = NULL;
909 mdclrerror(ep);
910 }
911
912 /* Let sr_cache_add and dr_cache_add know we are doing the snarf */
913 setsnarfdone = 1;
914
915 /* Go get the set records */
916 id = 0;
917 while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR,
918 &id, ep)) != NULL) {
919 sr->sr_next = NULL;
920 sr->sr_drivechain = NULL;
921
922 /*
923 * Cluster nodename support
924 * Convert nodeid -> nodename
925 * Don't do this for MN disksets since we've already stored
926 * both the nodeid and name.
927 */
928 if (!(MD_MNSET_REC(sr)))
929 sdssc_cm_sr_nid2nm(sr);
930
931 /* If we were mid-cvt, cleanup */
932 if (sr->sr_flags & MD_SR_CVT) {
933 /* If the daemon is calling, cleanup */
934 if (md_in_daemon)
935 url_addl(&url_tode, sr->sr_selfid);
936 continue;
937 }
938
939 if (md_in_daemon)
940 url_addl(&url_used, sr->sr_selfid);
941
942 /* Skip cached records */
943 tsr = sr_in_cache(sr->sr_selfid);
944 if (tsr != (md_set_record *)NULL) {
945 if (MD_MNSET_REC(sr)) {
946 mnsr = (struct md_mnset_record *)sr;
947 Free(mnsr);
948 } else {
949 Free(sr);
950 }
951 if (md_in_daemon)
952 for (dr = tsr->sr_drivechain;
953 dr != (md_drive_record *)NULL;
954 dr = dr->dr_next)
955 url_addl(&url_used, dr->dr_selfid);
956 continue;
957 }
958
959 /* Mark the record as one to be checked */
960 sr->sr_flags |= MD_SR_CHECK;
961
962 sr_cache_add(sr);
963
964 /* If MNdiskset, go get the node records */
965 if (MD_MNSET_REC(sr)) {
966 mnsr = (struct md_mnset_record *)sr;
967 mnsr->sr_nodechain = NULL;
968 p = &mnsr->sr_noderec;
969 while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
970 MDDB_UR_NR, p, ep)) != NULL) {
971 nr->nr_next = NULL;
972
973 if (md_in_daemon)
974 url_addl(&url_used, nr->nr_selfid);
975
976 /*
977 * Turn off ALIVE node flag based on member
978 * list.
979 * If ALIVE flag is not set, reset OWN flag.
980 * If this node is mynode, set the OWN flag
981 * to match the ownership of the diskset.
982 */
983 if (md_in_daemon) {
984 nr->nr_flags &= ~MD_MN_NODE_ALIVE;
985 nl2 = nl;
986 while (nl2) {
987 /*
988 * If in member list,
989 * set alive.
990 */
991 if (nl2->msl_node_id ==
992 nr->nr_nodeid) {
993 nr->nr_flags |=
994 MD_MN_NODE_ALIVE;
995 break;
996 }
997 nl2 = nl2->next;
998 }
999 /*
1000 * If mynode is in member list, then
1001 * check to see if set is snarfed.
1002 * If set snarfed, set own flag;
1003 * otherwise reset it.
1004 * Don't change master even if
1005 * node isn't an owner node, since
1006 * node may be master, but hasn't
1007 * joined the set yet.
1008 */
1009 if (nr->nr_flags & MD_MN_NODE_ALIVE) {
1010 if (strcmp(nr->nr_nodename,
1011 mynode()) == 0) {
1012 if (s_ownset(
1013 mnsr->sr_setno, ep)) {
1014 nr->nr_flags |=
1015 MD_MN_NODE_OWN;
1016 } else {
1017 nr->nr_flags &=
1018 ~MD_MN_NODE_OWN;
1019 }
1020 }
1021 } else {
1022 if (strcmp(nr->nr_nodename,
1023 mynode()) == 0) {
1024 /*
1025 * If my node isn't in member
1026 * list then reset master.
1027 */
1028 mnsr = (struct
1029 md_mnset_record *)sr;
1030 mnsr->sr_master_nodeid =
1031 MD_MN_INVALID_NID;
1032 mnsr->sr_master_nodenm[0] =
1033 '\0';
1034 }
1035 nr->nr_flags &= ~MD_MN_NODE_OWN;
1036 }
1037 }
1038
1039 /*
1040 * Must grab nr_nextrec now since
1041 * mnnr_cache_add may change it
1042 * (mnnr_cache_add is storing the nodes in
1043 * an ascending nodeid order list in order
1044 * to support reconfig).
1045 */
1046 if (nr->nr_nextrec != 0)
1047 p = &nr->nr_nextrec;
1048 else
1049 p = NULL;
1050
1051 mnnr_cache_add((struct md_mnset_record *)sr,
1052 nr);
1053
1054 if ((md_in_daemon) &&
1055 (strcmp(nr->nr_nodename, mynode()) == 0)) {
1056 (void) memset(&snp, 0, sizeof (snp));
1057 snp.sn_nodeid = nr->nr_nodeid;
1058 snp.sn_setno = mnsr->sr_setno;
1059 if (metaioctl(MD_MN_SET_NODEID, &snp,
1060 &snp.sn_mde, NULL) != 0) {
1061 (void) mdstealerror(ep,
1062 &snp.sn_mde);
1063 }
1064 }
1065
1066 if (p == NULL)
1067 break;
1068 }
1069 if (! mdisok(ep)) {
1070 if (! mdissyserror(ep, ENOENT))
1071 goto out;
1072 mdclrerror(ep);
1073 }
1074 }
1075
1076 if (sr->sr_driverec == 0)
1077 continue;
1078
1079 /* Go get the drive records */
1080 p = &sr->sr_driverec;
1081 while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
1082 MDDB_UR_DR, p, ep)) != NULL) {
1083 dr->dr_next = NULL;
1084
1085 if (md_in_daemon)
1086 url_addl(&url_used, dr->dr_selfid);
1087
1088 dr_cache_add(sr, dr);
1089
1090 if (dr->dr_nextrec == 0)
1091 break;
1092
1093 p = &dr->dr_nextrec;
1094 }
1095 if (! mdisok(ep)) {
1096 if (! mdissyserror(ep, ENOENT))
1097 goto out;
1098 mdclrerror(ep);
1099 /*
1100 * If dr_nextrec was not valid, or we had some
1101 * problem getting the record, we end up here.
1102 * get_ur_rec() zeroes the recid we passed in,
1103 * if we had a failure getting a record using a key,
1104 * so we simply commit the set record and valid
1105 * drive records, if this fails, we hand an error
1106 * back to the caller.
1107 */
1108 commitset(sr, FALSE, ep);
1109 if (! mdisok(ep))
1110 goto out;
1111 }
1112 }
1113 if (! mdisok(ep)) {
1114 if (! mdissyserror(ep, ENOENT))
1115 goto out;
1116 mdclrerror(ep);
1117 }
1118
1119 /*
1120 * If the daemon called, go through the USER records and cleanup
1121 * any that are not used by valid sets.
1122 */
1123 if (md_in_daemon) {
1124 id = 0;
1125 /* Make a list of records to delete */
1126 while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET,
1127 MDDB_USER, 0, &id, ep)) != NULL) {
1128 if (reqp->ur_type2 != MDDB_UR_SR &&
1129 reqp->ur_type2 != MDDB_UR_DR) {
1130 Free((void *)(uintptr_t)reqp->ur_data);
1131 Free(reqp);
1132 continue;
1133 }
1134 if (! url_findl(url_used, reqp->ur_recid))
1135 url_addl(&url_tode, reqp->ur_recid);
1136 Free((void *)(uintptr_t)reqp->ur_data);
1137 Free(reqp);
1138 }
1139 if (! mdisok(ep)) {
1140 if (! mdissyserror(ep, ENOENT))
1141 goto out;
1142 mdclrerror(ep);
1143 }
1144
1145 /* Delete all the delete listed records */
1146 for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) {
1147 s_delrec(urlp->url_recid, &xep);
1148 if (! mdisok(&xep))
1149 mdclrerror(&xep);
1150 }
1151 }
1152
1153 url_freel(&url_used);
1154 url_freel(&url_tode);
1155
1156 if (nodecnt)
1157 meta_free_nodelist(nl);
1158
1159 /* Mark the snarf complete */
1160 setsnarfdone = 2;
1161 return (0);
1162
1163 out:
1164 url_freel(&url_used);
1165 url_freel(&url_tode);
1166
1167 sr_cache_flush(1);
1168
1169 if (nodecnt)
1170 meta_free_nodelist(nl);
1171
1172 /* Snarf failed, reset state */
1173 setsnarfdone = 0;
1174
1175 return (-1);
1176 }
1177
1178 void
sr_cache_add(md_set_record * sr)1179 sr_cache_add(md_set_record *sr)
1180 {
1181 md_set_record *tsr;
1182
1183 assert(setsnarfdone != 0);
1184
1185 if (setrecords == NULL) {
1186 setrecords = sr;
1187 return;
1188 }
1189
1190 for (tsr = setrecords; tsr->sr_next != NULL; tsr = tsr->sr_next)
1191 /* void */;
1192 tsr->sr_next = sr;
1193 }
1194
1195 void
sr_cache_del(mddb_recid_t recid)1196 sr_cache_del(mddb_recid_t recid)
1197 {
1198 md_set_record *sr, *tsr;
1199 md_mnset_record *mnsr;
1200
1201 assert(setsnarfdone != 0);
1202
1203 for (sr = tsr = setrecords; sr != NULL; tsr = sr, sr = sr->sr_next) {
1204 if (sr->sr_selfid != recid)
1205 continue;
1206 if (sr == setrecords)
1207 setrecords = sr->sr_next;
1208 else
1209 tsr->sr_next = sr->sr_next;
1210 if (MD_MNSET_REC(sr)) {
1211 mnsr = (struct md_mnset_record *)sr;
1212 Free(mnsr);
1213 } else {
1214 Free(sr);
1215 }
1216 break;
1217 }
1218 if (setrecords == NULL)
1219 setsnarfdone = 0;
1220 }
1221
1222 void
dr_cache_add(md_set_record * sr,md_drive_record * dr)1223 dr_cache_add(md_set_record *sr, md_drive_record *dr)
1224 {
1225 md_drive_record *tdr;
1226
1227 assert(setsnarfdone != 0);
1228
1229 assert(sr != NULL);
1230
1231 if (sr->sr_drivechain == NULL) {
1232 sr->sr_drivechain = dr;
1233 sr->sr_driverec = dr->dr_selfid;
1234 return;
1235 }
1236
1237 for (tdr = sr->sr_drivechain; tdr->dr_next != NULL; tdr = tdr->dr_next)
1238 /* void */;
1239
1240 tdr->dr_next = dr;
1241 tdr->dr_nextrec = dr->dr_selfid;
1242 }
1243
1244 void
dr_cache_del(md_set_record * sr,mddb_recid_t recid)1245 dr_cache_del(md_set_record *sr, mddb_recid_t recid)
1246 {
1247 md_drive_record *dr;
1248 md_drive_record *tdr;
1249
1250 assert(setsnarfdone != 0);
1251
1252 assert(sr != NULL);
1253
1254 for (dr = tdr = sr->sr_drivechain; dr != NULL;
1255 tdr = dr, dr = dr->dr_next) {
1256 if (dr->dr_selfid != recid)
1257 continue;
1258
1259 if (dr == sr->sr_drivechain) {
1260 sr->sr_drivechain = dr->dr_next;
1261 sr->sr_driverec = dr->dr_nextrec;
1262 } else {
1263 tdr->dr_next = dr->dr_next;
1264 tdr->dr_nextrec = dr->dr_nextrec;
1265 }
1266 Free(dr);
1267 break;
1268 }
1269 }
1270
1271 /*
1272 * Nodes must be kept in ascending node id order in order to
1273 * support reconfig.
1274 *
1275 * This routine may change nr->nr_next and nr->nr_nextrec.
1276 */
1277 void
mnnr_cache_add(md_mnset_record * mnsr,md_mnnode_record * nr)1278 mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr)
1279 {
1280 md_mnnode_record *tnr, *tnr_prev;
1281
1282 assert(mnsr != NULL);
1283
1284 if (mnsr->sr_nodechain == NULL) {
1285 mnsr->sr_nodechain = nr;
1286 mnsr->sr_noderec = nr->nr_selfid;
1287 return;
1288 }
1289
1290 /*
1291 * If new_record->nodeid < first_record->nodeid,
1292 * put new_record at beginning of list.
1293 */
1294 if (nr->nr_nodeid < mnsr->sr_nodechain->nr_nodeid) {
1295 nr->nr_next = mnsr->sr_nodechain;
1296 nr->nr_nextrec = mnsr->sr_noderec;
1297 mnsr->sr_nodechain = nr;
1298 mnsr->sr_noderec = nr->nr_selfid;
1299 return;
1300 }
1301
1302 /*
1303 * Walk list looking for place to insert record.
1304 */
1305
1306 tnr_prev = mnsr->sr_nodechain;
1307 tnr = tnr_prev->nr_next;
1308 while (tnr) {
1309 /* Insert new record between tnr_prev and tnr */
1310 if (nr->nr_nodeid < tnr->nr_nodeid) {
1311 nr->nr_next = tnr;
1312 nr->nr_nextrec = tnr->nr_selfid; /* tnr's recid */
1313 tnr_prev->nr_next = nr;
1314 tnr_prev->nr_nextrec = nr->nr_selfid;
1315 return;
1316 }
1317 tnr_prev = tnr;
1318 tnr = tnr->nr_next;
1319 }
1320
1321 /*
1322 * Add record to end of list.
1323 */
1324 tnr_prev->nr_next = nr;
1325 tnr_prev->nr_nextrec = nr->nr_selfid;
1326 }
1327
1328 void
mnnr_cache_del(md_mnset_record * mnsr,mddb_recid_t recid)1329 mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid)
1330 {
1331 md_mnnode_record *nr;
1332 md_mnnode_record *tnr;
1333
1334 assert(mnsr != NULL);
1335
1336 tnr = 0;
1337 nr = mnsr->sr_nodechain;
1338 while (nr) {
1339 if (nr->nr_selfid != recid) {
1340 tnr = nr;
1341 nr = nr->nr_next;
1342 continue;
1343 }
1344
1345 if (nr == mnsr->sr_nodechain) {
1346 mnsr->sr_nodechain = nr->nr_next;
1347 mnsr->sr_noderec = nr->nr_nextrec;
1348 } else {
1349 tnr->nr_next = nr->nr_next;
1350 tnr->nr_nextrec = nr->nr_nextrec;
1351 }
1352 Free(nr);
1353 break;
1354 }
1355 }
1356
1357 int
metad_isautotakebyname(char * setname)1358 metad_isautotakebyname(char *setname)
1359 {
1360 md_error_t error = mdnullerror;
1361 md_set_record *sr;
1362
1363 if (md_in_daemon)
1364 assert(setsnarfdone != 0);
1365 else if (set_snarf(&error)) {
1366 mdclrerror(&error);
1367 return (0);
1368 }
1369
1370 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1371 if (strcmp(setname, sr->sr_setname) == 0) {
1372 if (sr->sr_flags & MD_SR_AUTO_TAKE)
1373 return (1);
1374 return (0);
1375 }
1376 }
1377
1378 return (0);
1379 }
1380
1381 int
metad_isautotakebynum(set_t setno)1382 metad_isautotakebynum(set_t setno)
1383 {
1384 md_error_t error = mdnullerror;
1385 md_set_record *sr;
1386
1387 if (md_in_daemon)
1388 assert(setsnarfdone != 0);
1389 else if (set_snarf(&error)) {
1390 mdclrerror(&error);
1391 return (0);
1392 }
1393
1394 for (sr = setrecords; sr != NULL; sr = sr->sr_next) {
1395 if (setno == sr->sr_setno) {
1396 if (sr->sr_flags & MD_SR_AUTO_TAKE)
1397 return (1);
1398 return (0);
1399 }
1400 }
1401
1402 return (0);
1403 }
1404
1405 md_set_record *
metad_getsetbyname(char * setname,md_error_t * ep)1406 metad_getsetbyname(char *setname, md_error_t *ep)
1407 {
1408 md_set_record *sr;
1409 char buf[100];
1410
1411 assert(setsnarfdone != 0);
1412
1413 for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1414 if (strcmp(setname, sr->sr_setname) == 0)
1415 return (sr);
1416
1417 (void) snprintf(buf, sizeof (buf), "setname \"%s\"", setname);
1418 (void) mderror(ep, MDE_NO_SET, buf);
1419 return (NULL);
1420 }
1421
1422 md_set_record *
metad_getsetbynum(set_t setno,md_error_t * ep)1423 metad_getsetbynum(set_t setno, md_error_t *ep)
1424 {
1425 md_set_record *sr;
1426 char buf[100];
1427
1428 if (md_in_daemon)
1429 assert(setsnarfdone != 0);
1430 else if (set_snarf(ep)) /* BYPASS DAEMON mode */
1431 return (NULL);
1432
1433 for (sr = setrecords; sr != NULL; sr = sr->sr_next)
1434 if (setno == sr->sr_setno)
1435 return (sr);
1436
1437 (void) sprintf(buf, "setno %u", setno);
1438 (void) mderror(ep, MDE_NO_SET, buf);
1439 return (NULL);
1440 }
1441
1442
1443 /*
1444 * Commit the set record and all of its associated records
1445 * (drive records, node records for a MNset) to the local mddb.
1446 */
1447 void
commitset(md_set_record * sr,int inc_genid,md_error_t * ep)1448 commitset(md_set_record *sr, int inc_genid, md_error_t *ep)
1449 {
1450 int drc, nrc, rc;
1451 int *recs;
1452 uint_t size;
1453 md_drive_record *dr;
1454 mddb_userreq_t req;
1455 md_mnset_record *mnsr;
1456 md_mnnode_record *nr;
1457
1458 assert(setsnarfdone != 0);
1459
1460 /*
1461 * Cluster nodename support
1462 * Convert nodename -> nodeid
1463 * Don't do this for MN disksets since we've already stored
1464 * both the nodeid and name.
1465 */
1466 if (!(MD_MNSET_REC(sr)))
1467 sdssc_cm_sr_nm2nid(sr);
1468
1469 /* Send down to kernel the data in mddb USER set record */
1470 if (inc_genid)
1471 sr->sr_genid++;
1472 (void) memset(&req, 0, sizeof (req));
1473 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid)
1474 if (MD_MNSET_REC(sr)) {
1475 req.ur_size = sizeof (*mnsr);
1476 } else {
1477 req.ur_size = sizeof (*sr);
1478 }
1479 req.ur_data = (uintptr_t)sr;
1480 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1481 (void) mdstealerror(ep, &req.ur_mde);
1482 return;
1483 }
1484
1485 /*
1486 * Walk through the drive records associated with this set record
1487 * and send down to kernel the data in mddb USER drive record.
1488 */
1489 drc = 0;
1490 dr = sr->sr_drivechain;
1491 while (dr) {
1492 if (inc_genid)
1493 dr->dr_genid++;
1494 METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid)
1495 req.ur_size = sizeof (*dr);
1496 req.ur_data = (uintptr_t)dr;
1497 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1498 (void) mdstealerror(ep, &req.ur_mde);
1499 return;
1500 }
1501 drc++;
1502 dr = dr->dr_next;
1503 }
1504
1505
1506 /*
1507 * If this set is a multi-node set -
1508 * walk through the node records associated with this set record
1509 * and send down to kernel the data in mddb USER node record.
1510 */
1511 nrc = 0;
1512 if (MD_MNSET_REC(sr)) {
1513 mnsr = (struct md_mnset_record *)sr;
1514 nr = mnsr->sr_nodechain;
1515 while (nr) {
1516 if (inc_genid)
1517 nr->nr_genid++;
1518 METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid)
1519 req.ur_size = sizeof (*nr);
1520 req.ur_data = (uint64_t)(uintptr_t)nr;
1521 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL)
1522 != 0) {
1523 (void) mdstealerror(ep, &req.ur_mde);
1524 return;
1525 }
1526 nrc++;
1527 nr = nr->nr_next;
1528 }
1529 }
1530
1531 /*
1532 * Set up list of mddb USER recids containing set and drive records
1533 * and node records if a MNset.
1534 */
1535 rc = 0;
1536 size = (nrc + drc + 2) * sizeof (int);
1537 recs = Zalloc(size);
1538 /* First recid in list is the set record's id */
1539 recs[rc] = sr->sr_selfid;
1540 rc++;
1541 dr = sr->sr_drivechain;
1542 while (dr) {
1543 /* Now, fill in the drive record ids */
1544 recs[rc] = dr->dr_selfid;
1545 dr = dr->dr_next;
1546 rc++;
1547 }
1548 if (MD_MNSET_REC(sr)) {
1549 nr = mnsr->sr_nodechain;
1550 while (nr) {
1551 /* If a MNset, fill in the node record ids */
1552 recs[rc] = nr->nr_selfid;
1553 nr = nr->nr_next;
1554 rc++;
1555 }
1556 }
1557 /* Set last record to null recid */
1558 recs[rc] = 0;
1559
1560 /* Write out the set and drive and node records to the local mddb */
1561 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0);
1562 req.ur_size = size;
1563 req.ur_data = (uintptr_t)recs;
1564 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
1565 (void) mdstealerror(ep, &req.ur_mde);
1566 return;
1567 }
1568
1569 /*
1570 * Cluster nodename support
1571 * Convert nodeid -> nodename
1572 * Don't do this for MN disksets since we've already stored
1573 * both the nodeid and name.
1574 */
1575 if (!(MD_MNSET_REC(sr)))
1576 sdssc_cm_sr_nid2nm(sr);
1577
1578 Free(recs);
1579 }
1580
1581 /*
1582 * This routine only handles returns a md_set_record structure even
1583 * if the set record describes a MN set. This will allow pre-MN
1584 * SVM RPC code to access a MN set record and to display it.
1585 *
1586 * The MN SVM RPC code detects if the set record returned describes
1587 * a MN set and then will copy it using mnsetdup.
1588 */
1589 md_set_record *
setdup(md_set_record * sr)1590 setdup(md_set_record *sr)
1591 {
1592 md_set_record *tsr = NULL;
1593 md_drive_record **tdrpp = NULL;
1594
1595 if (sr && (tsr = Malloc(sizeof (*sr))) != NULL) {
1596 (void) memmove(tsr, sr, sizeof (*sr));
1597 tsr->sr_next = NULL;
1598 tdrpp = &tsr->sr_drivechain;
1599 while (*tdrpp) {
1600 *tdrpp = drdup(*tdrpp);
1601 tdrpp = &(*tdrpp)->dr_next;
1602 }
1603 }
1604 return (tsr);
1605 }
1606
1607 /*
1608 * This routine only copies MN set records. If a non-MN set
1609 * record was passed in NULL pointer will be returned.
1610 */
1611 md_mnset_record *
mnsetdup(md_mnset_record * mnsr)1612 mnsetdup(md_mnset_record *mnsr)
1613 {
1614 md_mnset_record *tmnsr = NULL;
1615 md_drive_record **tdrpp = NULL;
1616 md_mnnode_record **tnrpp = NULL;
1617
1618 if (!MD_MNSET_REC(mnsr)) {
1619 return (NULL);
1620 }
1621
1622 if (mnsr && (tmnsr = Malloc(sizeof (*mnsr))) != NULL) {
1623 (void) memmove(tmnsr, mnsr, sizeof (*mnsr));
1624 tmnsr->sr_next = NULL;
1625 tdrpp = &tmnsr->sr_drivechain;
1626 while (*tdrpp) {
1627 *tdrpp = drdup(*tdrpp);
1628 tdrpp = &(*tdrpp)->dr_next;
1629 }
1630 tnrpp = &tmnsr->sr_nodechain;
1631 while (*tnrpp) {
1632 *tnrpp = nrdup(*tnrpp);
1633 tnrpp = &(*tnrpp)->nr_next;
1634 }
1635 }
1636 return (tmnsr);
1637 }
1638
1639 md_drive_record *
drdup(md_drive_record * dr)1640 drdup(md_drive_record *dr)
1641 {
1642 md_drive_record *tdr = NULL;
1643
1644 if (dr && (tdr = Malloc(sizeof (*dr))) != NULL)
1645 (void) memmove(tdr, dr, sizeof (*dr));
1646 return (tdr);
1647 }
1648
1649 md_mnnode_record *
nrdup(md_mnnode_record * nr)1650 nrdup(md_mnnode_record *nr)
1651 {
1652 md_mnnode_record *tnr = NULL;
1653
1654 if (nr && (tnr = Malloc(sizeof (*nr))) != NULL)
1655 (void) memmove(tnr, nr, sizeof (*nr));
1656 return (tnr);
1657 }
1658
1659 /*
1660 * Duplicate parts of the drive decriptor list for this node.
1661 * Only duplicate the drive name string in the mddrivename structure, don't
1662 * need to copy any other pointers since only interested in the flags and
1663 * the drive name (i.e. other pointers will be set to NULL).
1664 * Returns NULL if failure due to Malloc failure.
1665 * Returns pointer (non-NULL) to dup'd list if successful.
1666 */
1667 md_drive_desc *
dd_list_dup(md_drive_desc * dd)1668 dd_list_dup(md_drive_desc *dd)
1669 {
1670 md_drive_desc *orig_dd;
1671 md_drive_desc *copy_dd = NULL, *copy_dd_prev = NULL;
1672 md_drive_desc *copy_dd_head = NULL;
1673 mddrivename_t *copy_dnp;
1674 char *copy_cname;
1675 char *copy_devid;
1676
1677 if (dd == NULL)
1678 return (NULL);
1679
1680 orig_dd = dd;
1681
1682 while (orig_dd) {
1683 copy_dd = Zalloc(sizeof (*copy_dd));
1684 copy_dnp = Zalloc(sizeof (mddrivename_t));
1685 copy_cname = Zalloc(sizeof (orig_dd->dd_dnp->cname));
1686 if (orig_dd->dd_dnp->devid) {
1687 copy_devid = Zalloc(sizeof (orig_dd->dd_dnp->devid));
1688 } else {
1689 copy_devid = NULL;
1690 }
1691 copy_dd->dd_next = NULL;
1692 if ((copy_dd == NULL) || (copy_dnp == NULL) ||
1693 (copy_cname == NULL)) {
1694 while (copy_dd_head) {
1695 copy_dd = copy_dd_head->dd_next;
1696 Free(copy_dd_head);
1697 copy_dd_head = copy_dd;
1698 }
1699 if (copy_dnp)
1700 Free(copy_dnp);
1701 if (copy_dd)
1702 Free(copy_dd);
1703 if (copy_cname)
1704 Free(copy_cname);
1705 if (copy_devid)
1706 Free(copy_devid);
1707 return (NULL);
1708 }
1709 (void) memmove(copy_dd, orig_dd, sizeof (*orig_dd));
1710 (void) strlcpy(copy_cname, orig_dd->dd_dnp->cname,
1711 sizeof (orig_dd->dd_dnp->cname));
1712 copy_dd->dd_next = NULL;
1713 copy_dd->dd_dnp = copy_dnp;
1714 copy_dd->dd_dnp->cname = copy_cname;
1715 if (copy_devid) {
1716 (void) strlcpy(copy_devid, orig_dd->dd_dnp->devid,
1717 sizeof (orig_dd->dd_dnp->devid));
1718 }
1719
1720 if (copy_dd_prev == NULL) {
1721 copy_dd_head = copy_dd;
1722 copy_dd_prev = copy_dd;
1723 } else {
1724 copy_dd_prev->dd_next = copy_dd;
1725 copy_dd_prev = copy_dd;
1726 }
1727 orig_dd = orig_dd->dd_next;
1728 }
1729 copy_dd->dd_next = NULL;
1730 return (copy_dd_head);
1731 }
1732
1733 void
sr_cache_flush(int flushnames)1734 sr_cache_flush(int flushnames)
1735 {
1736 md_set_record *sr, *tsr;
1737 md_mnset_record *mnsr;
1738 md_drive_record *dr, *tdr;
1739 md_mnnode_record *nr, *tnr;
1740
1741 sr = tsr = setrecords;
1742 while (sr != NULL) {
1743 dr = tdr = sr->sr_drivechain;
1744 while (dr != NULL) {
1745 tdr = dr;
1746 dr = dr->dr_next;
1747 Free(tdr);
1748 }
1749 tsr = sr;
1750 sr = sr->sr_next;
1751 if (MD_MNSET_REC(tsr)) {
1752 mnsr = (struct md_mnset_record *)tsr;
1753 nr = tnr = mnsr->sr_nodechain;
1754 while (nr != NULL) {
1755 tnr = nr;
1756 nr = nr->nr_next;
1757 Free(tnr);
1758 }
1759 Free(mnsr);
1760 } else {
1761 Free(tsr);
1762 }
1763 }
1764
1765 setrecords = NULL;
1766
1767 setsnarfdone = 0;
1768
1769 /* This will cause the other caches to be cleared */
1770 if (flushnames)
1771 metaflushnames(0);
1772 }
1773
1774 void
sr_cache_flush_setno(set_t setno)1775 sr_cache_flush_setno(set_t setno)
1776 {
1777 md_set_record *sr, *tsr;
1778 md_mnset_record *mnsr;
1779 md_drive_record *dr, *tdr;
1780
1781 assert(setsnarfdone != 0);
1782
1783 for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) {
1784 if (sr->sr_setno != setno)
1785 continue;
1786
1787 dr = tdr = sr->sr_drivechain;
1788 while (dr != NULL) {
1789 tdr = dr;
1790 dr = dr->dr_next;
1791 Free(tdr);
1792 }
1793 if (sr == setrecords)
1794 setrecords = sr->sr_next;
1795 else
1796 tsr->sr_next = sr->sr_next;
1797 if (MD_MNSET_REC(sr)) {
1798 mnsr = (struct md_mnset_record *)sr;
1799 Free(mnsr);
1800 } else {
1801 Free(sr);
1802 }
1803 break;
1804 }
1805
1806 setsnarfdone = 0;
1807
1808 /* This will cause the other caches to be cleared */
1809 metaflushnames(0);
1810 }
1811
1812 int
s_ownset(set_t setno,md_error_t * ep)1813 s_ownset(set_t setno, md_error_t *ep)
1814 {
1815 mddb_ownset_t ownset_arg;
1816
1817 ownset_arg.setno = setno;
1818 ownset_arg.owns_set = MD_SETOWNER_NONE;
1819
1820 if (metaioctl(MD_DB_OWNSET, &ownset_arg, ep, NULL) != 0)
1821 return (0);
1822
1823 return (ownset_arg.owns_set);
1824 }
1825
/*
 * Delete the diskset named setname from this host.
 *
 * In order, this routine:
 *   - looks the set record up by name (returns at once if that fails);
 *   - for a multi-node set this node owns, halts the set;
 *   - unlinks /dev/md/<setname> and the set's lock file;
 *   - deletes every node record (MN set) and every drive record from
 *     the mddb, posting a sysevent and dropping the record from the
 *     cache as it goes;
 *   - deletes the set record itself and posts a set-delete sysevent;
 *   - disables the diskset SMF services when no (MN) sets remain;
 *   - removes the set record from the cache and frees it.
 *
 * If any MD_DB_DELETE ioctl fails, its error is moved into ep, the set
 * record is passed to free_sr() and the routine returns with the
 * remaining steps not performed.
 */
void
s_delset(char *setname, md_error_t *ep)
{
	md_set_record		*sr;
	md_set_record		*tsr;
	md_drive_record		*dr;
	md_drive_record		*tdr;
	md_mnnode_record	*nr, *tnr;
	mddb_userreq_t		req;
	char			stringbuf[100];
	int			i;
	mdsetname_t		*sp = NULL;
	mddrivename_t		*dn = NULL;
	mdname_t		*np = NULL;
	md_dev64_t		dev;
	side_t			myside = MD_SIDEWILD;
	md_error_t		xep = mdnullerror;
	md_mnset_record		*mnsr;
	int			num_sets = 0;
	int			num_mn_sets = 0;

	(void) memset(&req, 0, sizeof (mddb_userreq_t));

	if ((sr = getsetbyname(setname, ep)) == NULL)
		return;

	/*
	 * The set name handle is only needed for halt_set() and for the
	 * drive name lookups below; a failure here is ignored and sp is
	 * checked for NULL before the drive lookups.
	 */
	sp = metasetnosetname(sr->sr_setno, &xep);
	mdclrerror(&xep);

	if (MD_MNSET_REC(sr)) {
		/*
		 * If this node is a set owner, halt the set before
		 * deleting the set records.  Ignore any errors since
		 * s_ownset and halt_set could fail if panic had occurred
		 * during the add/delete of a node.
		 */
		if (s_ownset(sr->sr_setno, &xep)) {
			mdclrerror(&xep);
			if (halt_set(sp, &xep))
				mdclrerror(&xep);
		}
	}

	/* Remove the set's /dev/md entry and its lock file; best effort. */
	(void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname);
	(void) unlink(stringbuf);
	(void) unlink(meta_lock_name(sr->sr_setno));

	if (MD_MNSET_REC(sr)) {
		mnsr = (struct md_mnset_record *)sr;
		nr = mnsr->sr_nodechain;
		while (nr) {
			/* Setting myside for later use */
			if (strcmp(mynode(), nr->nr_nodename) == 0)
				myside = nr->nr_nodeid;

			(void) memset(&req, 0, sizeof (req));
			METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid)
			if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde,
			    NULL) != 0) {
				(void) mdstealerror(ep, &req.ur_mde);
				free_sr(sr);
				return;
			}
			/*
			 * Step to the next record first: mnnr_cache_del()
			 * frees the record being removed.
			 */
			tnr = nr;
			nr = nr->nr_next;

			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
			    sr->sr_setno, tnr->nr_nodeid);

			mnnr_cache_del((struct md_mnset_record *)sr,
			    tnr->nr_selfid);
		}
	} else {
		/*
		 * Traditional set: hosts live in the sr_nodes array, so
		 * there are no node records to delete; only post the
		 * host-removed event for each slot in use.
		 */
		for (i = 0; i < MD_MAXSIDES; i++) {
			/* Skip empty slots */
			if (sr->sr_nodes[i][0] == '\0')
				continue;

			/* The slot index of this host is its side number */
			if (strcmp(mynode(), sr->sr_nodes[i]) == 0)
				myside = i;

			SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST,
			    sr->sr_setno, i);
		}
	}

	dr = sr->sr_drivechain;
	while (dr) {
		(void) memset(&req, 0, sizeof (req));
		METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid)
		if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
			(void) mdstealerror(ep, &req.ur_mde);
			free_sr(sr);
			return;
		}
		/*
		 * Step to the next record first: dr_cache_del() frees the
		 * record being removed.
		 */
		tdr = dr;
		dr = dr->dr_next;

		/*
		 * Work out the device number to report in the events.
		 * This is best effort: if this host's side is unknown, the
		 * set name handle is missing, or any lookup fails, the
		 * error is cleared and NODEV64 is reported instead.
		 */
		dev = NODEV64;
		if (myside != MD_SIDEWILD && sp != NULL) {
			dn = metadrivename_withdrkey(sp, myside,
			    tdr->dr_key, MD_BASICNAME_OK, &xep);
			if (dn != NULL) {
				uint_t	rep_slice;

				np = NULL;
				if (meta_replicaslice(dn, &rep_slice,
				    &xep) == 0) {
					np = metaslicename(dn, rep_slice, &xep);
				}

				if (np != NULL)
					dev = np->dev;
				else
					mdclrerror(&xep);
			} else
				mdclrerror(&xep);
		} else
			mdclrerror(&xep);

		/*
		 * Two events per drive: removed from this set, and added
		 * to the local set.
		 */
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE,
		    sr->sr_setno, dev);
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE,
		    MD_LOCAL_SET, dev);

		dr_cache_del(sr, tdr->dr_selfid);

	}

	/* All dependent records are gone; delete the set record itself. */
	(void) memset(&req, 0, sizeof (req));
	METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid)
	if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) {
		(void) mdstealerror(ep, &req.ur_mde);
		free_sr(sr);
		return;
	}

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno,
	    NODEV64);

	/*
	 * Count the sets (and MN sets) still in the cache, leaving out the
	 * one being deleted.
	 */
	for (tsr = setrecords; tsr; tsr = tsr->sr_next) {
		if (tsr == sr)
			continue;

		num_sets++;
		if (MD_MNSET_REC(tsr))
			num_mn_sets++;
	}

	/* No multi-node sets remain */
	if (num_mn_sets == 0)
		(void) meta_smf_disable(META_SMF_MN_DISKSET, NULL);

	/* The set we just deleted is the only one left */
	if (num_sets == 0)
		(void) meta_smf_disable(META_SMF_DISKSET, NULL);

	/*
	 * NOTE(review): sr_cache_del() is followed by free_sr() on the
	 * same pointer; confirm against getsetbyname()/free_sr() that sr
	 * is not the cache entry itself at this point.
	 */
	sr_cache_del(sr->sr_selfid);
	free_sr(sr);

}
1986
1987 void
s_delrec(mddb_recid_t recid,md_error_t * ep)1988 s_delrec(mddb_recid_t recid, md_error_t *ep)
1989 {
1990 mddb_userreq_t req;
1991
1992 (void) memset(&req, 0, sizeof (req));
1993
1994 METAD_SETUP_SR(MD_DB_DELETE, recid)
1995
1996 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0)
1997 (void) mdstealerror(ep, &req.ur_mde);
1998 }
1999
/*
 * resnarf the imported set
 *
 * Read the set record for set number setno, and the drive records
 * chained off it, from the local set's mddb and add them to the set
 * record cache.
 *
 * Returns 0 on success.  Returns -1 on failure; on the "out" path the
 * whole set record cache is flushed and marked as not snarfed.
 * Multi-node disksets are refused (this also takes the failure path).
 */
int
resnarf_set(
	set_t			setno,
	md_error_t		*ep
)
{
	md_set_record	*sr;
	md_drive_record	*dr;
	mddb_recid_t	id, *p;

	/* A stale database is tolerated; any other setup error is fatal. */
	if (meta_setup_db_locations(ep) != 0) {
		if (! mdismddberror(ep, MDE_DB_STALE))
			return (-1);
		mdclrerror(ep);
	}

	/*
	 * Non-zero while the records are being read in, so that the cache
	 * routines called below (which assert setsnarfdone != 0) may be
	 * used.
	 */
	setsnarfdone = 1;

	/* Step through every set record in the local set's mddb. */
	id = 0;
	while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id,
	    ep)) != NULL) {

		/*
		 * NOTE(review): a non-matching record is dropped here without
		 * being released; confirm whether get_ur_rec() hands over
		 * ownership of the record it returns.
		 */
		if (sr->sr_setno != setno)
			continue;

		/* Don't allow resnarf of a multi-node diskset */
		if (MD_MNSET_REC(sr))
			goto out;

		/* The in-memory links are rebuilt from the record ids. */
		sr->sr_next = NULL;
		sr->sr_drivechain = NULL;

		if (md_in_daemon)
			url_addl(&url_used, sr->sr_selfid);

		sr->sr_flags |= MD_SR_CHECK;

		sr_cache_add(sr);

		/* No drive records: nothing more to read for this set. */
		if (sr->sr_driverec == 0)
			break;

		/*
		 * Follow the chain of drive record ids, starting with the
		 * one stored in the set record, fetching each drive record
		 * by key and appending it to the cached set record.
		 */
		p = &sr->sr_driverec;
		while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY,
		    MDDB_UR_DR, p, ep)) != NULL) {
			dr->dr_next = NULL;

			if (md_in_daemon)
				url_addl(&url_used, dr->dr_selfid);

			dr_cache_add(sr, dr);

			/* End of the drive record chain. */
			if (dr->dr_nextrec == 0)
				break;

			p = &dr->dr_nextrec;
		}
		/*
		 * An ENOENT error from the drive record fetch is not fatal:
		 * it is cleared and the set is committed as it now stands in
		 * the cache.  Any other error fails the resnarf.
		 */
		if (! mdisok(ep)) {
			if (! mdissyserror(ep, ENOENT))
				goto out;
			mdclrerror(ep);
			commitset(sr, FALSE, ep);
			if (! mdisok(ep))
				goto out;
		}
	}
	/*
	 * ENOENT left in ep after the set record loop is cleared and not
	 * treated as a failure; any other error is.
	 */
	if (! mdisok(ep)) {
		if (! mdissyserror(ep, ENOENT))
			goto out;
		mdclrerror(ep);
	}

	/* Reading is finished. */
	setsnarfdone = 2;

	url_freel(&url_used);
	url_freel(&url_tode);
	return (0);

out:
	/* Failure: drop the recid lists and the whole set record cache. */
	url_freel(&url_used);
	url_freel(&url_tode);

	sr_cache_flush(1);

	setsnarfdone = 0;

	return (-1);
}
2091