/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Just in case we're not in a build environment, make sure that
 * TEXT_DOMAIN gets set to something.
31 */ 32 #if !defined(TEXT_DOMAIN) 33 #define TEXT_DOMAIN "SYS_TEST" 34 #endif 35 36 /* 37 * interface between user land and the set records 38 */ 39 40 #include <meta.h> 41 #include <metad.h> 42 #include <sdssc.h> 43 #include <syslog.h> 44 #include <sys/cladm.h> 45 #include "meta_set_prv.h" 46 47 #include <sys/sysevent/eventdefs.h> 48 #include <sys/sysevent/svm.h> 49 50 static md_set_record *setrecords = NULL; /* head of cache linked list */ 51 static int setsnarfdone = 0; 52 53 typedef struct key_lst_t { 54 side_t kl_side; 55 mdkey_t kl_key; 56 struct key_lst_t *kl_next; 57 } key_lst_t; 58 59 typedef struct ur_recid_lst { 60 mddb_recid_t url_recid; 61 struct ur_recid_lst *url_nx; 62 } ur_recid_lst_t; 63 64 static ur_recid_lst_t *url_used = NULL; 65 static ur_recid_lst_t *url_tode = NULL; 66 67 static void 68 url_addl(ur_recid_lst_t **urlpp, mddb_recid_t recid) 69 { 70 /* Run to the end of the list */ 71 for (/* void */; (*urlpp != NULL); urlpp = &(*urlpp)->url_nx) 72 if ((*urlpp)->url_recid == recid) 73 return; 74 75 /* Add the new member */ 76 *urlpp = Zalloc(sizeof (**urlpp)); 77 if (*urlpp == NULL) 78 return; 79 80 (*urlpp)->url_recid = recid; 81 } 82 83 static int 84 url_findl(ur_recid_lst_t *urlp, mddb_recid_t recid) 85 { 86 while (urlp != NULL) { 87 if (urlp->url_recid == recid) 88 return (1); 89 urlp = urlp->url_nx; 90 } 91 return (0); 92 } 93 94 static void 95 url_freel(ur_recid_lst_t **urlpp) 96 { 97 ur_recid_lst_t *urlp; 98 ur_recid_lst_t *turlp; 99 100 for (turlp = *urlpp; turlp != NULL; turlp = urlp) { 101 urlp = turlp->url_nx; 102 Free(turlp); 103 } 104 *urlpp = (ur_recid_lst_t *)NULL; 105 } 106 107 static int 108 ckncvt_set_record(mddb_userreq_t *reqp, md_error_t *ep) 109 { 110 mddb_userreq_t req; 111 md_set_record *sr; 112 int recs[3]; 113 114 if (reqp->ur_size == sizeof (*sr)) 115 return (0); 116 117 if (! 
md_in_daemon) { 118 if (reqp->ur_size >= sizeof (*sr)) 119 return (0); 120 121 reqp->ur_data = (uintptr_t)Realloc((void *)(uintptr_t) 122 reqp->ur_data, sizeof (*sr)); 123 (void) memset( 124 ((char *)(uintptr_t)reqp->ur_data) + reqp->ur_size, 125 '\0', sizeof (*sr) - reqp->ur_size); 126 reqp->ur_size = sizeof (*sr); 127 return (0); 128 } 129 130 /* 131 * If here, then the daemon is calling, and so the automatic 132 * conversion will be performed. 133 */ 134 135 /* shorthand */ 136 req = *reqp; /* structure assignment */ 137 sr = (md_set_record *)(uintptr_t)req.ur_data; 138 139 if (sr->sr_flags & MD_SR_CVT) 140 return (0); 141 142 /* Leave multi-node set records alone */ 143 if (MD_MNSET_REC(sr)) { 144 return (0); 145 } 146 147 /* Mark the old record as converted */ 148 sr->sr_flags |= MD_SR_CVT; 149 150 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) 151 152 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) 153 return (mdstealerror(ep, &req.ur_mde)); 154 155 /* Create space for the new record */ 156 METAD_SETUP_SR(MD_DB_CREATE, 0); 157 req.ur_size = sizeof (*sr); 158 159 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) 160 return (mdstealerror(ep, &req.ur_mde)); 161 162 /* Allocate the new record */ 163 sr = Zalloc(sizeof (*sr)); 164 165 /* copy all the data from the record being converted */ 166 (void) memmove(sr, (void *)(uintptr_t)reqp->ur_data, reqp->ur_size); 167 sr->sr_flags &= ~MD_SR_CVT; 168 169 /* adjust the selfid to point to the new record */ 170 sr->sr_selfid = req.ur_recid; 171 172 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) 173 req.ur_size = sizeof (*sr); 174 req.ur_data = (uintptr_t)sr; 175 176 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 177 Free(sr); 178 return (mdstealerror(ep, &req.ur_mde)); 179 } 180 181 /* Commit the old and the new */ 182 recs[0] = ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid; 183 recs[1] = sr->sr_selfid; 184 recs[2] = 0; 185 186 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0); 187 
req.ur_size = sizeof (recs); 188 req.ur_data = (uintptr_t)recs; 189 190 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 191 Free(sr); 192 return (mdstealerror(ep, &req.ur_mde)); 193 } 194 195 /* Add the the old record to the list of records to delete */ 196 url_addl(&url_tode, 197 ((md_set_record *)(uintptr_t)reqp->ur_data)->sr_selfid); 198 199 /* Free the old records space */ 200 Free((void *)(uintptr_t)reqp->ur_data); 201 202 /* Adjust the reqp structure to point to the new record and size */ 203 reqp->ur_recid = sr->sr_selfid; 204 reqp->ur_size = sizeof (*sr); 205 reqp->ur_data = (uintptr_t)sr; 206 207 return (0); 208 } 209 210 mddb_userreq_t * 211 get_db_rec( 212 md_ur_get_cmd_t cmd, 213 set_t setno, 214 mddb_type_t type, 215 uint_t type2, 216 mddb_recid_t *idp, 217 md_error_t *ep 218 ) 219 { 220 mddb_userreq_t *reqp = Zalloc(sizeof (*reqp)); 221 mdsetname_t *sp; 222 md_set_desc *sd; 223 int ureq; 224 225 if ((sp = metasetnosetname(setno, ep)) == NULL) { 226 Free(reqp); 227 return (NULL); 228 } 229 230 if (metaislocalset(sp)) { 231 ureq = MD_DB_USERREQ; 232 } else { 233 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 234 Free(reqp); 235 return (NULL); 236 } 237 ureq = MD_MNSET_DESC(sd) ? 
MD_MN_DB_USERREQ : MD_DB_USERREQ; 238 } 239 240 reqp->ur_setno = setno; 241 reqp->ur_type = type; 242 reqp->ur_type2 = type2; 243 244 switch (cmd) { 245 case MD_UR_GET_NEXT: 246 reqp->ur_cmd = MD_DB_GETNEXTREC; 247 reqp->ur_recid = *idp; 248 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) 249 != 0) { 250 (void) mdstealerror(ep, &reqp->ur_mde); 251 Free(reqp); 252 return (NULL); 253 } 254 *idp = reqp->ur_recid; 255 break; 256 case MD_UR_GET_WKEY: 257 reqp->ur_recid = *idp; 258 break; 259 } 260 261 if (*idp <= 0) { 262 Free(reqp); 263 return (NULL); 264 } 265 266 reqp->ur_cmd = MD_DB_GETSIZE; 267 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) { 268 (void) mdstealerror(ep, &reqp->ur_mde); 269 Free(reqp); 270 271 *idp = 0; 272 return (NULL); 273 } 274 275 reqp->ur_cmd = MD_DB_GETDATA; 276 reqp->ur_data = (uintptr_t)Zalloc(reqp->ur_size); 277 if (metaioctl(ureq, reqp, &reqp->ur_mde, NULL) != 0) { 278 (void) mdstealerror(ep, &reqp->ur_mde); 279 Free((void *)(uintptr_t)reqp->ur_data); 280 Free(reqp); 281 *idp = 0; 282 return (NULL); 283 } 284 285 switch (reqp->ur_type) { 286 case MDDB_USER: 287 switch (reqp->ur_type2) { 288 case MDDB_UR_SR: 289 if (ckncvt_set_record(reqp, ep)) { 290 Free((void *)(uintptr_t)reqp->ur_data); 291 Free(reqp); 292 return (NULL); 293 } 294 break; 295 } 296 break; 297 } 298 299 return (reqp); 300 } 301 302 void * 303 get_ur_rec( 304 set_t setno, 305 md_ur_get_cmd_t cmd, 306 uint_t type2, 307 mddb_recid_t *idp, 308 md_error_t *ep 309 ) 310 { 311 mddb_userreq_t *reqp = NULL; 312 void *ret_val; 313 314 assert(idp != NULL); 315 316 reqp = get_db_rec(cmd, setno, MDDB_USER, type2, idp, ep); 317 if (reqp == NULL) 318 return (NULL); 319 320 ret_val = (void *)(uintptr_t)reqp->ur_data; 321 Free(reqp); 322 return (ret_val); 323 } 324 325 /* 326 * Called by rpc.metad on startup of disksets to cleanup 327 * the host entries associated with a diskset. 
This is needed if 328 * a node failed or the metaset command was killed during the addition 329 * of a node to a diskset. 330 * 331 * This is called for all traditional disksets. 332 * This is only called for MNdisksets when in there is only one node 333 * in all of the MN disksets and this node is not running SunCluster. 334 * (Otherwise, the cleanup of the host entries is handled by a 335 * reconfig cycle that the SunCluster software calls). 336 */ 337 static int 338 sr_hosts(md_set_record *sr) 339 { 340 int i, 341 nid, 342 self_in_set = FALSE; 343 md_error_t xep = mdnullerror; 344 md_mnnode_record *nr; 345 md_mnset_record *mnsr; 346 347 if (MD_MNSET_REC(sr)) { 348 mnsr = (struct md_mnset_record *)sr; 349 nr = mnsr->sr_nodechain; 350 /* 351 * Already guaranteed to be only 1 node in set which 352 * is mynode (done in sr_validate). 353 * Now, check if node is in the OK state. If not in 354 * the OK state, leave self_in_set FALSE so that 355 * set will be removed. 356 */ 357 if (nr->nr_flags & MD_MN_NODE_OK) 358 self_in_set = TRUE; 359 } else { 360 for (i = 0; i < MD_MAXSIDES; i++) { 361 /* Skip empty slots */ 362 if (sr->sr_nodes[i][0] == '\0') 363 continue; 364 365 /* Make sure we are in the set and skip this node */ 366 if (strcmp(sr->sr_nodes[i], mynode()) == 0) { 367 self_in_set = TRUE; 368 break; 369 } 370 } 371 } 372 373 if ((self_in_set == FALSE) && (!(MD_MNSET_REC(sr)))) { 374 if (_cladm(CL_CONFIG, CL_NODEID, &nid) == 0) { 375 376 /* 377 * See if we've got a node which has been booted in 378 * non-cluster mode. If true the nodeid will match 379 * one of the sr_nodes values because the conversion 380 * from nodeid to hostname failed to occur. 
381 */ 382 for (i = 0; i < MD_MAXSIDES; i++) { 383 if (sr->sr_nodes[i][0] == 0) 384 continue; 385 if (atoi(sr->sr_nodes[i]) == nid) 386 self_in_set = TRUE; 387 } 388 389 /* If we aren't in the set, delete the set */ 390 if (self_in_set == FALSE) { 391 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 392 "Removing set %s from database\n"), sr->sr_setname); 393 s_delset(sr->sr_setname, &xep); 394 if (! mdisok(&xep)) 395 mdclrerror(&xep); 396 return (1); 397 } 398 } else { 399 /* 400 * Send a message to syslog and return without 401 * deleting any sets 402 */ 403 syslog(LOG_ERR, dgettext(TEXT_DOMAIN, 404 "Call to _cladm failed for set %s\n"), 405 sr->sr_setname); 406 return (1); 407 } 408 } 409 return (0); 410 } 411 412 void 413 sr_del_drv(md_set_record *sr, mddb_recid_t recid) 414 { 415 mddb_userreq_t req; 416 md_error_t xep = mdnullerror; 417 418 if (!s_ownset(sr->sr_setno, &xep)) { 419 if (! mdisok(&xep)) 420 mdclrerror(&xep); 421 goto skip; 422 } 423 424 /* delete the replicas? */ 425 /* release ownership of the drive? */ 426 /* NOTE: We may not have a name, so both of the above are ugly! 
*/ 427 428 skip: 429 (void) memset(&req, 0, sizeof (req)); 430 METAD_SETUP_DR(MD_DB_DELETE, recid) 431 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) 432 mdclrerror(&req.ur_mde); 433 434 dr_cache_del(sr, recid); 435 } 436 437 static void 438 sr_drvs(md_set_record *sr) 439 { 440 md_drive_record *dr; 441 int i; 442 int modified = 0; 443 int sidesok; 444 mdnm_params_t nm; 445 static char device_name[MAXPATHLEN]; 446 md_error_t xep = mdnullerror; 447 md_mnnode_record *nr; 448 md_mnset_record *mnsr; 449 450 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) { 451 /* If we were mid-add, cleanup */ 452 if ((dr->dr_flags & MD_DR_ADD)) { 453 sr_del_drv(sr, dr->dr_selfid); 454 modified++; 455 continue; 456 } 457 458 sidesok = TRUE; 459 if (MD_MNSET_REC(sr)) { 460 mnsr = (md_mnset_record *)sr; 461 nr = mnsr->sr_nodechain; 462 /* 463 * MultiNode disksets only have entries for 464 * their side in the local set. Verify 465 * that drive has a name associated with 466 * this node's side. 467 */ 468 while (nr) { 469 /* Find my node */ 470 if (strcmp(mynode(), nr->nr_nodename) != 0) { 471 nr = nr->nr_next; 472 continue; 473 } 474 475 (void) memset(&nm, '\0', sizeof (nm)); 476 nm.setno = MD_LOCAL_SET; 477 nm.side = nr->nr_nodeid; 478 nm.key = dr->dr_key; 479 nm.devname = (uint64_t)device_name; 480 481 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, 482 NULL) != 0) { 483 if (! mdissyserror(&nm.mde, ENOENT)) { 484 mdclrerror(&nm.mde); 485 return; 486 } 487 } 488 489 /* 490 * If entry is found for this node, then 491 * break out of loop walking through 492 * node list. For a multi-node diskset, 493 * there should only be an entry for 494 * this node. 495 */ 496 if (nm.key != MD_KEYWILD && 497 ! mdissyserror(&nm.mde, ENOENT)) { 498 break; 499 } 500 501 /* 502 * If entry is not found for this node, 503 * then delete the drive. No need to 504 * continue through the node loop since 505 * our node has already been found. 
506 */ 507 sidesok = FALSE; 508 mdclrerror(&nm.mde); 509 510 /* If we are missing a sidename, cleanup */ 511 sr_del_drv(sr, dr->dr_selfid); 512 modified++; 513 514 break; 515 } 516 } else { 517 for (i = 0; i < MD_MAXSIDES; i++) { 518 /* Skip empty slots */ 519 if (sr->sr_nodes[i][0] == '\0') 520 continue; 521 522 (void) memset(&nm, '\0', sizeof (nm)); 523 nm.setno = MD_LOCAL_SET; 524 nm.side = i + SKEW; 525 nm.key = dr->dr_key; 526 nm.devname = (uint64_t)device_name; 527 528 if (metaioctl(MD_IOCGET_NM, &nm, &nm.mde, 529 NULL) != 0) { 530 if (! mdissyserror(&nm.mde, ENOENT)) { 531 mdclrerror(&nm.mde); 532 return; 533 } 534 } 535 536 if (nm.key != MD_KEYWILD && 537 ! mdissyserror(&nm.mde, ENOENT)) 538 continue; 539 540 sidesok = FALSE; 541 mdclrerror(&nm.mde); 542 543 /* If we are missing a sidename, cleanup */ 544 sr_del_drv(sr, dr->dr_selfid); 545 modified++; 546 547 break; 548 } 549 } 550 551 if (sidesok == FALSE) 552 continue; 553 554 /* 555 * If we got this far, the drive record is either in the OK 556 * or DEL state, if it is in the DEL state and the sidenames 557 * all checked out, then we will make it OK. 558 */ 559 if ((dr->dr_flags & MD_DR_OK)) 560 continue; 561 562 dr->dr_flags = MD_DR_OK; 563 564 modified++; 565 } 566 567 if (modified) { 568 commitset(sr, FALSE, &xep); 569 if (! 
mdisok(&xep)) 570 mdclrerror(&xep); 571 } 572 } 573 574 static void 575 add_key_to_lst(key_lst_t **klpp, side_t side, mdkey_t key) 576 { 577 key_lst_t *klp; 578 579 assert(klpp != NULL); 580 581 for (/* void */; *klpp != NULL; klpp = &(*klpp)->kl_next) 582 /* void */; 583 584 /* allocate new list element */ 585 klp = *klpp = Zalloc(sizeof (*klp)); 586 587 klp->kl_side = side; 588 klp->kl_key = key; 589 } 590 591 #ifdef DUMPKEYLST 592 static void 593 pr_key_lst(char *tag, key_lst_t *klp) 594 { 595 key_lst_t *tklp; 596 597 md_eprintf("Tag=%s\n", tag); 598 for (tklp = klp; tklp != NULL; tklp = tklp->kl_next) 599 md_eprintf("side=%d, key=%lu\n", tklp->kl_side, tklp->kl_key); 600 } 601 #endif /* DUMPKEYLST */ 602 603 static int 604 key_in_key_lst(key_lst_t *klp, side_t side, mdkey_t key) 605 { 606 key_lst_t *tklp; 607 608 for (tklp = klp; tklp != NULL; tklp = tklp->kl_next) 609 if (tklp->kl_side == side && tklp->kl_key == key) 610 return (1); 611 612 return (0); 613 } 614 615 static void 616 destroy_key_lst(key_lst_t **klpp) 617 { 618 key_lst_t *tklp, *klp; 619 620 assert(klpp != NULL); 621 622 tklp = klp = *klpp; 623 while (klp != NULL) { 624 tklp = klp; 625 klp = klp->kl_next; 626 Free(tklp); 627 } 628 *klpp = NULL; 629 } 630 631 static void 632 sr_sidenms(void) 633 { 634 md_drive_record *dr; 635 md_set_record *sr; 636 key_lst_t *use = NULL; 637 mdnm_params_t nm; 638 int i; 639 md_mnset_record *mnsr; 640 md_mnnode_record *nr; 641 side_t myside = 0; 642 643 /* 644 * We now go through the list of set and drive records collecting 645 * the key/side pairs that are being used. 646 */ 647 for (sr = setrecords; sr != NULL; sr = sr->sr_next) { 648 /* 649 * To handle the multi-node diskset case, get the sideno 650 * associated with this node. This sideno will be the 651 * same across all multi-node disksets. 
652 */ 653 if ((myside == 0) && (MD_MNSET_REC(sr))) { 654 mnsr = (struct md_mnset_record *)sr; 655 nr = mnsr->sr_nodechain; 656 while (nr) { 657 if (strcmp(mynode(), nr->nr_nodename) == 0) { 658 myside = nr->nr_nodeid; 659 break; 660 } 661 nr = nr->nr_next; 662 } 663 /* 664 * If this node is not in this MNset - 665 * then skip this set. 666 */ 667 if (!nr) { 668 continue; 669 } 670 } 671 672 for (dr = sr->sr_drivechain; dr != NULL; dr = dr->dr_next) { 673 if (MD_MNSET_REC(sr)) { 674 /* 675 * There are no non-local sidenames in the 676 * local set for a multi-node diskset. 677 */ 678 add_key_to_lst(&use, myside, dr->dr_key); 679 } else { 680 for (i = 0; i < MD_MAXSIDES; i++) { 681 /* Skip empty slots */ 682 if (sr->sr_nodes[i][0] == '\0') 683 continue; 684 685 add_key_to_lst(&use, i + SKEW, 686 dr->dr_key); 687 } 688 } 689 } 690 } 691 692 #ifdef DUMPKEYLST 693 pr_key_lst("use", use); 694 #endif /* DUMPKEYLST */ 695 696 /* 697 * We take the list above and get all non-local sidenames, checking 698 * each to see if they are in use, if they are not used, we delete them. 699 * Do the check for myside to cover multinode disksets. 700 * Then do the check for MD_MAXSIDES to cover non-multinode disksets. 701 * If any multi-node disksets were present, myside would be non-zero. 702 * myside is the same for all multi-node disksets for this node. 703 */ 704 if (myside) { 705 (void) memset(&nm, '\0', sizeof (nm)); 706 nm.setno = MD_LOCAL_SET; 707 nm.side = myside; 708 nm.key = MD_KEYWILD; 709 710 /*CONSTCOND*/ 711 while (1) { 712 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, 713 NULL) != 0) { 714 mdclrerror(&nm.mde); 715 break; 716 } 717 718 if (nm.key == MD_KEYWILD) 719 break; 720 721 if (! 
key_in_key_lst(use, nm.side, nm.key)) { 722 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde, 723 NULL) != 0) { 724 mdclrerror(&nm.mde); 725 continue; 726 } 727 } 728 } 729 } 730 /* Now handle the non-multinode disksets */ 731 for (i = 0; i < MD_MAXSIDES; i++) { 732 (void) memset(&nm, '\0', sizeof (nm)); 733 nm.setno = MD_LOCAL_SET; 734 nm.side = i + SKEW; 735 nm.key = MD_KEYWILD; 736 737 /*CONSTCOND*/ 738 while (1) { 739 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, 740 NULL) != 0) { 741 mdclrerror(&nm.mde); 742 break; 743 } 744 745 if (nm.key == MD_KEYWILD) 746 break; 747 748 if (! key_in_key_lst(use, nm.side, nm.key)) { 749 if (metaioctl(MD_IOCREM_NM, &nm, &nm.mde, 750 NULL) != 0) { 751 mdclrerror(&nm.mde); 752 continue; 753 } 754 } 755 } 756 } 757 758 /* Cleanup */ 759 destroy_key_lst(&use); 760 } 761 762 void 763 sr_validate(void) 764 { 765 md_set_record *sr; 766 md_error_t xep = mdnullerror; 767 int mnset_single_node; 768 md_mnnode_record *nr; 769 md_mnset_record *mnsr; 770 771 assert(setsnarfdone != 0); 772 773 /* We have validated the records already */ 774 if (setsnarfdone == 3) 775 return; 776 777 /* 778 * Check if we are in a single node non-SC3.x environmemnt 779 */ 780 mnset_single_node = meta_mn_singlenode(); 781 /* 782 * If a possible single_node situation, verify that all 783 * MN disksets have only one node (which is mynode()). 784 */ 785 if (mnset_single_node) { 786 for (sr = setrecords; sr != NULL; sr = sr->sr_next) { 787 if (MD_MNSET_REC(sr)) { 788 mnsr = (struct md_mnset_record *)sr; 789 nr = mnsr->sr_nodechain; 790 /* 791 * If next pointer is non-null (more than 792 * one node in list) or if the single node 793 * isn't my node - reset single node flag. 
794 */ 795 if ((nr->nr_next) || 796 (strcmp(nr->nr_nodename, mynode()) != 0)) { 797 mnset_single_node = 0; 798 break; 799 } 800 } 801 } 802 } 803 804 for (sr = setrecords; sr != NULL; sr = sr->sr_next) { 805 /* 806 * If a MN diskset and not in the single node 807 * situation, then don't validate the MN set. 808 * This is done during a reconfig cycle since all 809 * nodes must take the same action. 810 */ 811 if (MD_MNSET_REC(sr) && (mnset_single_node == 0)) 812 continue; 813 814 /* Since we do "partial" snarf's, we only check new entries */ 815 if (! (sr->sr_flags & MD_SR_CHECK)) 816 continue; 817 818 /* If we were mid-add, cleanup */ 819 if ((sr->sr_flags & MD_SR_ADD)) { 820 s_delset(sr->sr_setname, &xep); 821 if (! mdisok(&xep)) 822 mdclrerror(&xep); 823 continue; 824 } 825 826 /* Make sure we are in the set. */ 827 if (sr_hosts(sr)) 828 continue; 829 830 /* Check has been done, clear the flag */ 831 if ((sr->sr_flags & MD_SR_CHECK)) 832 sr->sr_flags &= ~MD_SR_CHECK; 833 834 /* 835 * If we got here, we are in the set, make sure the flags make 836 * sense. 837 */ 838 if (! (sr->sr_flags & MD_SR_OK)) { 839 sr->sr_flags &= ~MD_SR_STATE_FLAGS; 840 sr->sr_flags |= MD_SR_OK; 841 commitset(sr, FALSE, &xep); 842 if (! mdisok(&xep)) 843 mdclrerror(&xep); 844 } 845 846 /* Make sure all the drives are in a stable state. 
*/ 847 sr_drvs(sr); 848 } 849 850 /* Cleanup any stray sidenames */ 851 sr_sidenms(); 852 853 setsnarfdone = 3; 854 } 855 856 static md_set_record * 857 sr_in_cache(mddb_recid_t recid) 858 { 859 md_set_record *tsr; 860 861 for (tsr = setrecords; tsr != NULL; tsr = tsr->sr_next) 862 if (tsr->sr_selfid == recid) 863 return (tsr); 864 return ((md_set_record *)NULL); 865 } 866 867 int 868 set_snarf(md_error_t *ep) 869 { 870 md_set_record *sr; 871 md_mnset_record *mnsr; 872 md_set_record *tsr; 873 md_drive_record *dr; 874 mddb_userreq_t *reqp; 875 ur_recid_lst_t *urlp; 876 mddb_recid_t id; 877 mddb_recid_t *p; 878 md_error_t xep = mdnullerror; 879 md_mnnode_record *nr; 880 mddb_set_node_params_t snp; 881 int nodecnt; 882 mndiskset_membershiplist_t *nl, *nl2; 883 884 /* We have done the snarf call */ 885 if (setsnarfdone != 0) 886 return (0); 887 888 if (meta_setup_db_locations(ep) != 0) { 889 if (! mdismddberror(ep, MDE_DB_STALE)) 890 return (-1); 891 mdclrerror(ep); 892 } 893 894 /* 895 * Get membershiplist from API routine. 896 * If there's an error, just use a NULL 897 * nodelist. 898 */ 899 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { 900 nodecnt = 0; /* no nodes are alive */ 901 nl = NULL; 902 mdclrerror(ep); 903 } 904 905 /* Let sr_cache_add and dr_cache_add know we are doing the snarf */ 906 setsnarfdone = 1; 907 908 /* Go get the set records */ 909 id = 0; 910 while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, 911 &id, ep)) != NULL) { 912 sr->sr_next = NULL; 913 sr->sr_drivechain = NULL; 914 915 /* 916 * Cluster nodename support 917 * Convert nodeid -> nodename 918 * Don't do this for MN disksets since we've already stored 919 * both the nodeid and name. 
920 */ 921 if (!(MD_MNSET_REC(sr))) 922 sdssc_cm_sr_nid2nm(sr); 923 924 /* If we were mid-cvt, cleanup */ 925 if (sr->sr_flags & MD_SR_CVT) { 926 /* If the daemon is calling, cleanup */ 927 if (md_in_daemon) 928 url_addl(&url_tode, sr->sr_selfid); 929 continue; 930 } 931 932 if (md_in_daemon) 933 url_addl(&url_used, sr->sr_selfid); 934 935 /* Skip cached records */ 936 tsr = sr_in_cache(sr->sr_selfid); 937 if (tsr != (md_set_record *)NULL) { 938 if (MD_MNSET_REC(sr)) { 939 mnsr = (struct md_mnset_record *)sr; 940 Free(mnsr); 941 } else { 942 Free(sr); 943 } 944 if (md_in_daemon) 945 for (dr = tsr->sr_drivechain; 946 dr != (md_drive_record *)NULL; 947 dr = dr->dr_next) 948 url_addl(&url_used, dr->dr_selfid); 949 continue; 950 } 951 952 /* Mark the record as one to be checked */ 953 sr->sr_flags |= MD_SR_CHECK; 954 955 sr_cache_add(sr); 956 957 /* If MNdiskset, go get the node records */ 958 if (MD_MNSET_REC(sr)) { 959 mnsr = (struct md_mnset_record *)sr; 960 mnsr->sr_nodechain = NULL; 961 p = &mnsr->sr_noderec; 962 while ((nr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY, 963 MDDB_UR_NR, p, ep)) != NULL) { 964 nr->nr_next = NULL; 965 966 if (md_in_daemon) 967 url_addl(&url_used, nr->nr_selfid); 968 969 /* 970 * Turn off ALIVE node flag based on member 971 * list. 972 * If ALIVE flag is not set, reset OWN flag. 973 * If this node is mynode, set the OWN flag 974 * to match the ownership of the diskset. 975 */ 976 if (md_in_daemon) { 977 nr->nr_flags &= ~MD_MN_NODE_ALIVE; 978 nl2 = nl; 979 while (nl2) { 980 /* 981 * If in member list, 982 * set alive. 983 */ 984 if (nl2->msl_node_id == 985 nr->nr_nodeid) { 986 nr->nr_flags |= 987 MD_MN_NODE_ALIVE; 988 break; 989 } 990 nl2 = nl2->next; 991 } 992 /* 993 * If mynode is in member list, then 994 * check to see if set is snarfed. 995 * If set snarfed, set own flag; 996 * otherwise reset it. 997 * Don't change master even if 998 * node isn't an owner node, since 999 * node may be master, but hasn't 1000 * joined the set yet. 
1001 */ 1002 if (nr->nr_flags & MD_MN_NODE_ALIVE) { 1003 if (strcmp(nr->nr_nodename, 1004 mynode()) == 0) { 1005 if (s_ownset( 1006 mnsr->sr_setno, ep)) { 1007 nr->nr_flags |= 1008 MD_MN_NODE_OWN; 1009 } else { 1010 nr->nr_flags &= 1011 ~MD_MN_NODE_OWN; 1012 } 1013 } 1014 } else { 1015 if (strcmp(nr->nr_nodename, 1016 mynode()) == 0) { 1017 /* 1018 * If my node isn't in member 1019 * list then reset master. 1020 */ 1021 mnsr = (struct 1022 md_mnset_record *)sr; 1023 mnsr->sr_master_nodeid = 1024 MD_MN_INVALID_NID; 1025 mnsr->sr_master_nodenm[0] = 1026 '\0'; 1027 } 1028 nr->nr_flags &= ~MD_MN_NODE_OWN; 1029 } 1030 } 1031 1032 /* 1033 * Must grab nr_nextrec now since 1034 * mnnr_cache_add may change it 1035 * (mnnr_cache_add is storing the nodes in 1036 * an ascending nodeid order list in order 1037 * to support reconfig). 1038 */ 1039 if (nr->nr_nextrec != 0) 1040 p = &nr->nr_nextrec; 1041 else 1042 p = NULL; 1043 1044 mnnr_cache_add((struct md_mnset_record *)sr, 1045 nr); 1046 1047 if ((md_in_daemon) && 1048 (strcmp(nr->nr_nodename, mynode()) == 0)) { 1049 (void) memset(&snp, 0, sizeof (snp)); 1050 snp.sn_nodeid = nr->nr_nodeid; 1051 snp.sn_setno = mnsr->sr_setno; 1052 if (metaioctl(MD_MN_SET_NODEID, &snp, 1053 &snp.sn_mde, NULL) != 0) { 1054 (void) mdstealerror(ep, 1055 &snp.sn_mde); 1056 } 1057 } 1058 1059 if (p == NULL) 1060 break; 1061 } 1062 if (! mdisok(ep)) { 1063 if (! mdissyserror(ep, ENOENT)) 1064 goto out; 1065 mdclrerror(ep); 1066 } 1067 } 1068 1069 if (sr->sr_driverec == 0) 1070 continue; 1071 1072 /* Go get the drive records */ 1073 p = &sr->sr_driverec; 1074 while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY, 1075 MDDB_UR_DR, p, ep)) != NULL) { 1076 dr->dr_next = NULL; 1077 1078 if (md_in_daemon) 1079 url_addl(&url_used, dr->dr_selfid); 1080 1081 dr_cache_add(sr, dr); 1082 1083 if (dr->dr_nextrec == 0) 1084 break; 1085 1086 p = &dr->dr_nextrec; 1087 } 1088 if (! mdisok(ep)) { 1089 if (! 
mdissyserror(ep, ENOENT)) 1090 goto out; 1091 mdclrerror(ep); 1092 /* 1093 * If dr_nextrec was not valid, or we had some 1094 * problem getting the record, we end up here. 1095 * get_ur_rec() zeroes the recid we passed in, 1096 * if we had a failure getting a record using a key, 1097 * so we simply commit the set record and valid 1098 * drive records, if this fails, we hand an error 1099 * back to the caller. 1100 */ 1101 commitset(sr, FALSE, ep); 1102 if (! mdisok(ep)) 1103 goto out; 1104 } 1105 } 1106 if (! mdisok(ep)) { 1107 if (! mdissyserror(ep, ENOENT)) 1108 goto out; 1109 mdclrerror(ep); 1110 } 1111 1112 /* 1113 * If the daemon called, go through the USER records and cleanup 1114 * any that are not used by valid sets. 1115 */ 1116 if (md_in_daemon) { 1117 id = 0; 1118 /* Make a list of records to delete */ 1119 while ((reqp = get_db_rec(MD_UR_GET_NEXT, MD_LOCAL_SET, 1120 MDDB_USER, 0, &id, ep)) != NULL) { 1121 if (reqp->ur_type2 != MDDB_UR_SR && 1122 reqp->ur_type2 != MDDB_UR_DR) { 1123 Free((void *)(uintptr_t)reqp->ur_data); 1124 Free(reqp); 1125 continue; 1126 } 1127 if (! url_findl(url_used, reqp->ur_recid)) 1128 url_addl(&url_tode, reqp->ur_recid); 1129 Free((void *)(uintptr_t)reqp->ur_data); 1130 Free(reqp); 1131 } 1132 if (! mdisok(ep)) { 1133 if (! mdissyserror(ep, ENOENT)) 1134 goto out; 1135 mdclrerror(ep); 1136 } 1137 1138 /* Delete all the delete listed records */ 1139 for (urlp = url_tode; urlp != NULL; urlp = urlp->url_nx) { 1140 s_delrec(urlp->url_recid, &xep); 1141 if (! 
mdisok(&xep)) 1142 mdclrerror(&xep); 1143 } 1144 } 1145 1146 url_freel(&url_used); 1147 url_freel(&url_tode); 1148 1149 if (nodecnt) 1150 meta_free_nodelist(nl); 1151 1152 /* Mark the snarf complete */ 1153 setsnarfdone = 2; 1154 return (0); 1155 1156 out: 1157 url_freel(&url_used); 1158 url_freel(&url_tode); 1159 1160 sr_cache_flush(1); 1161 1162 if (nodecnt) 1163 meta_free_nodelist(nl); 1164 1165 /* Snarf failed, reset state */ 1166 setsnarfdone = 0; 1167 1168 return (-1); 1169 } 1170 1171 void 1172 sr_cache_add(md_set_record *sr) 1173 { 1174 md_set_record *tsr; 1175 1176 assert(setsnarfdone != 0); 1177 1178 if (setrecords == NULL) { 1179 setrecords = sr; 1180 return; 1181 } 1182 1183 for (tsr = setrecords; tsr->sr_next != NULL; tsr = tsr->sr_next) 1184 /* void */; 1185 tsr->sr_next = sr; 1186 } 1187 1188 void 1189 sr_cache_del(mddb_recid_t recid) 1190 { 1191 md_set_record *sr, *tsr; 1192 md_mnset_record *mnsr; 1193 1194 assert(setsnarfdone != 0); 1195 1196 for (sr = tsr = setrecords; sr != NULL; tsr = sr, sr = sr->sr_next) { 1197 if (sr->sr_selfid != recid) 1198 continue; 1199 if (sr == setrecords) 1200 setrecords = sr->sr_next; 1201 else 1202 tsr->sr_next = sr->sr_next; 1203 if (MD_MNSET_REC(sr)) { 1204 mnsr = (struct md_mnset_record *)sr; 1205 Free(mnsr); 1206 } else { 1207 Free(sr); 1208 } 1209 break; 1210 } 1211 if (setrecords == NULL) 1212 setsnarfdone = 0; 1213 } 1214 1215 void 1216 dr_cache_add(md_set_record *sr, md_drive_record *dr) 1217 { 1218 md_drive_record *tdr; 1219 1220 assert(setsnarfdone != 0); 1221 1222 assert(sr != NULL); 1223 1224 if (sr->sr_drivechain == NULL) { 1225 sr->sr_drivechain = dr; 1226 sr->sr_driverec = dr->dr_selfid; 1227 return; 1228 } 1229 1230 for (tdr = sr->sr_drivechain; tdr->dr_next != NULL; tdr = tdr->dr_next) 1231 /* void */; 1232 1233 tdr->dr_next = dr; 1234 tdr->dr_nextrec = dr->dr_selfid; 1235 } 1236 1237 void 1238 dr_cache_del(md_set_record *sr, mddb_recid_t recid) 1239 { 1240 md_drive_record *dr; 1241 md_drive_record 
*tdr; 1242 1243 assert(setsnarfdone != 0); 1244 1245 assert(sr != NULL); 1246 1247 for (dr = tdr = sr->sr_drivechain; dr != NULL; 1248 tdr = dr, dr = dr->dr_next) { 1249 if (dr->dr_selfid != recid) 1250 continue; 1251 1252 if (dr == sr->sr_drivechain) { 1253 sr->sr_drivechain = dr->dr_next; 1254 sr->sr_driverec = dr->dr_nextrec; 1255 } else { 1256 tdr->dr_next = dr->dr_next; 1257 tdr->dr_nextrec = dr->dr_nextrec; 1258 } 1259 Free(dr); 1260 break; 1261 } 1262 } 1263 1264 /* 1265 * Nodes must be kept in ascending node id order in order to 1266 * support reconfig. 1267 * 1268 * This routine may change nr->nr_next and nr->nr_nextrec. 1269 */ 1270 void 1271 mnnr_cache_add(md_mnset_record *mnsr, md_mnnode_record *nr) 1272 { 1273 md_mnnode_record *tnr, *tnr_prev; 1274 1275 assert(mnsr != NULL); 1276 1277 if (mnsr->sr_nodechain == NULL) { 1278 mnsr->sr_nodechain = nr; 1279 mnsr->sr_noderec = nr->nr_selfid; 1280 return; 1281 } 1282 1283 /* 1284 * If new_record->nodeid < first_record->nodeid, 1285 * put new_record at beginning of list. 1286 */ 1287 if (nr->nr_nodeid < mnsr->sr_nodechain->nr_nodeid) { 1288 nr->nr_next = mnsr->sr_nodechain; 1289 nr->nr_nextrec = mnsr->sr_noderec; 1290 mnsr->sr_nodechain = nr; 1291 mnsr->sr_noderec = nr->nr_selfid; 1292 return; 1293 } 1294 1295 /* 1296 * Walk list looking for place to insert record. 1297 */ 1298 1299 tnr_prev = mnsr->sr_nodechain; 1300 tnr = tnr_prev->nr_next; 1301 while (tnr) { 1302 /* Insert new record between tnr_prev and tnr */ 1303 if (nr->nr_nodeid < tnr->nr_nodeid) { 1304 nr->nr_next = tnr; 1305 nr->nr_nextrec = tnr->nr_selfid; /* tnr's recid */ 1306 tnr_prev->nr_next = nr; 1307 tnr_prev->nr_nextrec = nr->nr_selfid; 1308 return; 1309 } 1310 tnr_prev = tnr; 1311 tnr = tnr->nr_next; 1312 } 1313 1314 /* 1315 * Add record to end of list. 
1316 */ 1317 tnr_prev->nr_next = nr; 1318 tnr_prev->nr_nextrec = nr->nr_selfid; 1319 } 1320 1321 void 1322 mnnr_cache_del(md_mnset_record *mnsr, mddb_recid_t recid) 1323 { 1324 md_mnnode_record *nr; 1325 md_mnnode_record *tnr; 1326 1327 assert(mnsr != NULL); 1328 1329 tnr = 0; 1330 nr = mnsr->sr_nodechain; 1331 while (nr) { 1332 if (nr->nr_selfid != recid) { 1333 tnr = nr; 1334 nr = nr->nr_next; 1335 continue; 1336 } 1337 1338 if (nr == mnsr->sr_nodechain) { 1339 mnsr->sr_nodechain = nr->nr_next; 1340 mnsr->sr_noderec = nr->nr_nextrec; 1341 } else { 1342 tnr->nr_next = nr->nr_next; 1343 tnr->nr_nextrec = nr->nr_nextrec; 1344 } 1345 Free(nr); 1346 break; 1347 } 1348 } 1349 1350 int 1351 metad_isautotakebyname(char *setname) 1352 { 1353 md_error_t error = mdnullerror; 1354 md_set_record *sr; 1355 1356 if (md_in_daemon) 1357 assert(setsnarfdone != 0); 1358 else if (set_snarf(&error)) { 1359 mdclrerror(&error); 1360 return (0); 1361 } 1362 1363 for (sr = setrecords; sr != NULL; sr = sr->sr_next) { 1364 if (strcmp(setname, sr->sr_setname) == 0) { 1365 if (sr->sr_flags & MD_SR_AUTO_TAKE) 1366 return (1); 1367 return (0); 1368 } 1369 } 1370 1371 return (0); 1372 } 1373 1374 int 1375 metad_isautotakebynum(set_t setno) 1376 { 1377 md_error_t error = mdnullerror; 1378 md_set_record *sr; 1379 1380 if (md_in_daemon) 1381 assert(setsnarfdone != 0); 1382 else if (set_snarf(&error)) { 1383 mdclrerror(&error); 1384 return (0); 1385 } 1386 1387 for (sr = setrecords; sr != NULL; sr = sr->sr_next) { 1388 if (setno == sr->sr_setno) { 1389 if (sr->sr_flags & MD_SR_AUTO_TAKE) 1390 return (1); 1391 return (0); 1392 } 1393 } 1394 1395 return (0); 1396 } 1397 1398 md_set_record * 1399 metad_getsetbyname(char *setname, md_error_t *ep) 1400 { 1401 md_set_record *sr; 1402 char buf[100]; 1403 1404 assert(setsnarfdone != 0); 1405 1406 for (sr = setrecords; sr != NULL; sr = sr->sr_next) 1407 if (strcmp(setname, sr->sr_setname) == 0) 1408 return (sr); 1409 1410 (void) snprintf(buf, sizeof (buf), 
"setname \"%s\"", setname); 1411 (void) mderror(ep, MDE_NO_SET, buf); 1412 return (NULL); 1413 } 1414 1415 md_set_record * 1416 metad_getsetbynum(set_t setno, md_error_t *ep) 1417 { 1418 md_set_record *sr; 1419 char buf[100]; 1420 1421 if (md_in_daemon) 1422 assert(setsnarfdone != 0); 1423 else if (set_snarf(ep)) /* BYPASS DAEMON mode */ 1424 return (NULL); 1425 1426 for (sr = setrecords; sr != NULL; sr = sr->sr_next) 1427 if (setno == sr->sr_setno) 1428 return (sr); 1429 1430 (void) sprintf(buf, "setno %u", setno); 1431 (void) mderror(ep, MDE_NO_SET, buf); 1432 return (NULL); 1433 } 1434 1435 1436 /* 1437 * Commit the set record and all of its associated records 1438 * (drive records, node records for a MNset) to the local mddb. 1439 */ 1440 void 1441 commitset(md_set_record *sr, int inc_genid, md_error_t *ep) 1442 { 1443 int drc, nrc, rc; 1444 int *recs; 1445 uint_t size; 1446 md_drive_record *dr; 1447 mddb_userreq_t req; 1448 md_mnset_record *mnsr; 1449 md_mnnode_record *nr; 1450 1451 assert(setsnarfdone != 0); 1452 1453 /* 1454 * Cluster nodename support 1455 * Convert nodename -> nodeid 1456 * Don't do this for MN disksets since we've already stored 1457 * both the nodeid and name. 1458 */ 1459 if (!(MD_MNSET_REC(sr))) 1460 sdssc_cm_sr_nm2nid(sr); 1461 1462 /* Send down to kernel the data in mddb USER set record */ 1463 if (inc_genid) 1464 sr->sr_genid++; 1465 (void) memset(&req, 0, sizeof (req)); 1466 METAD_SETUP_SR(MD_DB_SETDATA, sr->sr_selfid) 1467 if (MD_MNSET_REC(sr)) { 1468 req.ur_size = sizeof (*mnsr); 1469 } else { 1470 req.ur_size = sizeof (*sr); 1471 } 1472 req.ur_data = (uintptr_t)sr; 1473 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 1474 (void) mdstealerror(ep, &req.ur_mde); 1475 return; 1476 } 1477 1478 /* 1479 * Walk through the drive records associated with this set record 1480 * and send down to kernel the data in mddb USER drive record. 
1481 */ 1482 drc = 0; 1483 dr = sr->sr_drivechain; 1484 while (dr) { 1485 if (inc_genid) 1486 dr->dr_genid++; 1487 METAD_SETUP_DR(MD_DB_SETDATA, dr->dr_selfid) 1488 req.ur_size = sizeof (*dr); 1489 req.ur_data = (uintptr_t)dr; 1490 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 1491 (void) mdstealerror(ep, &req.ur_mde); 1492 return; 1493 } 1494 drc++; 1495 dr = dr->dr_next; 1496 } 1497 1498 1499 /* 1500 * If this set is a multi-node set - 1501 * walk through the node records associated with this set record 1502 * and send down to kernel the data in mddb USER node record. 1503 */ 1504 nrc = 0; 1505 if (MD_MNSET_REC(sr)) { 1506 mnsr = (struct md_mnset_record *)sr; 1507 nr = mnsr->sr_nodechain; 1508 while (nr) { 1509 if (inc_genid) 1510 nr->nr_genid++; 1511 METAD_SETUP_NR(MD_DB_SETDATA, nr->nr_selfid) 1512 req.ur_size = sizeof (*nr); 1513 req.ur_data = (uint64_t)(uintptr_t)nr; 1514 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) 1515 != 0) { 1516 (void) mdstealerror(ep, &req.ur_mde); 1517 return; 1518 } 1519 nrc++; 1520 nr = nr->nr_next; 1521 } 1522 } 1523 1524 /* 1525 * Set up list of mddb USER recids containing set and drive records 1526 * and node records if a MNset. 
1527 */ 1528 rc = 0; 1529 size = (nrc + drc + 2) * sizeof (int); 1530 recs = Zalloc(size); 1531 /* First recid in list is the set record's id */ 1532 recs[rc] = sr->sr_selfid; 1533 rc++; 1534 dr = sr->sr_drivechain; 1535 while (dr) { 1536 /* Now, fill in the drive record ids */ 1537 recs[rc] = dr->dr_selfid; 1538 dr = dr->dr_next; 1539 rc++; 1540 } 1541 if (MD_MNSET_REC(sr)) { 1542 nr = mnsr->sr_nodechain; 1543 while (nr) { 1544 /* If a MNset, fill in the node record ids */ 1545 recs[rc] = nr->nr_selfid; 1546 nr = nr->nr_next; 1547 rc++; 1548 } 1549 } 1550 /* Set last record to null recid */ 1551 recs[rc] = 0; 1552 1553 /* Write out the set and drive and node records to the local mddb */ 1554 METAD_SETUP_UR(MD_DB_COMMIT_MANY, 0, 0); 1555 req.ur_size = size; 1556 req.ur_data = (uintptr_t)recs; 1557 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 1558 (void) mdstealerror(ep, &req.ur_mde); 1559 return; 1560 } 1561 1562 /* 1563 * Cluster nodename support 1564 * Convert nodeid -> nodename 1565 * Don't do this for MN disksets since we've already stored 1566 * both the nodeid and name. 1567 */ 1568 if (!(MD_MNSET_REC(sr))) 1569 sdssc_cm_sr_nid2nm(sr); 1570 1571 Free(recs); 1572 } 1573 1574 /* 1575 * This routine only handles returns a md_set_record structure even 1576 * if the set record describes a MN set. This will allow pre-MN 1577 * SVM RPC code to access a MN set record and to display it. 1578 * 1579 * The MN SVM RPC code detects if the set record returned describes 1580 * a MN set and then will copy it using mnsetdup. 
1581 */ 1582 md_set_record * 1583 setdup(md_set_record *sr) 1584 { 1585 md_set_record *tsr = NULL; 1586 md_drive_record **tdrpp = NULL; 1587 1588 if (sr && (tsr = Malloc(sizeof (*sr))) != NULL) { 1589 (void) memmove(tsr, sr, sizeof (*sr)); 1590 tsr->sr_next = NULL; 1591 tdrpp = &tsr->sr_drivechain; 1592 while (*tdrpp) { 1593 *tdrpp = drdup(*tdrpp); 1594 tdrpp = &(*tdrpp)->dr_next; 1595 } 1596 } 1597 return (tsr); 1598 } 1599 1600 /* 1601 * This routine only copies MN set records. If a non-MN set 1602 * record was passed in NULL pointer will be returned. 1603 */ 1604 md_mnset_record * 1605 mnsetdup(md_mnset_record *mnsr) 1606 { 1607 md_mnset_record *tmnsr = NULL; 1608 md_drive_record **tdrpp = NULL; 1609 md_mnnode_record **tnrpp = NULL; 1610 1611 if (!MD_MNSET_REC(mnsr)) { 1612 return (NULL); 1613 } 1614 1615 if (mnsr && (tmnsr = Malloc(sizeof (*mnsr))) != NULL) { 1616 (void) memmove(tmnsr, mnsr, sizeof (*mnsr)); 1617 tmnsr->sr_next = NULL; 1618 tdrpp = &tmnsr->sr_drivechain; 1619 while (*tdrpp) { 1620 *tdrpp = drdup(*tdrpp); 1621 tdrpp = &(*tdrpp)->dr_next; 1622 } 1623 tnrpp = &tmnsr->sr_nodechain; 1624 while (*tnrpp) { 1625 *tnrpp = nrdup(*tnrpp); 1626 tnrpp = &(*tnrpp)->nr_next; 1627 } 1628 } 1629 return (tmnsr); 1630 } 1631 1632 md_drive_record * 1633 drdup(md_drive_record *dr) 1634 { 1635 md_drive_record *tdr = NULL; 1636 1637 if (dr && (tdr = Malloc(sizeof (*dr))) != NULL) 1638 (void) memmove(tdr, dr, sizeof (*dr)); 1639 return (tdr); 1640 } 1641 1642 md_mnnode_record * 1643 nrdup(md_mnnode_record *nr) 1644 { 1645 md_mnnode_record *tnr = NULL; 1646 1647 if (nr && (tnr = Malloc(sizeof (*nr))) != NULL) 1648 (void) memmove(tnr, nr, sizeof (*nr)); 1649 return (tnr); 1650 } 1651 1652 /* 1653 * Duplicate parts of the drive decriptor list for this node. 1654 * Only duplicate the drive name string in the mddrivename structure, don't 1655 * need to copy any other pointers since only interested in the flags and 1656 * the drive name (i.e. 
other pointers will be set to NULL). 1657 * Returns NULL if failure due to Malloc failure. 1658 * Returns pointer (non-NULL) to dup'd list if successful. 1659 */ 1660 md_drive_desc * 1661 dd_list_dup(md_drive_desc *dd) 1662 { 1663 md_drive_desc *orig_dd; 1664 md_drive_desc *copy_dd = NULL, *copy_dd_prev = NULL; 1665 md_drive_desc *copy_dd_head = NULL; 1666 mddrivename_t *copy_dnp; 1667 char *copy_cname; 1668 char *copy_devid; 1669 1670 if (dd == NULL) 1671 return (NULL); 1672 1673 orig_dd = dd; 1674 1675 while (orig_dd) { 1676 copy_dd = Zalloc(sizeof (*copy_dd)); 1677 copy_dnp = Zalloc(sizeof (mddrivename_t)); 1678 copy_cname = Zalloc(sizeof (orig_dd->dd_dnp->cname)); 1679 if (orig_dd->dd_dnp->devid) { 1680 copy_devid = Zalloc(sizeof (orig_dd->dd_dnp->devid)); 1681 } else { 1682 copy_devid = NULL; 1683 } 1684 copy_dd->dd_next = NULL; 1685 if ((copy_dd == NULL) || (copy_dnp == NULL) || 1686 (copy_cname == NULL)) { 1687 while (copy_dd_head) { 1688 copy_dd = copy_dd_head->dd_next; 1689 Free(copy_dd_head); 1690 copy_dd_head = copy_dd; 1691 } 1692 if (copy_dnp) 1693 Free(copy_dnp); 1694 if (copy_dd) 1695 Free(copy_dd); 1696 if (copy_cname) 1697 Free(copy_cname); 1698 if (copy_devid) 1699 Free(copy_devid); 1700 return (NULL); 1701 } 1702 (void) memmove(copy_dd, orig_dd, sizeof (*orig_dd)); 1703 (void) strlcpy(copy_cname, orig_dd->dd_dnp->cname, 1704 sizeof (orig_dd->dd_dnp->cname)); 1705 copy_dd->dd_next = NULL; 1706 copy_dd->dd_dnp = copy_dnp; 1707 copy_dd->dd_dnp->cname = copy_cname; 1708 if (copy_devid) { 1709 (void) strlcpy(copy_devid, orig_dd->dd_dnp->devid, 1710 sizeof (orig_dd->dd_dnp->devid)); 1711 } 1712 1713 if (copy_dd_prev == NULL) { 1714 copy_dd_head = copy_dd; 1715 copy_dd_prev = copy_dd; 1716 } else { 1717 copy_dd_prev->dd_next = copy_dd; 1718 copy_dd_prev = copy_dd; 1719 } 1720 orig_dd = orig_dd->dd_next; 1721 } 1722 copy_dd->dd_next = NULL; 1723 return (copy_dd_head); 1724 } 1725 1726 void 1727 sr_cache_flush(int flushnames) 1728 { 1729 md_set_record 
*sr, *tsr; 1730 md_mnset_record *mnsr; 1731 md_drive_record *dr, *tdr; 1732 md_mnnode_record *nr, *tnr; 1733 1734 sr = tsr = setrecords; 1735 while (sr != NULL) { 1736 dr = tdr = sr->sr_drivechain; 1737 while (dr != NULL) { 1738 tdr = dr; 1739 dr = dr->dr_next; 1740 Free(tdr); 1741 } 1742 tsr = sr; 1743 sr = sr->sr_next; 1744 if (MD_MNSET_REC(tsr)) { 1745 mnsr = (struct md_mnset_record *)tsr; 1746 nr = tnr = mnsr->sr_nodechain; 1747 while (nr != NULL) { 1748 tnr = nr; 1749 nr = nr->nr_next; 1750 Free(tnr); 1751 } 1752 Free(mnsr); 1753 } else { 1754 Free(tsr); 1755 } 1756 } 1757 1758 setrecords = NULL; 1759 1760 setsnarfdone = 0; 1761 1762 /* This will cause the other caches to be cleared */ 1763 if (flushnames) 1764 metaflushnames(0); 1765 } 1766 1767 void 1768 sr_cache_flush_setno(set_t setno) 1769 { 1770 md_set_record *sr, *tsr; 1771 md_mnset_record *mnsr; 1772 md_drive_record *dr, *tdr; 1773 1774 assert(setsnarfdone != 0); 1775 1776 for (sr = tsr = setrecords; sr; tsr = sr, sr = sr->sr_next) { 1777 if (sr->sr_setno != setno) 1778 continue; 1779 1780 dr = tdr = sr->sr_drivechain; 1781 while (dr != NULL) { 1782 tdr = dr; 1783 dr = dr->dr_next; 1784 Free(tdr); 1785 } 1786 if (sr == setrecords) 1787 setrecords = sr->sr_next; 1788 else 1789 tsr->sr_next = sr->sr_next; 1790 if (MD_MNSET_REC(sr)) { 1791 mnsr = (struct md_mnset_record *)sr; 1792 Free(mnsr); 1793 } else { 1794 Free(sr); 1795 } 1796 break; 1797 } 1798 1799 setsnarfdone = 0; 1800 1801 /* This will cause the other caches to be cleared */ 1802 metaflushnames(0); 1803 } 1804 1805 int 1806 s_ownset(set_t setno, md_error_t *ep) 1807 { 1808 mddb_ownset_t ownset_arg; 1809 1810 ownset_arg.setno = setno; 1811 ownset_arg.owns_set = MD_SETOWNER_NONE; 1812 1813 if (metaioctl(MD_DB_OWNSET, &ownset_arg, ep, NULL) != 0) 1814 return (0); 1815 1816 return (ownset_arg.owns_set); 1817 } 1818 1819 void 1820 s_delset(char *setname, md_error_t *ep) 1821 { 1822 md_set_record *sr; 1823 md_set_record *tsr; 1824 md_drive_record 
*dr; 1825 md_drive_record *tdr; 1826 md_mnnode_record *nr, *tnr; 1827 mddb_userreq_t req; 1828 char stringbuf[100]; 1829 int i; 1830 mdsetname_t *sp = NULL; 1831 mddrivename_t *dn = NULL; 1832 mdname_t *np = NULL; 1833 md_dev64_t dev; 1834 side_t myside = MD_SIDEWILD; 1835 md_error_t xep = mdnullerror; 1836 md_mnset_record *mnsr; 1837 int num_sets = 0; 1838 int num_mn_sets = 0; 1839 1840 (void) memset(&req, 0, sizeof (mddb_userreq_t)); 1841 1842 if ((sr = getsetbyname(setname, ep)) == NULL) 1843 return; 1844 1845 sp = metasetnosetname(sr->sr_setno, &xep); 1846 mdclrerror(&xep); 1847 1848 if (MD_MNSET_REC(sr)) { 1849 /* 1850 * If this node is a set owner, halt the set before 1851 * deleting the set records. Ignore any errors since 1852 * s_ownset and halt_set could fail if panic had occurred 1853 * during the add/delete of a node. 1854 */ 1855 if (s_ownset(sr->sr_setno, &xep)) { 1856 mdclrerror(&xep); 1857 if (halt_set(sp, &xep)) 1858 mdclrerror(&xep); 1859 } 1860 } 1861 1862 (void) snprintf(stringbuf, sizeof (stringbuf), "/dev/md/%s", setname); 1863 (void) unlink(stringbuf); 1864 (void) unlink(meta_lock_name(sr->sr_setno)); 1865 1866 if (MD_MNSET_REC(sr)) { 1867 mnsr = (struct md_mnset_record *)sr; 1868 nr = mnsr->sr_nodechain; 1869 while (nr) { 1870 /* Setting myside for later use */ 1871 if (strcmp(mynode(), nr->nr_nodename) == 0) 1872 myside = nr->nr_nodeid; 1873 1874 (void) memset(&req, 0, sizeof (req)); 1875 METAD_SETUP_NR(MD_DB_DELETE, nr->nr_selfid) 1876 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, 1877 NULL) != 0) { 1878 (void) mdstealerror(ep, &req.ur_mde); 1879 free_sr(sr); 1880 return; 1881 } 1882 tnr = nr; 1883 nr = nr->nr_next; 1884 1885 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST, 1886 sr->sr_setno, tnr->nr_nodeid); 1887 1888 mnnr_cache_del((struct md_mnset_record *)sr, 1889 tnr->nr_selfid); 1890 } 1891 } else { 1892 for (i = 0; i < MD_MAXSIDES; i++) { 1893 /* Skip empty slots */ 1894 if (sr->sr_nodes[i][0] == '\0') 1895 continue; 1896 
1897 if (strcmp(mynode(), sr->sr_nodes[i]) == 0) 1898 myside = i; 1899 1900 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HOST, 1901 sr->sr_setno, i); 1902 } 1903 } 1904 1905 dr = sr->sr_drivechain; 1906 while (dr) { 1907 (void) memset(&req, 0, sizeof (req)); 1908 METAD_SETUP_DR(MD_DB_DELETE, dr->dr_selfid) 1909 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 1910 (void) mdstealerror(ep, &req.ur_mde); 1911 free_sr(sr); 1912 return; 1913 } 1914 tdr = dr; 1915 dr = dr->dr_next; 1916 1917 dev = NODEV64; 1918 if (myside != MD_SIDEWILD && sp != NULL) { 1919 dn = metadrivename_withdrkey(sp, myside, 1920 tdr->dr_key, MD_BASICNAME_OK, &xep); 1921 if (dn != NULL) { 1922 uint_t rep_slice; 1923 1924 np = NULL; 1925 if (meta_replicaslice(dn, &rep_slice, 1926 &xep) == 0) { 1927 np = metaslicename(dn, rep_slice, &xep); 1928 } 1929 1930 if (np != NULL) 1931 dev = np->dev; 1932 else 1933 mdclrerror(&xep); 1934 } else 1935 mdclrerror(&xep); 1936 } else 1937 mdclrerror(&xep); 1938 1939 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_DRIVE, 1940 sr->sr_setno, dev); 1941 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_DRIVE, 1942 MD_LOCAL_SET, dev); 1943 1944 dr_cache_del(sr, tdr->dr_selfid); 1945 1946 } 1947 1948 (void) memset(&req, 0, sizeof (req)); 1949 METAD_SETUP_SR(MD_DB_DELETE, sr->sr_selfid) 1950 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) { 1951 (void) mdstealerror(ep, &req.ur_mde); 1952 free_sr(sr); 1953 return; 1954 } 1955 1956 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_SET, sr->sr_setno, 1957 NODEV64); 1958 1959 for (tsr = setrecords; tsr; tsr = tsr->sr_next) { 1960 if (tsr == sr) 1961 continue; 1962 1963 num_sets++; 1964 if (MD_MNSET_REC(tsr)) 1965 num_mn_sets++; 1966 } 1967 1968 if (num_mn_sets == 0) 1969 (void) meta_smf_disable(META_SMF_MN_DISKSET, NULL); 1970 1971 /* The set we just deleted is the only one left */ 1972 if (num_sets == 0) 1973 (void) meta_smf_disable(META_SMF_DISKSET, NULL); 1974 1975 sr_cache_del(sr->sr_selfid); 
1976 free_sr(sr); 1977 1978 } 1979 1980 void 1981 s_delrec(mddb_recid_t recid, md_error_t *ep) 1982 { 1983 mddb_userreq_t req; 1984 1985 (void) memset(&req, 0, sizeof (req)); 1986 1987 METAD_SETUP_SR(MD_DB_DELETE, recid) 1988 1989 if (metaioctl(MD_DB_USERREQ, &req, &req.ur_mde, NULL) != 0) 1990 (void) mdstealerror(ep, &req.ur_mde); 1991 } 1992 1993 /* 1994 * resnarf the imported set 1995 */ 1996 int 1997 resnarf_set( 1998 set_t setno, 1999 md_error_t *ep 2000 ) 2001 { 2002 md_set_record *sr; 2003 md_drive_record *dr; 2004 mddb_recid_t id, *p; 2005 2006 if (meta_setup_db_locations(ep) != 0) { 2007 if (! mdismddberror(ep, MDE_DB_STALE)) 2008 return (-1); 2009 mdclrerror(ep); 2010 } 2011 2012 setsnarfdone = 1; 2013 2014 id = 0; 2015 while ((sr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_NEXT, MDDB_UR_SR, &id, 2016 ep)) != NULL) { 2017 2018 if (sr->sr_setno != setno) 2019 continue; 2020 2021 /* Don't allow resnarf of a multi-node diskset */ 2022 if (MD_MNSET_REC(sr)) 2023 goto out; 2024 2025 sr->sr_next = NULL; 2026 sr->sr_drivechain = NULL; 2027 2028 if (md_in_daemon) 2029 url_addl(&url_used, sr->sr_selfid); 2030 2031 sr->sr_flags |= MD_SR_CHECK; 2032 2033 sr_cache_add(sr); 2034 2035 if (sr->sr_driverec == 0) 2036 break; 2037 2038 p = &sr->sr_driverec; 2039 while ((dr = get_ur_rec(MD_LOCAL_SET, MD_UR_GET_WKEY, 2040 MDDB_UR_DR, p, ep)) != NULL) { 2041 dr->dr_next = NULL; 2042 2043 if (md_in_daemon) 2044 url_addl(&url_used, dr->dr_selfid); 2045 2046 dr_cache_add(sr, dr); 2047 2048 if (dr->dr_nextrec == 0) 2049 break; 2050 2051 p = &dr->dr_nextrec; 2052 } 2053 if (! mdisok(ep)) { 2054 if (! mdissyserror(ep, ENOENT)) 2055 goto out; 2056 mdclrerror(ep); 2057 commitset(sr, FALSE, ep); 2058 if (! mdisok(ep)) 2059 goto out; 2060 } 2061 } 2062 if (! mdisok(ep)) { 2063 if (! 
mdissyserror(ep, ENOENT)) 2064 goto out; 2065 mdclrerror(ep); 2066 } 2067 2068 setsnarfdone = 2; 2069 2070 url_freel(&url_used); 2071 url_freel(&url_tode); 2072 return (0); 2073 2074 out: 2075 url_freel(&url_used); 2076 url_freel(&url_tode); 2077 2078 sr_cache_flush(1); 2079 2080 setsnarfdone = 0; 2081 2082 return (-1); 2083 } 2084