1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Metadevice diskset interfaces 30 */ 31 32 #include <meta.h> 33 #include <mdmn_changelog.h> 34 #include "meta_set_prv.h" 35 #include "meta_repartition.h" 36 37 static int 38 check_setnodes_againstdrivelist( 39 mdsetname_t *sp, 40 mddrivenamelist_t *dnlp, 41 md_error_t *ep 42 ) 43 { 44 md_set_desc *sd; 45 mddrivenamelist_t *p; 46 int i; 47 md_mnnode_desc *nd; 48 49 if ((sd = metaget_setdesc(sp, ep)) == NULL) 50 return (-1); 51 52 if (MD_MNSET_DESC(sd)) { 53 nd = sd->sd_nodelist; 54 while (nd) { 55 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 56 nd = nd->nd_next; 57 continue; 58 } 59 for (p = dnlp; p != NULL; p = p->next) 60 if (checkdrive_onnode(sp, p->drivenamep, 61 nd->nd_nodename, ep)) 62 return (-1); 63 nd = nd->nd_next; 64 } 65 } else { 66 for (i = 0; i < MD_MAXSIDES; i++) { 67 /* Skip empty slots */ 68 if (sd->sd_nodes[i][0] == '\0') 69 continue; 70 71 for (p = dnlp; p != NULL; p = p->next) 72 if (checkdrive_onnode(sp, p->drivenamep, 73 sd->sd_nodes[i], ep)) 74 return (-1); 75 } 76 } 77 return (0); 78 } 79 80 static int 81 drvsuniq(mdsetname_t *sp, mddrivenamelist_t *dnlp, md_error_t *ep) 82 { 83 mddrivenamelist_t *dl1, *dl2; 84 mddrivename_t *dn1, *dn2; 85 86 for (dl1 = dnlp; dl1 != NULL; dl1 = dl1->next) { 87 dn1 = dl1->drivenamep; 88 89 for (dl2 = dl1->next; dl2 != NULL; dl2 = dl2->next) { 90 dn2 = dl2->drivenamep; 91 if (strcmp(dn1->cname, dn2->cname) != 0) 92 continue; 93 94 return (mddserror(ep, MDE_DS_DUPDRIVE, sp->setno, 95 NULL, dn1->cname, sp->setname)); 96 } 97 } 98 return (0); 99 } 100 101 static md_drive_desc * 102 metaget_drivedesc_fromdrivelist( 103 mdsetname_t *sp, 104 mddrivenamelist_t *dnlp, 105 uint_t flags, 106 md_error_t *ep 107 ) 108 { 109 mddrivenamelist_t *p; 110 md_drive_desc *dd = NULL; 111 md_set_desc *sd; 112 113 if ((sd = metaget_setdesc(sp, ep)) == NULL) 114 return (NULL); 115 116 for (p = dnlp; p != NULL; p = p->next) { 117 (void) metadrivedesc_append(&dd, p->drivenamep, 0, 0, 118 sd->sd_ctime, sd->sd_genid, flags); 119 } 120 121 return (dd); 122 } 123 124 /* 125 * Exported Entry Points 126 */ 127 128 int 129 meta_make_sidenmlist( 130 mdsetname_t *sp, 131 mddrivename_t *dnp, 132 int import_flag, /* flags partial import */ 133 md_im_drive_info_t *midp, /* import drive information */ 134 md_error_t *ep 135 ) 136 { 137 mdsidenames_t *sn, **sn_next; 138 mdname_t *np; 139 int done; 140 side_t sideno = MD_SIDEWILD; 141 uint_t rep_slice; 142 char *bname; 143 144 if (!import_flag) { 145 /* 146 * Normal (aka NOT partial import) code path. 147 */ 148 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) { 149 return (-1); 150 } 151 152 dnp->side_names_key = MD_KEYWILD; 153 154 if ((np = metaslicename(dnp, rep_slice, ep)) == NULL) 155 return (-1); 156 bname = Strdup(np->bname); 157 } else { 158 /* 159 * When doing a partial import, we'll get the needed 160 * information from somewhere other than the system. 161 */ 162 dnp->side_names_key = MD_KEYWILD; 163 bname = Strdup(midp->mid_devname); 164 } 165 metaflushsidenames(dnp); 166 sn_next = &dnp->side_names; 167 /*CONSTCOND*/ 168 while (1) { 169 sn = Zalloc(sizeof (*sn)); 170 171 if ((done = meta_getnextside_devinfo(sp, bname, &sideno, 172 &sn->cname, &sn->dname, &sn->mnum, ep)) == -1) { 173 if (import_flag) { 174 mdclrerror(ep); 175 sn->dname = Strdup(midp->mid_driver_name); 176 sn->mnum = midp->mid_mnum; 177 } else { 178 Free(sn); 179 Free(bname); 180 return (-1); 181 } 182 } 183 184 if (done == 0) { 185 Free(sn); 186 Free(bname); 187 return (0); 188 } 189 190 sn->sideno = sideno; 191 192 /* Add to the end of the linked list */ 193 assert(*sn_next == NULL); 194 *sn_next = sn; 195 sn_next = &sn->next; 196 } 197 /*NOTREACHED*/ 198 } 199 200 int 201 meta_set_adddrives( 202 mdsetname_t *sp, 203 mddrivenamelist_t *dnlp, 204 daddr_t dbsize, 205 int force_label, 206 md_error_t *ep 207 ) 208 { 209 md_set_desc *sd; 210 md_drive_desc *dd = NULL, *curdd = NULL, *ddp; 211 int i; 212 mddrivenamelist_t *p; 213 mhd_mhiargs_t mhiargs; 214 int rval = 0; 215 md_timeval32_t now; 216 sigset_t oldsigs; 217 ulong_t genid; 218 ulong_t max_genid = 0; 219 md_setkey_t *cl_sk; 220 int rb_level = 0; 221 md_error_t xep = mdnullerror; 222 md_mnnode_desc *nd; 223 int suspendall_flag = 0; 224 int suspend1_flag = 0; 225 int lock_flag = 0; 226 int flush_set_onerr = 0; 227 md_replicalist_t *rlp = NULL, *rl; 228 229 if ((sd = metaget_setdesc(sp, ep)) == NULL) 230 return (-1); 231 232 /* Make sure we own the set */ 233 if (meta_check_ownership(sp, ep) != 0) 234 return (-1); 235 236 /* 237 * The drive and node records are stored in the local mddbs of each 238 * node in the diskset. Each node's rpc.metad daemon reads in the set, 239 * drive and node records from that node's local mddb and caches them 240 * internally. Any process needing diskset information contacts its 241 * local rpc.metad to get this information. Since each node in the 242 * diskset is independently reading the set information from its local 243 * mddb, the set, drive and node records in the local mddbs must stay 244 * in-sync, so that all nodes have a consistent view of the diskset. 245 * 246 * For a multinode diskset, explicitly verify that all nodes in the 247 * diskset are ALIVE (i.e. are in the API membership list). Otherwise, 248 * fail this operation since all nodes must be ALIVE in order to add 249 * the new drive record to their local mddb. If a panic of this node 250 * leaves the local mddbs set, node and drive records out-of-sync, the 251 * reconfig cycle will fix the local mddbs and force them back into 252 * synchronization. 253 */ 254 if (MD_MNSET_DESC(sd)) { 255 nd = sd->sd_nodelist; 256 while (nd) { 257 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 258 (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, 259 sp->setno, 260 nd->nd_nodename, NULL, sp->setname); 261 return (-1); 262 } 263 nd = nd->nd_next; 264 } 265 } 266 267 if (drvsuniq(sp, dnlp, ep) == -1) 268 return (-1); 269 270 /* 271 * Lock the set on current set members. 272 * Set locking done much earlier for MN diskset than for traditional 273 * diskset since lock_set and SUSPEND are used to protect against 274 * other meta* commands running on the other nodes. 275 */ 276 if (MD_MNSET_DESC(sd)) { 277 /* Make sure we are blocking all signals */ 278 if (procsigs(TRUE, &oldsigs, &xep) < 0) 279 mdclrerror(&xep); 280 281 nd = sd->sd_nodelist; 282 /* All nodes are guaranteed to be ALIVE */ 283 while (nd) { 284 if (clnt_lock_set(nd->nd_nodename, sp, ep)) { 285 rval = -1; 286 goto out; 287 } 288 lock_flag = 1; 289 nd = nd->nd_next; 290 } 291 /* 292 * Lock out other meta* commands by suspending 293 * class 1 messages across the diskset. 294 */ 295 nd = sd->sd_nodelist; 296 /* All nodes are guaranteed to be ALIVE */ 297 while (nd) { 298 if (clnt_mdcommdctl(nd->nd_nodename, 299 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 300 MD_MSCF_NO_FLAGS, ep)) { 301 rval = -1; 302 goto out; 303 } 304 suspend1_flag = 1; 305 nd = nd->nd_next; 306 } 307 } 308 309 if (check_setnodes_againstdrivelist(sp, dnlp, ep)) { 310 rval = -1; 311 goto out; 312 } 313 314 for (p = dnlp; p != NULL; p = p->next) { 315 mdsetname_t *tmp; 316 317 if (meta_is_drive_in_anyset(p->drivenamep, &tmp, FALSE, 318 ep) == -1) { 319 rval = -1; 320 goto out; 321 } 322 323 if (tmp != NULL) { 324 (void) mddserror(ep, MDE_DS_DRIVEINSET, sp->setno, 325 tmp->setname, p->drivenamep->cname, sp->setname); 326 rval = -1; 327 goto out; 328 } 329 } 330 331 /* END CHECK CODE */ 332 333 /* 334 * This is a separate loop (from above) so that we validate all the 335 * drives handed to us before we repartition any one drive. 336 */ 337 for (p = dnlp; p != NULL; p = p->next) { 338 if (meta_repartition_drive(sp, 339 p->drivenamep, force_label == TRUE ? MD_REPART_FORCE : 0, 340 NULL, /* Don't return the VTOC. */ 341 ep) != 0) { 342 rval = -1; 343 goto out; 344 } 345 /* 346 * Create the names for the drives we are adding per side. 347 */ 348 if (meta_make_sidenmlist(sp, p->drivenamep, 0, NULL, 349 ep) == -1) { 350 rval = -1; 351 goto out; 352 } 353 } 354 355 /* 356 * Get the list of drives descriptors that we are adding. 357 */ 358 dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep); 359 360 if (! mdisok(ep)) { 361 rval = -1; 362 goto out; 363 } 364 365 /* 366 * Get the set timeout information. 367 */ 368 (void) memset(&mhiargs, '\0', sizeof (mhiargs)); 369 if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) { 370 rval = -1; 371 goto out; 372 } 373 374 /* 375 * Get timestamp and generation id for new records 376 */ 377 now = sd->sd_ctime; 378 genid = sd->sd_genid; 379 380 381 /* At this point, in case of error, set should be flushed. */ 382 flush_set_onerr = 1; 383 384 /* Lock the set on current set members */ 385 if (!(MD_MNSET_DESC(sd))) { 386 md_rb_sig_handling_on(); 387 for (i = 0; i < MD_MAXSIDES; i++) { 388 /* Skip empty slots */ 389 if (sd->sd_nodes[i][0] == '\0') 390 continue; 391 392 if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { 393 rval = -1; 394 goto out; 395 } 396 lock_flag = 1; 397 } 398 } 399 400 /* 401 * Get drive descriptors for the drives that are currently in the set. 402 */ 403 curdd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep); 404 if (! mdisok(ep)) 405 goto rollback; 406 407 /* 408 * If first drive being added to set, set the mastership 409 * of the multinode diskset to be this node. 410 * Only set it on this node. If all goes well 411 * and there are no errors, the mastership of this node will be set 412 * on all nodes in user space and in the kernel. 413 */ 414 if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) { 415 if (clnt_mnsetmaster(mynode(), sp, 416 sd->sd_mn_mynode->nd_nodename, 417 sd->sd_mn_mynode->nd_nodeid, ep)) { 418 goto rollback; 419 } 420 /* 421 * Set this up in my local cache of the set desc so that 422 * the set descriptor won't have to be gotten again from 423 * rpc.metad. If it is flushed and gotten again, these 424 * values will be set in sr2setdesc. 425 */ 426 sd->sd_mn_master_nodeid = sd->sd_mn_mynode->nd_nodeid; 427 (void) strcpy(sd->sd_mn_master_nodenm, 428 sd->sd_mn_mynode->nd_nodename); 429 sd->sd_mn_am_i_master = 1; 430 } 431 432 RB_TEST(1, "adddrives", ep) 433 434 RB_PREEMPT; 435 rb_level = 1; /* level 1 */ 436 437 RB_TEST(2, "adddrives", ep) 438 439 /* 440 * Add the drive records for the drives that we are adding to 441 * each host in the set. Marks the drive as MD_DR_ADD. 442 */ 443 if (MD_MNSET_DESC(sd)) { 444 nd = sd->sd_nodelist; 445 /* All nodes are guaranteed to be ALIVE */ 446 while (nd) { 447 if (clnt_adddrvs(nd->nd_nodename, sp, dd, now, genid, 448 ep) == -1) 449 goto rollback; 450 451 RB_TEST(3, "adddrives", ep) 452 nd = nd->nd_next; 453 } 454 } else { 455 for (i = 0; i < MD_MAXSIDES; i++) { 456 /* Skip empty slots */ 457 if (sd->sd_nodes[i][0] == '\0') 458 continue; 459 460 if (clnt_adddrvs(sd->sd_nodes[i], sp, dd, now, genid, 461 ep) == -1) 462 goto rollback; 463 464 RB_TEST(3, "adddrives", ep) 465 } 466 } 467 468 RB_TEST(4, "adddrives", ep) 469 470 RB_PREEMPT; 471 rb_level = 2; /* level 2 */ 472 473 RB_TEST(5, "adddrives", ep) 474 475 /* 476 * Take ownership of the added drives. 477 */ 478 if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) { 479 if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep)) 480 goto rollback; 481 } 482 483 RB_TEST(6, "adddrives", ep) 484 485 RB_PREEMPT; 486 rb_level = 3; /* level 3 */ 487 488 RB_TEST(7, "adddrives", ep) 489 490 /* 491 * Balance the DB's according to the list of existing drives and the 492 * list of added drives. 493 */ 494 if ((rval = meta_db_balance(sp, dd, curdd, dbsize, ep)) == -1) 495 goto rollback; 496 497 /* 498 * Slam a dummy master block on all the disks that we are adding 499 * that don't have replicas on them. 500 * Used by diskset import if the disksets are remotely replicated 501 */ 502 if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) { 503 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 504 uint_t rep_slice; 505 int fd = -1; 506 mdname_t *np = NULL; 507 char *drive_name; 508 509 drive_name = ddp->dd_dnp->cname; 510 511 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 512 char *rep_name; 513 514 rep_name = 515 rl->rl_repp->r_namep->drivenamep->cname; 516 517 if (strcmp(drive_name, rep_name) == 0) { 518 /* 519 * Disk has a replica on it so don't 520 * add dummy master block. 521 */ 522 break; 523 } 524 } 525 if (rl == NULL) { 526 /* 527 * Drive doesn't have a replica on it so 528 * we need a dummy master block. Add it. 529 */ 530 if (meta_replicaslice(ddp->dd_dnp, &rep_slice, 531 &xep) != 0) { 532 mdclrerror(&xep); 533 continue; 534 } 535 536 if ((np = metaslicename(ddp->dd_dnp, rep_slice, 537 &xep)) == NULL) { 538 mdclrerror(&xep); 539 continue; 540 } 541 542 if ((fd = open(np->rname, O_RDWR)) >= 0) { 543 meta_mkdummymaster(sp, fd, 16); 544 (void) close(fd); 545 } 546 } 547 } 548 } 549 550 if ((curdd == NULL) && (MD_MNSET_DESC(sd))) { 551 /* 552 * Notify rpc.mdcommd on all nodes of a nodelist change. 553 * Start by suspending rpc.mdcommd (which drains it of all 554 * messages), then change the nodelist followed by a reinit 555 * and resume. 556 */ 557 nd = sd->sd_nodelist; 558 /* All nodes are guaranteed to be ALIVE */ 559 while (nd) { 560 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, 561 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) { 562 rval = -1; 563 goto out; 564 } 565 suspendall_flag = 1; 566 nd = nd->nd_next; 567 } 568 } 569 570 /* 571 * If a MN diskset and this is the first disk(s) being added 572 * to set, then pre-allocate change log records here. 573 * When the other nodes are joined into the MN diskset, the 574 * USER records will just be snarfed in. 575 */ 576 if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) { 577 if (mdmn_allocate_changelog(sp, ep) != 0) 578 goto rollback; 579 } 580 581 /* 582 * Mark the drives MD_DR_OK. 583 * If first drive being added to MN diskset, then set 584 * master on all nodes to be this node and then join 585 * all alive nodes (nodes in membership list) to set. 586 */ 587 if (MD_MNSET_DESC(sd)) { 588 nd = sd->sd_nodelist; 589 /* All nodes are guaranteed to be ALIVE */ 590 while (nd) { 591 /* don't set master on this node - done earlier */ 592 if ((curdd == NULL) && (nd->nd_nodeid != 593 sd->sd_mn_mynode->nd_nodeid)) { 594 /* 595 * Set master on all alive nodes since 596 * all alive nodes will become joined nodes. 597 */ 598 if (clnt_mnsetmaster(nd->nd_nodename, sp, 599 sd->sd_mn_mynode->nd_nodename, 600 sd->sd_mn_mynode->nd_nodeid, ep)) { 601 goto rollback; 602 } 603 } 604 605 if (curdd == NULL) { 606 /* 607 * No special flags for join set. Since 608 * all nodes are joining if 1st drive is being 609 * added to set then all nodes will be either 610 * STALE or non-STALE and each node can 611 * determine this on its own. 612 */ 613 if (clnt_joinset(nd->nd_nodename, sp, 614 NULL, ep)) { 615 goto rollback; 616 } 617 /* Sets join node flag on all nodes in list */ 618 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 619 sd->sd_nodelist, MD_NR_JOIN, NULL, ep)) { 620 goto rollback; 621 } 622 } 623 624 /* 625 * Set MD_DR_OK as last thing before unlock. 626 * In case of panic on this node, recovery 627 * code can check for MD_DR_OK to determine 628 * status of diskset. 629 */ 630 if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd, 631 MD_DR_OK, ep) == -1) 632 goto rollback; 633 634 635 RB_TEST(8, "adddrives", ep) 636 nd = nd->nd_next; 637 } 638 } else { 639 for (i = 0; i < MD_MAXSIDES; i++) { 640 /* Skip empty slots */ 641 if (sd->sd_nodes[i][0] == '\0') 642 continue; 643 644 if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, MD_DR_OK, 645 ep) == -1) 646 goto rollback; 647 648 RB_TEST(8, "adddrives", ep) 649 } 650 } 651 652 RB_TEST(9, "adddrives", ep) 653 654 out: 655 /* 656 * Notify rpc.mdcommd on all nodes of a nodelist change. 657 * Send reinit command to mdcommd which forces it to get 658 * fresh set description. 659 */ 660 if (suspendall_flag) { 661 /* Send reinit */ 662 nd = sd->sd_nodelist; 663 /* All nodes are guaranteed to be ALIVE */ 664 while (nd) { 665 /* Class is ignored for REINIT */ 666 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 667 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 668 if (rval == 0) 669 (void) mdstealerror(ep, &xep); 670 rval = -1; 671 mde_perror(ep, dgettext(TEXT_DOMAIN, 672 "Unable to reinit rpc.mdcommd.\n")); 673 } 674 nd = nd->nd_next; 675 } 676 } 677 /* 678 * Unlock diskset by resuming messages across the diskset. 679 * Just resume all classes so that resume is the same whether 680 * just one class was locked or all classes were locked. 681 */ 682 if ((suspend1_flag) || (suspendall_flag)) { 683 nd = sd->sd_nodelist; 684 /* All nodes are guaranteed to be ALIVE */ 685 while (nd) { 686 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 687 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 688 if (rval == 0) 689 (void) mdstealerror(ep, &xep); 690 rval = -1; 691 mde_perror(ep, dgettext(TEXT_DOMAIN, 692 "Unable to resume rpc.mdcommd.\n")); 693 } 694 nd = nd->nd_next; 695 } 696 meta_ping_mnset(sp->setno); 697 } 698 699 if (lock_flag) { 700 cl_sk = cl_get_setkey(sp->setno, sp->setname); 701 if (MD_MNSET_DESC(sd)) { 702 nd = sd->sd_nodelist; 703 /* All nodes are guaranteed to be ALIVE */ 704 while (nd) { 705 if (clnt_unlock_set(nd->nd_nodename, 706 cl_sk, &xep)) { 707 if (rval == 0) 708 (void) mdstealerror(ep, &xep); 709 rval = -1; 710 } 711 nd = nd->nd_next; 712 } 713 } else { 714 for (i = 0; i < MD_MAXSIDES; i++) { 715 /* Skip empty slots */ 716 if (sd->sd_nodes[i][0] == '\0') 717 continue; 718 719 if (clnt_unlock_set(sd->sd_nodes[i], 720 cl_sk, &xep)) { 721 if (rval == 0) 722 (void) mdstealerror(ep, &xep); 723 rval = -1; 724 } 725 } 726 } 727 cl_set_setkey(NULL); 728 } 729 730 metafreedrivedesc(&dd); 731 732 if (flush_set_onerr) { 733 metaflushsetname(sp); 734 if (!(MD_MNSET_DESC(sd))) { 735 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 736 } 737 } 738 739 if (MD_MNSET_DESC(sd)) { 740 /* release signals back to what they were on entry */ 741 if (procsigs(FALSE, &oldsigs, &xep) < 0) 742 mdclrerror(&xep); 743 } 744 745 return (rval); 746 747 rollback: 748 /* all signals already blocked for MN disket */ 749 if (!(MD_MNSET_DESC(sd))) { 750 /* Make sure we are blocking all signals */ 751 if (procsigs(TRUE, &oldsigs, &xep) < 0) 752 mdclrerror(&xep); 753 } 754 755 rval = -1; 756 757 max_genid = sd->sd_genid; 758 759 /* level 3 */ 760 if (rb_level > 2) { 761 /* 762 * Since the add drive operation is failing, need 763 * to reset config back to the way it was 764 * before the add drive opration. 765 * If a MN diskset and this is the first drive being added, 766 * then reset master on all ALIVE nodes (which is all nodes) 767 * since the master would have not been set previously. 768 * Don't reset master on this node, since this 769 * is done later. 770 * This is ok to fail since next node to add first 771 * disk to diskset will also set the master on all nodes. 772 * 773 * Also, if this is the first drive being added, 774 * need to have each node withdraw itself from the set. 775 */ 776 if ((MD_MNSET_DESC(sd)) && (curdd == NULL)) { 777 nd = sd->sd_nodelist; 778 /* All nodes are guaranteed to be ALIVE */ 779 while (nd) { 780 /* 781 * Be careful with ordering in case of 782 * panic between the steps and the 783 * effect on recovery during reconfig. 784 */ 785 if (clnt_withdrawset(nd->nd_nodename, sp, &xep)) 786 mdclrerror(&xep); 787 788 /* Sets withdraw flag on all nodes in list */ 789 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 790 sd->sd_nodelist, MD_NR_WITHDRAW, 791 NULL, &xep)) { 792 mdclrerror(&xep); 793 } 794 795 /* Skip this node */ 796 if (nd->nd_nodeid == 797 sd->sd_mn_mynode->nd_nodeid) { 798 nd = nd->nd_next; 799 continue; 800 } 801 /* Reset master on all of the other nodes. */ 802 if (clnt_mnsetmaster(nd->nd_nodename, sp, 803 "", MD_MN_INVALID_NID, &xep)) 804 mdclrerror(&xep); 805 nd = nd->nd_next; 806 } 807 } 808 } 809 810 /* 811 * Send resume command to mdcommd. Don't send reinit command 812 * since nodelist should not have changed. 813 * If suspendall_flag is set, then user would have been adding 814 * first drives to set. Since this failed, there is certainly 815 * no reinit message to send to rpc.commd since no nodes will 816 * be joined to set at the end of this metaset command. 817 */ 818 if (suspendall_flag) { 819 /* Send resume */ 820 nd = sd->sd_nodelist; 821 /* All nodes are guaranteed to be ALIVE */ 822 while (nd) { 823 /* 824 * Resume all classes but class 1 so that lock is held 825 * against meta* commands. 826 * To later resume class1, must issue a class0 resume. 827 */ 828 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 829 sp, MD_MSG_CLASS0, 830 MD_MSCF_DONT_RESUME_CLASS1, &xep)) { 831 mde_perror(&xep, dgettext(TEXT_DOMAIN, 832 "Unable to resume rpc.mdcommd.\n")); 833 mdclrerror(&xep); 834 } 835 nd = nd->nd_next; 836 } 837 meta_ping_mnset(sp->setno); 838 } 839 840 /* level 3 */ 841 if (rb_level > 2) { 842 mdnamelist_t *nlp; 843 mdname_t *np; 844 845 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 846 uint_t rep_slice; 847 848 if ((meta_replicaslice(ddp->dd_dnp, 849 &rep_slice, &xep) != 0) || 850 ((np = metaslicename(ddp->dd_dnp, rep_slice, 851 &xep)) == NULL)) { 852 mdclrerror(&xep); 853 continue; 854 } 855 nlp = NULL; 856 (void) metanamelist_append(&nlp, np); 857 858 if (meta_db_detach(sp, nlp, 859 (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, &xep)) 860 mdclrerror(&xep); 861 862 metafreenamelist(nlp); 863 } 864 865 /* Re-balance */ 866 if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1) 867 mdclrerror(&xep); 868 869 /* Only if we are adding the first drive */ 870 /* Handled MN diskset above. */ 871 if ((curdd == NULL) && !(MD_MNSET_DESC(sd))) { 872 if (clnt_stimeout(mynode(), sp, &defmhiargs, 873 &xep) == -1) 874 mdclrerror(&xep); 875 876 /* This is needed because of a corner case */ 877 if (halt_set(sp, &xep)) 878 mdclrerror(&xep); 879 } 880 max_genid++; 881 } 882 883 /* level 2 */ 884 if (rb_level > 1) { 885 if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) { 886 if (rel_own_bydd(sp, dd, TRUE, &xep)) 887 mdclrerror(&xep); 888 } 889 } 890 891 /* level 1 */ 892 if (rb_level > 0) { 893 if (MD_MNSET_DESC(sd)) { 894 nd = sd->sd_nodelist; 895 /* All nodes are guaranteed to be ALIVE */ 896 while (nd) { 897 if (clnt_deldrvs(nd->nd_nodename, sp, dd, 898 &xep) == -1) 899 mdclrerror(&xep); 900 nd = nd->nd_next; 901 } 902 } else { 903 for (i = 0; i < MD_MAXSIDES; i++) { 904 /* Skip empty slots */ 905 if (sd->sd_nodes[i][0] == '\0') 906 continue; 907 908 if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, 909 &xep) == -1) 910 mdclrerror(&xep); 911 } 912 } 913 max_genid += 2; 914 resync_genid(sp, sd, max_genid, 0, NULL); 915 } 916 917 if ((suspend1_flag) || (suspendall_flag)) { 918 /* Send resume */ 919 nd = sd->sd_nodelist; 920 /* All nodes are guaranteed to be ALIVE */ 921 while (nd) { 922 /* 923 * Just resume all classes so that resume is the 924 * same whether just one class was locked or all 925 * classes were locked. 926 */ 927 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 928 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 929 mdclrerror(&xep); 930 } 931 nd = nd->nd_next; 932 } 933 meta_ping_mnset(sp->setno); 934 } 935 936 /* level 0 */ 937 cl_sk = cl_get_setkey(sp->setno, sp->setname); 938 /* Don't test lock flag since guaranteed to be set if in rollback */ 939 if (MD_MNSET_DESC(sd)) { 940 /* 941 * Since the add drive operation is failing, need 942 * to reset config back to the way it was 943 * before the add drive opration. 944 * If a MN diskset and this is the first drive being 945 * added, then reset master on this node since 946 * the master would have not been set previously. 947 * This is ok to fail since next node to add first 948 * disk to diskset will also set the master on all nodes. 949 */ 950 if (curdd == NULL) { 951 /* Reset master on mynode */ 952 if (clnt_mnsetmaster(mynode(), sp, "", 953 MD_MN_INVALID_NID, &xep)) 954 mdclrerror(&xep); 955 } 956 nd = sd->sd_nodelist; 957 /* All nodes are guaranteed to be ALIVE */ 958 while (nd) { 959 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) 960 mdclrerror(&xep); 961 nd = nd->nd_next; 962 } 963 } else { 964 for (i = 0; i < MD_MAXSIDES; i++) { 965 /* Skip empty slots */ 966 if (sd->sd_nodes[i][0] == '\0') 967 continue; 968 969 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) 970 mdclrerror(&xep); 971 } 972 } 973 cl_set_setkey(NULL); 974 975 /* release signals back to what they were on entry */ 976 if (procsigs(FALSE, &oldsigs, &xep) < 0) 977 mdclrerror(&xep); 978 979 metafreedrivedesc(&dd); 980 981 if (flush_set_onerr) { 982 metaflushsetname(sp); 983 if (!(MD_MNSET_DESC(sd))) { 984 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 985 } 986 } 987 988 return (rval); 989 } 990 991 /* 992 * Add drives routine used during import of a diskset. 993 */ 994 int 995 meta_imp_set_adddrives( 996 mdsetname_t *sp, 997 mddrivenamelist_t *dnlp, 998 md_im_set_desc_t *misp, 999 md_error_t *ep 1000 ) 1001 { 1002 md_set_desc *sd; 1003 mddrivenamelist_t *p; 1004 md_drive_desc *dd = NULL, *ddp; 1005 int flush_set_onerr = 0; 1006 md_timeval32_t now; 1007 ulong_t genid; 1008 mhd_mhiargs_t mhiargs; 1009 md_im_replica_info_t *mirp; 1010 md_im_drive_info_t *midp; 1011 int rval = 0; 1012 sigset_t oldsigs; 1013 ulong_t max_genid = 0; 1014 int rb_level = 0; 1015 md_error_t xep = mdnullerror; 1016 1017 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1018 return (-1); 1019 1020 for (p = dnlp; p != NULL; p = p->next) { 1021 int imp_flag = 0; 1022 1023 /* 1024 * If we have a partial diskset, meta_make_sidenmlist will 1025 * need information from midp to complete making the 1026 * side name structure. 1027 */ 1028 if (misp->mis_partial) { 1029 imp_flag = MDDB_C_IMPORT; 1030 for (midp = misp->mis_drives; midp != NULL; 1031 midp = midp->mid_next) { 1032 if (midp->mid_dnp == p->drivenamep) 1033 break; 1034 } 1035 if (midp == NULL) { 1036 (void) mddserror(ep, MDE_DS_SETNOTIMP, 1037 MD_SET_BAD, mynode(), NULL, sp->setname); 1038 rval = -1; 1039 goto out; 1040 } 1041 } 1042 /* 1043 * Create the names for the drives we are adding per side. 1044 */ 1045 if (meta_make_sidenmlist(sp, p->drivenamep, imp_flag, 1046 midp, ep) == -1) { 1047 rval = -1; 1048 goto out; 1049 } 1050 } 1051 1052 /* 1053 * Get the list of drives descriptors that we are adding. 1054 */ 1055 dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_ADD, ep); 1056 1057 if (! mdisok(ep)) { 1058 rval = -1; 1059 goto out; 1060 } 1061 1062 /* 1063 * Get the set timeout information. 1064 */ 1065 (void) memset(&mhiargs, '\0', sizeof (mhiargs)); 1066 if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) { 1067 rval = -1; 1068 goto out; 1069 } 1070 1071 /* 1072 * Get timestamp and generation id for new records 1073 */ 1074 now = sd->sd_ctime; 1075 genid = sd->sd_genid; 1076 1077 /* At this point, in case of error, set should be flushed. */ 1078 flush_set_onerr = 1; 1079 1080 rb_level = 1; /* level 1 */ 1081 1082 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) { 1083 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 1084 if (ddp->dd_dnp == midp->mid_dnp) { 1085 /* same disk */ 1086 ddp->dd_dnp->devid = 1087 devid_str_encode(midp->mid_devid, 1088 midp->mid_minor_name); 1089 1090 ddp->dd_dbcnt = 0; 1091 mirp = midp->mid_replicas; 1092 if (mirp) { 1093 ddp->dd_dbsize = mirp->mir_length; 1094 for (; mirp != NULL; 1095 mirp = mirp->mir_next) { 1096 ddp->dd_dbcnt++; 1097 } 1098 } 1099 if ((midp->mid_available & 1100 MD_IM_DISK_NOT_AVAILABLE) && 1101 (misp->mis_flags & MD_IM_SET_REPLICATED)) { 1102 ddp->dd_flags = MD_DR_UNRSLV_REPLICATED; 1103 } 1104 } 1105 } 1106 } 1107 1108 /* 1109 * Add the drive records for the drives that we are adding to 1110 * each host in the set. Marks the drive records as MD_DR_ADD. 1111 * May also mark a drive record as MD_DR_UNRSLV_REPLICATED if 1112 * this flag was set in the dd_flags for that drive. 1113 */ 1114 if (clnt_imp_adddrvs(mynode(), sp, dd, now, genid, ep) == -1) 1115 goto rollback; 1116 1117 rb_level = 2; /* level 2 */ 1118 1119 /* 1120 * Take ownership of the added drives. 1121 */ 1122 if (tk_own_bydd(sp, dd, &mhiargs, TRUE, ep)) 1123 goto rollback; 1124 1125 out: 1126 metafreedrivedesc(&dd); 1127 1128 if (flush_set_onerr) { 1129 metaflushsetname(sp); 1130 } 1131 1132 return (rval); 1133 1134 rollback: 1135 /* Make sure we are blocking all signals */ 1136 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1137 mdclrerror(&xep); 1138 1139 rval = -1; 1140 1141 max_genid = sd->sd_genid; 1142 1143 /* level 2 */ 1144 if (rb_level > 1) { 1145 if (!MD_ATSET_DESC(sd)) { 1146 if (rel_own_bydd(sp, dd, TRUE, &xep)) { 1147 mdclrerror(&xep); 1148 } 1149 } 1150 } 1151 1152 /* level 1 */ 1153 if (rb_level > 0) { 1154 if (clnt_deldrvs(mynode(), sp, dd, &xep) == -1) { 1155 mdclrerror(&xep); 1156 } 1157 max_genid += 2; 1158 resync_genid(sp, sd, max_genid, 0, NULL); 1159 } 1160 1161 /* level 0 */ 1162 1163 /* release signals back to what they were on entry */ 1164 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1165 mdclrerror(&xep); 1166 1167 metafreedrivedesc(&dd); 1168 1169 if (flush_set_onerr) { 1170 metaflushsetname(sp); 1171 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1172 } 1173 1174 return (rval); 1175 } 1176 1177 int 1178 meta_set_deletedrives( 1179 mdsetname_t *sp, 1180 mddrivenamelist_t *dnlp, 1181 int forceflg, 1182 md_error_t *ep 1183 ) 1184 { 1185 md_set_desc *sd; 1186 md_drive_desc *ddp, *dd = NULL, *curdd = NULL; 1187 md_replicalist_t *rlp = NULL, *rl; 1188 mddrivenamelist_t *p; 1189 int deldrvcnt = 0; 1190 int rval = 0; 1191 mhd_mhiargs_t mhiargs; 1192 int i; 1193 sigset_t oldsigs; 1194 md_setkey_t *cl_sk; 1195 ulong_t max_genid = 0; 1196 int rb_level = 0; 1197 md_error_t xep = mdnullerror; 1198 md_mnnode_desc *nd; 1199 int has_set; 1200 int current_drv_cnt = 0; 1201 int suspendall_flag = 0, suspendall_flag_rb = 0; 1202 int suspend1_flag = 0; 1203 int lock_flag = 0; 1204 bool_t stale_bool = FALSE; 1205 int flush_set_onerr = 0; 1206 mdnamelist_t *nlp; 1207 mdname_t *np; 1208 1209 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1210 return (-1); 1211 1212 /* Make sure we own the set */ 1213 if (meta_check_ownership(sp, ep) != 0) 1214 return (-1); 1215 1216 if (drvsuniq(sp, dnlp, ep) == -1) 1217 return (-1); 1218 1219 /* 1220 * Check and see if all the nodes have the set. 1221 * 1222 * The drive and node records are stored in the local mddbs of each 1223 * node in the diskset. Each node's rpc.metad daemon reads in the set, 1224 * drive and node records from that node's local mddb and caches them 1225 * internally. Any process needing diskset information contacts its 1226 * local rpc.metad to get this information. Since each node in the 1227 * diskset is independently reading the set information from its local 1228 * mddb, the set, drive and node records in the local mddbs must stay 1229 * in-sync, so that all nodes have a consistent view of the diskset. 1230 * 1231 * For a multinode diskset, explicitly verify that all nodes in the 1232 * diskset are ALIVE (i.e. are in the API membership list). Otherwise, 1233 * fail this operation since all nodes must be ALIVE in order to delete 1234 * a drive record from their local mddb. If a panic of this node 1235 * leaves the local mddbs set, node and drive records out-of-sync, the 1236 * reconfig cycle will fix the local mddbs and force them back into 1237 * synchronization. 1238 */ 1239 if (MD_MNSET_DESC(sd)) { 1240 nd = sd->sd_nodelist; 1241 while (nd) { 1242 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 1243 (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, 1244 sp->setno, 1245 nd->nd_nodename, NULL, sp->setname); 1246 return (-1); 1247 } 1248 nd = nd->nd_next; 1249 } 1250 1251 /* Make sure we are blocking all signals */ 1252 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1253 mdclrerror(&xep); 1254 1255 /* 1256 * Lock the set on current set members. 1257 * Set locking done much earlier for MN diskset than for 1258 * traditional diskset since lock_set and SUSPEND are used 1259 * to protect against other meta* commands running on the 1260 * other nodes. 1261 */ 1262 nd = sd->sd_nodelist; 1263 /* All nodes are guaranteed to be ALIVE */ 1264 while (nd) { 1265 if (clnt_lock_set(nd->nd_nodename, sp, ep)) { 1266 rval = -1; 1267 goto out; 1268 } 1269 lock_flag = 1; 1270 nd = nd->nd_next; 1271 } 1272 /* 1273 * Lock out other meta* commands by suspending 1274 * class 1 messages across the diskset. 1275 */ 1276 nd = sd->sd_nodelist; 1277 /* All nodes are guaranteed to be ALIVE */ 1278 while (nd) { 1279 if (clnt_mdcommdctl(nd->nd_nodename, 1280 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 1281 MD_MSCF_NO_FLAGS, ep)) { 1282 rval = -1; 1283 goto out; 1284 } 1285 suspend1_flag = 1; 1286 nd = nd->nd_next; 1287 } 1288 1289 nd = sd->sd_nodelist; 1290 /* All nodes are guaranteed to be ALIVE */ 1291 while (nd) { 1292 if (strcmp(nd->nd_nodename, mynode()) == 0) { 1293 nd = nd->nd_next; 1294 continue; 1295 } 1296 1297 has_set = nodehasset(sp, nd->nd_nodename, 1298 NHS_NSTG_EQ, ep); 1299 if (has_set < 0) { 1300 rval = -1; 1301 goto out; 1302 } 1303 1304 if (! has_set) { 1305 (void) mddserror(ep, MDE_DS_NODENOSET, 1306 sp->setno, nd->nd_nodename, 1307 NULL, sp->setname); 1308 rval = -1; 1309 goto out; 1310 } 1311 nd = nd->nd_next; 1312 } 1313 } else { 1314 for (i = 0; i < MD_MAXSIDES; i++) { 1315 /* Skip empty slots */ 1316 if (sd->sd_nodes[i][0] == '\0') 1317 continue; 1318 1319 if (strcmp(sd->sd_nodes[i], mynode()) == 0) 1320 continue; 1321 1322 has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NSTG_EQ, 1323 ep); 1324 if (has_set < 0) { 1325 /* 1326 * Can directly return since !MN diskset; 1327 * nothing to unlock. 1328 */ 1329 return (-1); 1330 } 1331 1332 if (! has_set) { 1333 /* 1334 * Can directly return since !MN diskset; 1335 * nothing to unlock. 1336 */ 1337 return (mddserror(ep, MDE_DS_NODENOSET, 1338 sp->setno, sd->sd_nodes[i], NULL, 1339 sp->setname)); 1340 } 1341 } 1342 } 1343 1344 for (p = dnlp; p != NULL; p = p->next) { 1345 int is_it; 1346 mddrivename_t *dnp; 1347 1348 dnp = p->drivenamep; 1349 1350 if ((is_it = meta_is_drive_in_thisset(sp, dnp, FALSE, ep)) 1351 == -1) { 1352 rval = -1; 1353 goto out; 1354 } 1355 1356 if (! is_it) { 1357 (void) mddserror(ep, MDE_DS_DRIVENOTINSET, sp->setno, 1358 NULL, dnp->cname, sp->setname); 1359 rval = -1; 1360 goto out; 1361 } 1362 1363 if ((meta_check_drive_inuse(sp, dnp, FALSE, ep)) == -1) { 1364 rval = -1; 1365 goto out; 1366 } 1367 1368 deldrvcnt++; 1369 } 1370 current_drv_cnt = deldrvcnt; 1371 1372 /* 1373 * Get drive descriptors for the drives that are currently in the set. 1374 */ 1375 curdd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep); 1376 if (! mdisok(ep)) { 1377 rval = -1; 1378 goto out; 1379 } 1380 1381 /* 1382 * Decrement the the delete drive count for each drive currently in the 1383 * set. 1384 */ 1385 for (ddp = curdd; ddp != NULL; ddp = ddp->dd_next) 1386 deldrvcnt--; 1387 1388 /* 1389 * If the count of drives we are deleting is equal to the drives in the 1390 * set, and we haven't specified forceflg, return an error 1391 */ 1392 if (deldrvcnt == 0 && forceflg == FALSE) { 1393 (void) mderror(ep, MDE_FORCE_DEL_ALL_DRV, NULL); 1394 rval = -1; 1395 goto out; 1396 } 1397 1398 /* 1399 * Get the list of drive descriptors that we are deleting. 1400 */ 1401 dd = metaget_drivedesc_fromdrivelist(sp, dnlp, MD_DR_DEL, ep); 1402 if (! mdisok(ep)) { 1403 rval = -1; 1404 goto out; 1405 } 1406 1407 /* 1408 * Get the set timeout information in case we have to roll back. 1409 */ 1410 (void) memset(&mhiargs, '\0', sizeof (mhiargs)); 1411 if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) == -1) { 1412 rval = -1; 1413 goto out; 1414 } 1415 1416 /* At this point, in case of error, set should be flushed. */ 1417 flush_set_onerr = 1; 1418 1419 /* END CHECK CODE */ 1420 1421 /* Lock the set on current set members */ 1422 if (!(MD_MNSET_DESC(sd))) { 1423 md_rb_sig_handling_on(); 1424 for (i = 0; i < MD_MAXSIDES; i++) { 1425 /* Skip empty slots */ 1426 if (sd->sd_nodes[i][0] == '\0') 1427 continue; 1428 1429 if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { 1430 rval = -1; 1431 goto out; 1432 } 1433 lock_flag = 1; 1434 } 1435 } 1436 1437 if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) { 1438 mddb_config_t c; 1439 /* 1440 * Is current set STALE? 1441 */ 1442 (void) memset(&c, 0, sizeof (c)); 1443 c.c_id = 0; 1444 c.c_setno = sp->setno; 1445 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 1446 (void) mdstealerror(ep, &c.c_mde); 1447 rval = -1; 1448 goto out; 1449 } 1450 if (c.c_flags & MDDB_C_STALE) { 1451 stale_bool = TRUE; 1452 } 1453 } 1454 1455 RB_TEST(1, "deletedrives", ep) 1456 1457 RB_PREEMPT; 1458 rb_level = 1; /* level 1 */ 1459 1460 RB_TEST(2, "deletedrives", ep) 1461 1462 /* 1463 * Mark the drives MD_DR_DEL 1464 */ 1465 if (MD_MNSET_DESC(sd)) { 1466 nd = sd->sd_nodelist; 1467 /* All nodes are guaranteed to be ALIVE */ 1468 while (nd) { 1469 if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd, 1470 MD_DR_DEL, ep) == -1) 1471 goto rollback; 1472 1473 RB_TEST(3, "deletedrives", ep) 1474 nd = nd->nd_next; 1475 } 1476 } else { 1477 for (i = 0; i < MD_MAXSIDES; i++) { 1478 /* Skip empty slots */ 1479 if (sd->sd_nodes[i][0] == '\0') 1480 continue; 1481 1482 if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, 1483 MD_DR_DEL, ep) == -1) 1484 goto rollback; 1485 1486 RB_TEST(3, "deletedrives", ep) 1487 } 1488 } 1489 1490 RB_TEST(4, "deletedrives", ep) 1491 1492 RB_PREEMPT; 1493 rb_level = 2; /* level 2 */ 1494 1495 RB_TEST(5, "deletedrives", ep) 1496 1497 /* 1498 * Balance the DB's according to the list of existing drives and the 1499 * list of deleted drives. 1500 */ 1501 if (meta_db_balance(sp, dd, curdd, 0, ep) == -1) 1502 goto rollback; 1503 1504 /* 1505 * If the drive(s) to be deleted cannot be accessed, 1506 * they haven't really been deleted yet. Check and delete now 1507 * if need be. 1508 */ 1509 if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) >= 0) { 1510 nlp = NULL; 1511 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 1512 char *delete_name; 1513 1514 delete_name = ddp->dd_dnp->cname; 1515 1516 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 1517 char *cur_name; 1518 1519 cur_name = 1520 rl->rl_repp->r_namep->drivenamep->cname; 1521 1522 if (strcmp(delete_name, cur_name) == 0) { 1523 /* put it on the delete list */ 1524 np = rl->rl_repp->r_namep; 1525 (void) metanamelist_append(&nlp, np); 1526 1527 } 1528 } 1529 } 1530 1531 if (nlp != NULL) { 1532 if (meta_db_detach(sp, nlp, 1533 (MDFORCE_DS | MDFORCE_SET_LOCKED), NULL, 1534 ep) == -1) { 1535 metafreenamelist(nlp); 1536 goto rollback; 1537 } 1538 metafreenamelist(nlp); 1539 } 1540 } 1541 1542 RB_TEST(6, "deletedrives", ep) 1543 1544 RB_PREEMPT; 1545 rb_level = 3; /* level 3 */ 1546 1547 RB_TEST(7, "deletedrives", ep) 1548 1549 /* 1550 * Cannot suspend set until after meta_db_balance since 1551 * meta_db_balance uses META_DB_ATTACH/DETACH messages. 1552 */ 1553 if ((deldrvcnt == 0) && (MD_MNSET_DESC(sd))) { 1554 /* 1555 * Notify rpc.mdcommd on all nodes of a nodelist change. 1556 * Start by suspending rpc.mdcommd (which drains it of all 1557 * messages), then change the nodelist followed by a reinit 1558 * and resume. 1559 */ 1560 nd = sd->sd_nodelist; 1561 /* All nodes are guaranteed to be ALIVE */ 1562 while (nd) { 1563 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, 1564 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) { 1565 rval = -1; 1566 goto out; 1567 } 1568 suspendall_flag = 1; 1569 nd = nd->nd_next; 1570 } 1571 } 1572 1573 /* 1574 * Remove the drive records for the drives that were deleted from 1575 * each host in the set. This removes the record and dr_flags. 1576 */ 1577 if (MD_MNSET_DESC(sd)) { 1578 nd = sd->sd_nodelist; 1579 /* All nodes are guaranteed to be ALIVE */ 1580 while (nd) { 1581 if (clnt_deldrvs(nd->nd_nodename, sp, dd, ep) == -1) 1582 goto rollback; 1583 1584 RB_TEST(8, "deletedrives", ep) 1585 nd = nd->nd_next; 1586 } 1587 } else { 1588 for (i = 0; i < MD_MAXSIDES; i++) { 1589 /* Skip empty slots */ 1590 if (sd->sd_nodes[i][0] == '\0') 1591 continue; 1592 1593 if (clnt_deldrvs(sd->sd_nodes[i], sp, dd, ep) == -1) 1594 goto rollback; 1595 1596 RB_TEST(8, "deletedrives", ep) 1597 } 1598 } 1599 1600 RB_TEST(9, "deletedrives", ep) 1601 1602 RB_PREEMPT; 1603 rb_level = 4; /* level 4 */ 1604 1605 RB_TEST(10, "deletedrives", ep) 1606 1607 if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) { 1608 if (rel_own_bydd(sp, dd, TRUE, ep)) 1609 goto rollback; 1610 } 1611 1612 /* If we deleted all the drives, then we need to halt the set. */ 1613 if (deldrvcnt == 0) { 1614 RB_TEST(11, "deletedrives", ep) 1615 1616 RB_PREEMPT; 1617 rb_level = 5; /* level 5 */ 1618 1619 RB_TEST(12, "deletedrives", ep) 1620 1621 if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1) 1622 goto rollback; 1623 1624 RB_TEST(13, "deletedrives", ep) 1625 1626 RB_PREEMPT; 1627 rb_level = 6; /* level 6 */ 1628 1629 RB_TEST(14, "deletedrives", ep) 1630 1631 /* Halt MN diskset on all nodes by having node withdraw */ 1632 if (MD_MNSET_DESC(sd)) { 1633 nd = sd->sd_nodelist; 1634 /* All nodes are guaranteed to be ALIVE */ 1635 while (nd) { 1636 /* Only withdraw nodes that are joined */ 1637 if (!(nd->nd_flags & MD_MN_NODE_OWN)) { 1638 nd = nd->nd_next; 1639 continue; 1640 } 1641 /* 1642 * Going to set locally cached node flags to 1643 * rollback join so in case of error, the 1644 * rollback code knows which nodes to re-join. 1645 */ 1646 nd->nd_flags |= MD_MN_NODE_RB_JOIN; 1647 1648 /* 1649 * Be careful in ordering of following steps 1650 * so that recovery from a panic between 1651 * the steps is viable. 1652 * Only reset master info in rpc.metad - 1653 * don't reset local cached information 1654 * which will be used to set master information 1655 * back in case of failure (rollback). 1656 */ 1657 if (clnt_withdrawset(nd->nd_nodename, sp, ep)) 1658 goto rollback; 1659 /* Sets withdraw flag on all nodes in list */ 1660 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 1661 sd->sd_nodelist, MD_NR_WITHDRAW, 1662 NULL, ep)) { 1663 goto rollback; 1664 } 1665 if (clnt_mnsetmaster(nd->nd_nodename, sp, 1666 "", MD_MN_INVALID_NID, ep)) { 1667 goto rollback; 1668 } 1669 nd = nd->nd_next; 1670 } 1671 } else { 1672 if (halt_set(sp, ep)) 1673 goto rollback; 1674 } 1675 1676 RB_TEST(15, "deletedrives", ep) 1677 } 1678 1679 RB_TEST(16, "deletedrives", ep) 1680 1681 out: 1682 /* 1683 * Notify rpc.mdcommd on all nodes of a nodelist change. 1684 * Send reinit command to mdcommd which forces it to get 1685 * fresh set description. 1686 */ 1687 if (suspendall_flag) { 1688 /* Send reinit */ 1689 nd = sd->sd_nodelist; 1690 /* All nodes are guaranteed to be ALIVE */ 1691 while (nd) { 1692 /* Class is ignored for REINIT */ 1693 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 1694 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 1695 if (rval == 0) 1696 (void) mdstealerror(ep, &xep); 1697 rval = -1; 1698 mde_perror(ep, dgettext(TEXT_DOMAIN, 1699 "Unable to reinit rpc.mdcommd.\n")); 1700 } 1701 nd = nd->nd_next; 1702 } 1703 } 1704 1705 /* 1706 * Just resume all classes so that resume is the same whether 1707 * just one class was locked or all classes were locked. 1708 */ 1709 if ((suspend1_flag) || (suspendall_flag)) { 1710 /* Send resume */ 1711 nd = sd->sd_nodelist; 1712 /* All nodes are guaranteed to be ALIVE */ 1713 while (nd) { 1714 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 1715 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 1716 if (rval == 0) 1717 (void) mdstealerror(ep, &xep); 1718 rval = -1; 1719 mde_perror(ep, dgettext(TEXT_DOMAIN, 1720 "Unable to resume rpc.mdcommd.\n")); 1721 } 1722 nd = nd->nd_next; 1723 } 1724 meta_ping_mnset(sp->setno); 1725 } 1726 if (lock_flag) { 1727 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1728 if (MD_MNSET_DESC(sd)) { 1729 nd = sd->sd_nodelist; 1730 /* All nodes are guaranteed to be ALIVE */ 1731 while (nd) { 1732 if (clnt_unlock_set(nd->nd_nodename, 1733 cl_sk, &xep)) { 1734 if (rval == 0) 1735 (void) mdstealerror(ep, &xep); 1736 rval = -1; 1737 } 1738 nd = nd->nd_next; 1739 } 1740 } else { 1741 for (i = 0; i < MD_MAXSIDES; i++) { 1742 /* Skip empty slots */ 1743 if (sd->sd_nodes[i][0] == '\0') 1744 continue; 1745 1746 if (clnt_unlock_set(sd->sd_nodes[i], 1747 cl_sk, &xep)) { 1748 if (rval == 0) 1749 (void) mdstealerror(ep, &xep); 1750 rval = -1; 1751 } 1752 } 1753 } 1754 cl_set_setkey(NULL); 1755 } 1756 1757 metafreedrivedesc(&dd); 1758 1759 if (flush_set_onerr) { 1760 metaflushsetname(sp); 1761 if (!(MD_MNSET_DESC(sd))) { 1762 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1763 } 1764 } 1765 1766 if (MD_MNSET_DESC(sd)) { 1767 /* release signals back to what they were on entry */ 1768 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1769 mdclrerror(&xep); 1770 } 1771 1772 return (rval); 1773 1774 rollback: 1775 /* all signals already blocked for MN disket */ 1776 if (!(MD_MNSET_DESC(sd))) { 1777 /* Make sure we are blocking all signals */ 1778 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1779 mdclrerror(&xep); 1780 } 1781 1782 rval = -1; 1783 1784 max_genid = sd->sd_genid; 1785 1786 /* Set the master on all nodes first thing */ 1787 if (rb_level > 5) { 1788 if (MD_MNSET_DESC(sd)) { 1789 nd = sd->sd_nodelist; 1790 /* All nodes are guaranteed to be ALIVE */ 1791 while (nd) { 1792 if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) { 1793 continue; 1794 } 1795 /* 1796 * Set master on all re-joining nodes to be 1797 * my cached view of master. 1798 */ 1799 if (clnt_mnsetmaster(nd->nd_nodename, sp, 1800 sd->sd_mn_master_nodenm, 1801 sd->sd_mn_master_nodeid, &xep)) { 1802 mdclrerror(&xep); 1803 } 1804 } 1805 } 1806 } 1807 1808 /* level 3 */ 1809 if (rb_level > 2) { 1810 md_set_record *sr; 1811 md_mnset_record *mnsr; 1812 md_drive_record *dr; 1813 int sr_drive_cnt; 1814 1815 /* 1816 * See if we have to re-add the drives specified. 1817 */ 1818 if (MD_MNSET_DESC(sd)) { 1819 nd = sd->sd_nodelist; 1820 /* All nodes are guaranteed to be ALIVE */ 1821 while (nd) { 1822 /* 1823 * Must get current set record from each 1824 * node to see what else must be done 1825 * to recover. 1826 * Record should be for a multi-node diskset. 1827 */ 1828 if (clnt_mngetset(nd->nd_nodename, sp->setname, 1829 MD_SET_BAD, &mnsr, &xep) == -1) { 1830 mdclrerror(&xep); 1831 nd = nd->nd_next; 1832 continue; 1833 } 1834 1835 /* 1836 * If all drives are already there, skip 1837 * to next node. 1838 */ 1839 sr_drive_cnt = 0; 1840 dr = mnsr->sr_drivechain; 1841 while (dr) { 1842 sr_drive_cnt++; 1843 dr = dr->dr_next; 1844 } 1845 if (sr_drive_cnt == current_drv_cnt) { 1846 free_sr((md_set_record *)mnsr); 1847 nd = nd->nd_next; 1848 continue; 1849 } 1850 1851 /* Readd all drives */ 1852 if (clnt_adddrvs(nd->nd_nodename, sp, dd, 1853 mnsr->sr_ctime, mnsr->sr_genid, &xep) == -1) 1854 mdclrerror(&xep); 1855 1856 free_sr((struct md_set_record *)mnsr); 1857 nd = nd->nd_next; 1858 } 1859 } else { 1860 for (i = 0; i < MD_MAXSIDES; i++) { 1861 /* Skip empty slots */ 1862 if (sd->sd_nodes[i][0] == '\0') 1863 continue; 1864 1865 /* Record should be for a non-multi-node set */ 1866 if (clnt_getset(sd->sd_nodes[i], sp->setname, 1867 MD_SET_BAD, &sr, &xep) == -1) { 1868 mdclrerror(&xep); 1869 continue; 1870 } 1871 1872 /* 1873 * Set record structure was allocated from RPC 1874 * routine getset so this structure is only of 1875 * size md_set_record even if the MN flag is 1876 * set. So, clear the flag so that the free 1877 * code doesn't attempt to free a structure 1878 * the size of md_mnset_record. 1879 */ 1880 if (MD_MNSET_REC(sr)) { 1881 sr->sr_flags &= ~MD_SR_MN; 1882 free_sr(sr); 1883 continue; 1884 } 1885 1886 /* Drive already added, skip to next node */ 1887 if (sr->sr_drivechain != NULL) { 1888 free_sr(sr); 1889 continue; 1890 } 1891 1892 if (clnt_adddrvs(sd->sd_nodes[i], sp, dd, 1893 sr->sr_ctime, sr->sr_genid, &xep) == -1) 1894 mdclrerror(&xep); 1895 1896 free_sr(sr); 1897 } 1898 } 1899 max_genid += 2; 1900 } 1901 1902 /* 1903 * Notify rpc.mdcommd on all nodes of a nodelist change. 1904 * At this point in time, don't know which nodes are joined 1905 * to the set. So, send a reinit command to mdcommd 1906 * which forces it to get fresh set description. Then send resume. 1907 * 1908 * Later, this code will use rpc.mdcommd messages to reattach disks 1909 * and then rpc.mdcommd may be suspended again, rest of the nodes 1910 * joined, rpc.mdcommd reinited and then resumed. 1911 */ 1912 if (suspendall_flag) { 1913 /* Send reinit */ 1914 nd = sd->sd_nodelist; 1915 /* All nodes are guaranteed to be ALIVE */ 1916 while (nd) { 1917 /* Class is ignored for REINIT */ 1918 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 1919 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 1920 mde_perror(&xep, dgettext(TEXT_DOMAIN, 1921 "Unable to reinit rpc.mdcommd.\n")); 1922 mdclrerror(&xep); 1923 } 1924 nd = nd->nd_next; 1925 } 1926 1927 /* Send resume */ 1928 nd = sd->sd_nodelist; 1929 /* All nodes are guaranteed to be ALIVE */ 1930 while (nd) { 1931 /* 1932 * Resume all classes but class 1 so that lock is held 1933 * against meta* commands. 1934 * To later resume class1, must issue a class0 resume. 1935 */ 1936 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 1937 sp, MD_MSG_CLASS0, 1938 MD_MSCF_DONT_RESUME_CLASS1, &xep)) { 1939 mde_perror(&xep, dgettext(TEXT_DOMAIN, 1940 "Unable to resume rpc.mdcommd.\n")); 1941 mdclrerror(&xep); 1942 } 1943 nd = nd->nd_next; 1944 } 1945 meta_ping_mnset(sp->setno); 1946 } 1947 1948 /* level 2 */ 1949 if (rb_level > 1) { 1950 mdnamelist_t *nlp; 1951 mdname_t *np; 1952 1953 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 1954 uint_t rep_slice; 1955 1956 if ((meta_replicaslice(ddp->dd_dnp, 1957 &rep_slice, &xep) != 0) || 1958 ((np = metaslicename(ddp->dd_dnp, rep_slice, 1959 &xep)) == NULL)) { 1960 mdclrerror(&xep); 1961 continue; 1962 } 1963 nlp = NULL; 1964 (void) metanamelist_append(&nlp, np); 1965 1966 if (meta_db_attach(sp, nlp, 1967 (MDCHK_DRVINSET | MDCHK_SET_LOCKED), 1968 &sd->sd_ctime, ddp->dd_dbcnt, ddp->dd_dbsize, 1969 NULL, &xep) == -1) 1970 mdclrerror(&xep); 1971 1972 metafreenamelist(nlp); 1973 } 1974 /* Re-balance */ 1975 if (meta_db_balance(sp, NULL, curdd, 0, &xep) == -1) 1976 mdclrerror(&xep); 1977 } 1978 1979 /* level 4 */ 1980 if (rb_level > 3) { 1981 if (!(MD_MNSET_DESC(sd)) && !MD_ATSET_DESC(sd)) { 1982 if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep)) 1983 mdclrerror(&xep); 1984 } 1985 } 1986 1987 /* level 5 */ 1988 if (rb_level > 4) { 1989 if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1) 1990 mdclrerror(&xep); 1991 } 1992 1993 /* 1994 * If at least one node needs to be rejoined to MN diskset, 1995 * then suspend commd again. 1996 */ 1997 if (MD_MNSET_DESC(sd)) { 1998 nd = sd->sd_nodelist; 1999 /* All nodes are guaranteed to be ALIVE */ 2000 while (nd) { 2001 if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) { 2002 nd = nd->nd_next; 2003 continue; 2004 } 2005 break; 2006 } 2007 if (nd) { 2008 /* 2009 * Found node that will be rejoined so 2010 * notify rpc.mdcommd on all nodes of a nodelist change. 2011 * Start by suspending rpc.mdcommd (which drains it of 2012 * all messages), then change the nodelist followed by 2013 * a reinit and resume. 2014 */ 2015 nd = sd->sd_nodelist; 2016 /* All nodes are guaranteed to be ALIVE */ 2017 while (nd) { 2018 if (clnt_mdcommdctl(nd->nd_nodename, 2019 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS0, 2020 MD_MSCF_NO_FLAGS, &xep)) { 2021 mdclrerror(&xep); 2022 } 2023 suspendall_flag_rb = 1; 2024 nd = nd->nd_next; 2025 } 2026 } 2027 } 2028 2029 2030 2031 /* level 6 */ 2032 if (rb_level > 5) { 2033 if (MD_MNSET_DESC(sd)) { 2034 int join_flags = 0; 2035 2036 nd = sd->sd_nodelist; 2037 /* All nodes are guaranteed to be ALIVE */ 2038 while (nd) { 2039 /* Only rejoin nodes that were joined before */ 2040 if (!(nd->nd_flags & MD_MN_NODE_RB_JOIN)) { 2041 nd = nd->nd_next; 2042 continue; 2043 } 2044 /* 2045 * Rejoin nodes to same state as before - 2046 * either STALE or non-STALE. 2047 */ 2048 if (stale_bool == TRUE) 2049 join_flags = MNSET_IS_STALE; 2050 if (clnt_joinset(nd->nd_nodename, sp, 2051 join_flags, &xep)) 2052 mdclrerror(&xep); 2053 /* Sets OWN flag on all nodes in list */ 2054 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 2055 sd->sd_nodelist, MD_NR_JOIN, NULL, &xep)) { 2056 mdclrerror(&xep); 2057 } 2058 nd = nd->nd_next; 2059 } 2060 } else { 2061 if (setup_db_bydd(sp, dd, TRUE, &xep) == -1) 2062 mdclrerror(&xep); 2063 2064 /* No special flag for traditional diskset */ 2065 if (snarf_set(sp, NULL, &xep)) 2066 mdclrerror(&xep); 2067 } 2068 } 2069 2070 /* level 1 */ 2071 if (rb_level > 0) { 2072 /* 2073 * Mark the drives as OK. 2074 */ 2075 if (MD_MNSET_DESC(sd)) { 2076 nd = sd->sd_nodelist; 2077 /* All nodes are guaranteed to be ALIVE */ 2078 while (nd) { 2079 /* 2080 * Must be last action before unlock. 2081 * In case of panic, recovery code checks 2082 * for MD_DR_OK to know that drive 2083 * and possible master are fully added back. 2084 */ 2085 if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd, 2086 MD_DR_OK, &xep) == -1) 2087 mdclrerror(&xep); 2088 nd = nd->nd_next; 2089 } 2090 } else { 2091 for (i = 0; i < MD_MAXSIDES; i++) { 2092 /* Skip empty slots */ 2093 if (sd->sd_nodes[i][0] == '\0') 2094 continue; 2095 2096 if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd, 2097 MD_DR_OK, &xep) == -1) 2098 mdclrerror(&xep); 2099 2100 } 2101 } 2102 max_genid += 2; 2103 resync_genid(sp, sd, max_genid, 0, NULL); 2104 } 2105 /* 2106 * Notify rpc.mdcommd on all nodes of a nodelist change. 2107 * Send a reinit command to mdcommd which forces it to get 2108 * fresh set description. 2109 */ 2110 if (suspendall_flag_rb) { 2111 /* Send reinit */ 2112 nd = sd->sd_nodelist; 2113 /* All nodes are guaranteed to be ALIVE */ 2114 while (nd) { 2115 /* Class is ignored for REINIT */ 2116 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 2117 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 2118 mde_perror(&xep, dgettext(TEXT_DOMAIN, 2119 "Unable to reinit rpc.mdcommd.\n")); 2120 mdclrerror(&xep); 2121 } 2122 nd = nd->nd_next; 2123 } 2124 } 2125 2126 /* 2127 * Just resume all classes so that resume is the same whether 2128 * just one class was locked or all classes were locked. 2129 */ 2130 if ((suspend1_flag) || (suspendall_flag_rb) || (suspendall_flag)) { 2131 /* Send resume */ 2132 nd = sd->sd_nodelist; 2133 /* All nodes are guaranteed to be ALIVE */ 2134 while (nd) { 2135 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 2136 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 2137 mde_perror(&xep, dgettext(TEXT_DOMAIN, 2138 "Unable to resume rpc.mdcommd.\n")); 2139 mdclrerror(&xep); 2140 } 2141 nd = nd->nd_next; 2142 } 2143 meta_ping_mnset(sp->setno); 2144 } 2145 2146 2147 /* level 0 */ 2148 cl_sk = cl_get_setkey(sp->setno, sp->setname); 2149 /* Don't test lock flag since guaranteed to be set if in rollback */ 2150 if (MD_MNSET_DESC(sd)) { 2151 nd = sd->sd_nodelist; 2152 /* All nodes are guaranteed to be ALIVE */ 2153 while (nd) { 2154 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) 2155 mdclrerror(&xep); 2156 nd = nd->nd_next; 2157 } 2158 } else { 2159 for (i = 0; i < MD_MAXSIDES; i++) { 2160 /* Skip empty slots */ 2161 if (sd->sd_nodes[i][0] == '\0') 2162 continue; 2163 2164 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) 2165 mdclrerror(&xep); 2166 } 2167 } 2168 cl_set_setkey(NULL); 2169 2170 /* release signals back to what they were on entry */ 2171 if (procsigs(FALSE, &oldsigs, &xep) < 0) 2172 mdclrerror(&xep); 2173 2174 metafreedrivedesc(&dd); 2175 2176 if (flush_set_onerr) { 2177 metaflushsetname(sp); 2178 if (!(MD_MNSET_DESC(sd))) { 2179 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 2180 } 2181 } 2182 2183 return (rval); 2184 } 2185