1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Just in case we're not in a build environment, make sure that 31 * TEXT_DOMAIN gets set to something. 32 */ 33 #if !defined(TEXT_DOMAIN) 34 #define TEXT_DOMAIN "SYS_TEST" 35 #endif 36 37 /* 38 * Metadevice diskset interfaces 39 */ 40 41 #include "meta_set_prv.h" 42 #include <meta.h> 43 #include <sys/lvm/md_crc.h> 44 #include <sys/time.h> 45 #include <sdssc.h> 46 47 static int 48 add_db_sidenms( 49 mdsetname_t *sp, 50 md_error_t *ep 51 ) 52 { 53 md_replicalist_t *rlp = NULL; 54 md_replicalist_t *rl; 55 int rval = 0; 56 57 if (metareplicalist(sp, MD_FULLNAME_ONLY, &rlp, ep) < 0) 58 return (-1); 59 60 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 61 md_replica_t *r = rl->rl_repp; 62 63 /* 64 * This is not the first replica being added to the 65 * diskset so call with ADDSIDENMS_BCAST. If this 66 * is a traditional diskset, the bcast flag is ignored 67 * since traditional disksets don't use the rpc.mdcommd. 
68 */ 69 if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno, 70 DB_ADDSIDENMS_BCAST, ep)) { 71 rval = -1; 72 goto out; 73 } 74 } 75 76 out: 77 metafreereplicalist(rlp); 78 return (rval); 79 } 80 81 static int 82 add_drvs_to_hosts( 83 mdsetname_t *sp, 84 int node_c, 85 char **node_v, 86 md_error_t *ep 87 ) 88 { 89 int i; 90 md_set_desc *sd; 91 md_drive_desc *dd; 92 md_timeval32_t now; 93 ulong_t genid; 94 95 if ((sd = metaget_setdesc(sp, ep)) == NULL) 96 return (-1); 97 98 if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) { 99 if (! mdisok(ep)) 100 return (-1); 101 return (0); 102 } 103 104 now = sd->sd_ctime; 105 genid = sd->sd_genid - 1; 106 107 for (i = 0; i < node_c; i++) { 108 if (clnt_adddrvs(node_v[i], sp, dd, now, genid, ep) == -1) 109 return (-1); 110 } 111 112 return (0); 113 } 114 115 static int 116 add_md_sidenms(mdsetname_t *sp, side_t sideno, side_t otherside, md_error_t *ep) 117 { 118 mdnm_params_t nm; 119 char *cname, *dname; 120 side_t tmp_sideno; 121 minor_t mnum; 122 int done, i; 123 int rval = 0; 124 md_set_desc *sd; 125 126 (void) memset(&nm, '\0', sizeof (nm)); 127 nm.key = MD_KEYWILD; 128 129 if (!metaislocalset(sp)) { 130 if ((sd = metaget_setdesc(sp, ep)) == NULL) 131 return (-1); 132 } 133 /* Use rpc.mdcommd to add md side info from all nodes */ 134 if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) && 135 (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 136 md_mn_result_t *resultp = NULL; 137 md_mn_msg_meta_md_addside_t md_as; 138 int send_rval; 139 140 md_as.msg_sideno = sideno; 141 md_as.msg_otherside = otherside; 142 /* 143 * If reconfig cycle has been started, this node is stuck in 144 * in the return step until this command has completed. If 145 * mdcommd is suspended, ask send_message to fail (instead of 146 * retrying) so that metaset can finish allowing the 147 * reconfig cycle to proceed. 
148 */ 149 send_rval = mdmn_send_message(sp->setno, 150 MD_MN_MSG_META_MD_ADDSIDE, 151 MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT, 152 (char *)&md_as, sizeof (md_mn_msg_meta_md_addside_t), 153 &resultp, ep); 154 if (send_rval != 0) { 155 (void) mdstealerror(ep, &(resultp->mmr_ep)); 156 if (resultp) 157 free_result(resultp); 158 return (-1); 159 } 160 if (resultp) 161 free_result(resultp); 162 return (0); 163 } else { 164 /*CONSTCOND*/ 165 while (1) { 166 nm.mde = mdnullerror; 167 nm.setno = sp->setno; 168 nm.side = otherside; 169 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) 170 return (mdstealerror(ep, &nm.mde)); 171 172 if (nm.key == MD_KEYWILD) 173 return (0); 174 175 nm.devname = (uintptr_t)meta_getnmbykey(sp->setno, 176 otherside, nm.key, ep); 177 if (nm.devname == NULL) 178 return (-1); 179 180 nm.side = sideno; 181 if (MD_MNSET_DESC(sd)) { 182 tmp_sideno = sideno; 183 } else { 184 tmp_sideno = sideno - 1; 185 } 186 187 if ((done = meta_getnextside_devinfo(sp, 188 (char *)(uintptr_t)nm.devname, &tmp_sideno, 189 &cname, &dname, &mnum, ep)) == -1) { 190 Free((void *)(uintptr_t)nm.devname); 191 return (-1); 192 } 193 194 assert(done == 1); 195 Free((void *)(uintptr_t)nm.devname); 196 197 /* 198 * The device reference count can be greater than 1 if 199 * more than one softpart is configured on top of the 200 * same device. If this is the case then we want to 201 * increment the count to sync up with the other sides. 202 */ 203 for (i = 0; i < nm.ref_count; i++) { 204 if (add_name(sp, sideno, nm.key, dname, mnum, cname, 205 ep) == -1) 206 rval = -1; 207 } 208 209 Free(cname); 210 Free(dname); 211 212 if (rval != 0) 213 return (rval); 214 } 215 } 216 217 /*NOTREACHED*/ 218 } 219 220 static int 221 check_setdrvs_againstnode(mdsetname_t *sp, char *node, md_error_t *ep) 222 { 223 mddrivename_t *dp; 224 md_drive_desc *dd, *ddp; 225 226 if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) 227 if (! 
mdisok(ep)) 228 return (-1); 229 230 for (ddp = dd; ddp != NULL; ddp = ddp->dd_next) { 231 dp = ddp->dd_dnp; 232 233 if (checkdrive_onnode(sp, dp, node, ep)) 234 return (-1); 235 } 236 237 return (0); 238 } 239 240 static int 241 create_multinode_set_on_hosts( 242 mdsetname_t *sp, 243 int node_c, /* Number of new nodes */ 244 char **node_v, /* Nodes which are being added */ 245 int new_set, 246 md_error_t *ep 247 ) 248 { 249 int i; 250 md_set_desc *sd; 251 md_timeval32_t now; 252 ulong_t genid; 253 int rval = 0; 254 md_mnnode_desc *nd, *ndm = NULL; 255 md_mnnode_desc *nd_prev, *nd_curr; 256 int nodecnt; 257 mndiskset_membershiplist_t *nl, *nl2; 258 259 if (!new_set) { 260 if ((sd = metaget_setdesc(sp, ep)) == NULL) 261 return (-1); 262 now = sd->sd_ctime; 263 genid = sd->sd_genid - 1; 264 if (sd->sd_drvs) 265 genid--; 266 } else { 267 sd = Zalloc(sizeof (*sd)); 268 269 if (meta_gettimeofday(&now) == -1) { 270 (void) mdsyserror(ep, errno, 271 dgettext(TEXT_DOMAIN, "meta_gettimeofday()")); 272 rval = -1; 273 goto out; 274 } 275 276 /* Put the new entries into the set */ 277 /* 278 * Get membershiplist from API routine. If there's 279 * an error, fail to create set and pass back error. 280 */ 281 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { 282 rval = -1; 283 goto out; 284 } 285 286 /* 287 * meta_set_addhosts has already verified that 288 * this node list is in the membership list 289 * so set ALIVE flag. 290 * Since this is a new set, all hosts being 291 * added are new to the set, so also set ADD flag. 
292 */ 293 for (i = 0; i < node_c; i++) { 294 nd = Zalloc(sizeof (*nd)); 295 (void) strcpy(nd->nd_nodename, node_v[i]); 296 nd->nd_ctime = now; 297 nd->nd_flags = (MD_MN_NODE_ALIVE | 298 MD_MN_NODE_ADD); 299 nl2 = nl; 300 while (nl2) { 301 if (strcmp(nl2->msl_node_name, 302 node_v[i]) == 0) { 303 nd->nd_nodeid = nl2->msl_node_id; 304 (void) strcpy(nd->nd_priv_ic, 305 nl2->msl_node_addr); 306 break; 307 } 308 nl2 = nl2->next; 309 } 310 311 /* 312 * Nodelist must be kept in ascending 313 * nodeid order. 314 */ 315 if (sd->sd_nodelist == NULL) { 316 /* Nothing in list, just add it */ 317 sd->sd_nodelist = nd; 318 } else if (nd->nd_nodeid < sd->sd_nodelist->nd_nodeid) { 319 /* Add to head of list */ 320 nd->nd_next = sd->sd_nodelist; 321 sd->sd_nodelist = nd; 322 } else { 323 nd_curr = sd->sd_nodelist->nd_next; 324 nd_prev = sd->sd_nodelist; 325 /* Search for place ot add it */ 326 while (nd_curr) { 327 if (nd->nd_nodeid < 328 nd_curr->nd_nodeid) { 329 /* Add before nd_curr */ 330 nd->nd_next = nd_curr; 331 nd_prev->nd_next = nd; 332 break; 333 } 334 nd_prev = nd_curr; 335 nd_curr = nd_curr->nd_next; 336 } 337 /* Add to end of list */ 338 if (nd_curr == NULL) { 339 nd_prev->nd_next = nd; 340 } 341 342 } 343 /* Set master to be first node added */ 344 if (ndm == NULL) 345 ndm = nd; 346 } 347 348 meta_free_nodelist(nl); 349 /* 350 * Creating mnset for first time. 351 * Set master to be invalid until first drive is 352 * in set. 353 */ 354 (void) strcpy(sd->sd_mn_master_nodenm, ""); 355 sd->sd_mn_master_nodeid = MD_MN_INVALID_NID; 356 sd->sd_mn_masternode = ndm; 357 sd->sd_ctime = now; 358 genid = sd->sd_genid = 0; 359 } 360 361 /* Create the set where needed */ 362 for (i = 0; i < node_c; i++) { 363 /* 364 * Create the set on each new node. If the set already 365 * exists, then the node list being created on each new node 366 * is the current node list from before the new nodes 367 * were added. 
If the set doesn't exist, then the node 368 * list being created on each new node is the entire 369 * new node list. 370 */ 371 if (clnt_mncreateset(node_v[i], sp, sd->sd_nodelist, 372 now, genid, sd->sd_mn_master_nodenm, 373 sd->sd_mn_master_nodeid, ep) == -1) { 374 rval = -1; 375 break; 376 } 377 } 378 379 out: 380 if (new_set) { 381 nd = sd->sd_nodelist; 382 while (nd) { 383 sd->sd_nodelist = nd->nd_next; 384 Free(nd); 385 nd = sd->sd_nodelist; 386 } 387 Free(sd); 388 } 389 390 if (rval != 0 || new_set) 391 return (rval); 392 393 /* 394 * Add the drive records to the new sets 395 * and names for the new sides. 396 */ 397 return (add_drvs_to_hosts(sp, node_c, node_v, ep)); 398 } 399 400 401 static int 402 create_traditional_set_on_hosts( 403 mdsetname_t *sp, 404 int node_c, /* Number of new nodes */ 405 char **node_v, /* Nodes which are being added */ 406 int new_set, 407 md_error_t *ep 408 ) 409 { 410 int i; 411 md_set_desc *sd; 412 md_timeval32_t now; 413 ulong_t genid; 414 int rval = 0; 415 416 if (!new_set) { 417 418 if ((sd = metaget_setdesc(sp, ep)) == NULL) 419 return (-1); 420 now = sd->sd_ctime; 421 422 genid = sd->sd_genid; 423 424 if (sd->sd_drvs) 425 genid--; 426 } else { 427 if (node_c > MD_MAXSIDES) 428 return (mddserror(ep, MDE_DS_SIDENUMNOTAVAIL, 429 sp->setno, NULL, NULL, sp->setname)); 430 431 sd = Zalloc(sizeof (*sd)); 432 433 /* Put the new entries into the set */ 434 for (i = 0; i < node_c; i++) { 435 (void) strcpy(sd->sd_nodes[i], node_v[i]); 436 } 437 438 if (meta_gettimeofday(&now) == -1) { 439 (void) mdsyserror(ep, errno, "meta_gettimeofday()"); 440 rval = -1; 441 goto out; 442 } 443 444 sd->sd_ctime = now; 445 genid = sd->sd_genid = 0; 446 } 447 448 /* Create the set where needed */ 449 for (i = 0; i < node_c; i++) { 450 /* 451 * Create the set on each new host 452 */ 453 if (clnt_createset(node_v[i], sp, sd->sd_nodes, now, genid, 454 ep) == -1) { 455 rval = -1; 456 break; 457 } 458 } 459 460 out: 461 if (new_set) 462 Free(sd); 463 464 
if (rval != 0 || new_set) 465 return (rval); 466 467 /* 468 * Add the drive records to the new sets 469 * and names for the new sides. 470 */ 471 return (add_drvs_to_hosts(sp, node_c, node_v, ep)); 472 } 473 474 static int 475 create_set_on_hosts( 476 mdsetname_t *sp, 477 int multi_node, /* Multi_node diskset or not? */ 478 int node_c, /* Number of new nodes */ 479 char **node_v, /* Nodes which are being added */ 480 int new_set, 481 md_error_t *ep 482 ) 483 { 484 if (multi_node) 485 return (create_multinode_set_on_hosts(sp, node_c, node_v, 486 new_set, ep)); 487 else 488 return (create_traditional_set_on_hosts(sp, node_c, node_v, 489 new_set, ep)); 490 } 491 492 static int 493 create_set( 494 mdsetname_t *sp, 495 int multi_node, /* Multi-node diskset or not? */ 496 int node_c, 497 char **node_v, 498 int auto_take, 499 md_error_t *ep 500 ) 501 { 502 int i; 503 int rval = 0; 504 set_t max_sets; 505 set_t setno; 506 int bool; 507 uint_t sr_flags; 508 sigset_t oldsigs; 509 md_setkey_t *cl_sk; 510 int rb_level = 0; 511 md_error_t xep = mdnullerror; 512 rval_e sdssc_rval; 513 int lock_flag = 0; 514 int sig_flag = 0; 515 516 if ((max_sets = get_max_sets(ep)) == 0) 517 return (-1); 518 519 /* We must be a member of the set we are creating */ 520 if (! strinlst(mynode(), node_c, node_v)) 521 return (mddserror(ep, MDE_DS_SELFNOTIN, 522 sp->setno, mynode(), NULL, sp->setname)); 523 524 /* 525 * If auto_take then we must be the only member of the set 526 * that we are creating. 527 */ 528 if (auto_take && node_c > 1) 529 return (mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL, 530 sp->setname)); 531 532 /* 533 * If we're part of SC3.0 we'll already have allocated the 534 * set number so we can skip the allocation algorithm used. 535 * Set number is unique across traditional and MN disksets. 
536 */ 537 if ((sdssc_rval = sdssc_get_index(sp->setname, &setno)) 538 == SDSSC_NOT_BOUND) { 539 540 for (i = 0; i < node_c; i++) { 541 int has_set; 542 543 /* Skip my node */ 544 if (strcmp(mynode(), node_v[i]) == 0) 545 continue; 546 547 /* 548 * Make sure this set name is not used on the 549 * other hosts 550 */ 551 has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep); 552 if (has_set < 0) { 553 if (! mdiserror(ep, MDE_NO_SET)) { 554 rval = -1; 555 goto out; 556 } 557 mdclrerror(ep); 558 continue; 559 } 560 561 if (has_set) { 562 (void) mddserror(ep, MDE_DS_NODEHASSET, 563 sp->setno, node_v[i], NULL, sp->setname); 564 rval = -1; 565 goto out; 566 } 567 } 568 569 for (setno = 1; setno < max_sets; setno++) { 570 for (i = 0; i < node_c; i++) { 571 if (clnt_setnumbusy(node_v[i], setno, 572 &bool, ep) == -1) { 573 rval = -1; 574 goto out; 575 } 576 577 if (bool == TRUE) 578 break; 579 } 580 if (i == node_c) 581 break; 582 } 583 } else if (sdssc_rval != SDSSC_OKAY) { 584 (void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL, 585 NULL, sp->setname); 586 rval = -1; 587 goto out; 588 } 589 590 if (setno == max_sets) { 591 (void) mddserror(ep, MDE_DS_SETNUMNOTAVAIL, MD_SET_BAD, NULL, 592 NULL, sp->setname); 593 rval = -1; 594 goto out; 595 } 596 597 sp->setno = setno; 598 599 /* 600 * Lock the set on current set members. 601 * Set locking done much earlier for MN diskset than for traditional 602 * diskset since lock_set is used to protect against 603 * other meta* commands running on the other nodes. 604 * Don't issue mdcommd SUSPEND command since there is nothing 605 * to suspend since there currently is no set. 
606 */ 607 if (multi_node) { 608 /* Make sure we are blocking all signals */ 609 if (procsigs(TRUE, &oldsigs, &xep) < 0) 610 mdclrerror(&xep); 611 sig_flag = 1; 612 613 /* Lock the set on new set members */ 614 for (i = 0; i < node_c; i++) { 615 if (clnt_lock_set(node_v[i], sp, ep)) { 616 rval = -1; 617 goto out; 618 } 619 lock_flag = 1; 620 } 621 /* Now have the diskset locked, verify set number is still ok */ 622 for (i = 0; i < node_c; i++) { 623 if (clnt_setnumbusy(node_v[i], setno, 624 &bool, ep) == -1) { 625 rval = -1; 626 goto out; 627 } 628 } 629 } 630 631 632 if (meta_set_checkname(sp->setname, ep)) { 633 rval = -1; 634 goto out; 635 } 636 637 for (i = 0; i < node_c; i++) { 638 if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) { 639 rval = -1; 640 goto out; 641 } 642 if (bool == FALSE) { 643 (void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno, 644 node_v[i], NULL, sp->setname); 645 rval = -1; 646 goto out; 647 } 648 } 649 650 /* END CHECK CODE */ 651 652 /* Lock the set on new set members */ 653 if (!multi_node) { 654 md_rb_sig_handling_on(); 655 sig_flag = 1; 656 for (i = 0; i < node_c; i++) { 657 if (clnt_lock_set(node_v[i], sp, ep)) { 658 rval = -1; 659 goto out; 660 } 661 lock_flag = 1; 662 } 663 } 664 665 RB_TEST(1, "create_set", ep) 666 667 RB_PREEMPT; 668 rb_level = 1; /* level 1 */ 669 670 RB_TEST(2, "create_set", ep) 671 672 if ((rval = create_set_on_hosts(sp, multi_node, node_c, node_v, 673 1, ep)) == -1) 674 goto rollback; 675 676 RB_TEST(3, "create_set", ep) 677 678 if (auto_take) 679 sr_flags = MD_SR_OK | MD_SR_AUTO_TAKE; 680 else 681 sr_flags = MD_SR_OK; 682 683 /* 684 * Mark the set record MD_SR_OK 685 */ 686 for (i = 0; i < node_c; i++) 687 if (clnt_upd_sr_flags(node_v[i], sp, sr_flags, ep)) 688 goto rollback; 689 690 rb_level = 2; /* level 2 */ 691 692 /* 693 * For MN diskset: 694 * On each added node, set the node record for that node 695 * to OK. Then set all node records for the newly added 696 * nodes on all nodes to ok. 
697 * 698 * By setting a node's own node record to ok first, even if 699 * the node adding the hosts panics, the rest of the nodes can 700 * determine the same node list during the choosing of the master 701 * during reconfig. So, only nodes considered for mastership 702 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set 703 * on that node's rpc.metad. If all nodes have MD_SR_OK set, 704 * but no node has its own MD_MN_NODE_OK set, then the set will 705 * be removed during reconfig since a panic occurred during the 706 * creation of the initial diskset. 707 */ 708 709 if (multi_node) { 710 md_mnnode_desc *nd, *saved_nd_next; 711 md_set_desc *sd; 712 713 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 714 goto rollback; 715 } 716 717 for (i = 0; i < node_c; i++) { 718 nd = sd->sd_nodelist; 719 /* All nodes are guaranteed to be ALIVE */ 720 while (nd) { 721 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 722 break; 723 nd = nd->nd_next; 724 } 725 /* Something wrong, will pick this up in next loop */ 726 if (nd == NULL) 727 continue; 728 729 /* Only changing my local cache of node list */ 730 saved_nd_next = nd->nd_next; 731 nd->nd_next = NULL; 732 733 /* Set node record for added host to ok on that host */ 734 if (clnt_upd_nr_flags(node_v[i], sp, 735 nd, MD_NR_OK, NULL, ep)) { 736 nd->nd_next = saved_nd_next; 737 goto rollback; 738 } 739 nd->nd_next = saved_nd_next; 740 } 741 742 /* Now set all node records on all nodes to be ok */ 743 nd = sd->sd_nodelist; 744 /* All nodes are guaranteed to be ALIVE */ 745 while (nd) { 746 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 747 sd->sd_nodelist, MD_NR_OK, NULL, ep)) { 748 goto rollback; 749 } 750 nd = nd->nd_next; 751 } 752 } 753 754 RB_TEST(4, "create_set", ep) 755 756 out: 757 if ((rval == 0) && multi_node) { 758 /* 759 * Set successfully created. 760 * Notify rpc.mdcommd on all nodes of a nodelist change. 761 * Send reinit command to mdcommd which forces it to get 762 * fresh set description. Then send resume. 
763 * Resume on class 0 will resume all classes. 764 */ 765 for (i = 0; i < node_c; i++) { 766 /* Class is ignored for REINIT */ 767 if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT, 768 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 769 if (rval == 0) 770 (void) mdstealerror(ep, &xep); 771 rval = -1; 772 mde_perror(ep, dgettext(TEXT_DOMAIN, 773 "Unable to reinit rpc.mdcommd.\n")); 774 } 775 } 776 for (i = 0; i < node_c; i++) { 777 if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME, 778 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 779 if (rval == 0) 780 (void) mdstealerror(ep, &xep); 781 rval = -1; 782 mde_perror(ep, dgettext(TEXT_DOMAIN, 783 "Unable to resume rpc.mdcommd.\n")); 784 } 785 } 786 meta_ping_mnset(sp->setno); 787 } 788 if (lock_flag) { 789 cl_sk = cl_get_setkey(sp->setno, sp->setname); 790 for (i = 0; i < node_c; i++) { 791 if (clnt_unlock_set(node_v[i], cl_sk, &xep)) { 792 if (rval == 0) 793 (void) mdstealerror(ep, &xep); 794 rval = -1; 795 } 796 } 797 cl_set_setkey(NULL); 798 } 799 800 if (sig_flag) { 801 if (multi_node) { 802 /* release signals back to what they were on entry */ 803 if (procsigs(FALSE, &oldsigs, &xep) < 0) 804 mdclrerror(&xep); 805 } else { 806 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 807 } 808 } 809 810 return (rval); 811 812 rollback: 813 /* all signals already blocked for MN disket */ 814 if (!multi_node) { 815 /* Make sure we are blocking all signals */ 816 if (procsigs(TRUE, &oldsigs, &xep) < 0) 817 mdclrerror(&xep); 818 } 819 820 rval = -1; 821 822 /* 823 * For MN diskset: 824 * On each added node (which is now each node to be deleted), 825 * set the node record for that node to DEL. Then set all 826 * node records for the newly added (soon to be deleted) nodes 827 * on all nodes to ok. 828 * 829 * By setting a node's own node record to DEL first, even if 830 * the node doing the rollback panics, the rest of the nodes can 831 * determine the same node list during the choosing of the master 832 * during reconfig. 
833 */ 834 835 /* level 3 */ 836 if ((rb_level > 1) && (multi_node)) { 837 md_mnnode_desc *nd, *saved_nd_next; 838 md_set_desc *sd; 839 840 if ((sd = metaget_setdesc(sp, &xep)) == NULL) { 841 mdclrerror(&xep); 842 } 843 844 for (i = 0; i < node_c; i++) { 845 nd = sd->sd_nodelist; 846 /* All nodes are guaranteed to be ALIVE */ 847 while (nd) { 848 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 849 break; 850 nd = nd->nd_next; 851 } 852 /* Something wrong, will pick this up in next loop */ 853 if (nd == NULL) 854 continue; 855 856 /* Only changing my local cache of node list */ 857 saved_nd_next = nd->nd_next; 858 nd->nd_next = NULL; 859 860 /* Set node record for added host to DEL on that host */ 861 if (clnt_upd_nr_flags(node_v[i], sp, 862 nd, MD_NR_DEL, NULL, &xep)) { 863 nd->nd_next = saved_nd_next; 864 mdclrerror(&xep); 865 } 866 nd->nd_next = saved_nd_next; 867 } 868 869 /* Now set all node records on all nodes to be DEL */ 870 nd = sd->sd_nodelist; 871 /* All nodes are guaranteed to be ALIVE */ 872 while (nd) { 873 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 874 sd->sd_nodelist, MD_NR_DEL, NULL, &xep)) { 875 mdclrerror(&xep); 876 } 877 nd = nd->nd_next; 878 } 879 880 /* Mark set record on all hosts to be DELETED */ 881 for (i = 0; i < node_c; i++) { 882 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) { 883 mdclrerror(&xep); 884 } 885 } 886 } 887 /* level 1 */ 888 if (rb_level > 0) { 889 for (i = 0; i < node_c; i++) { 890 if (clnt_delset(node_v[i], sp, &xep) == -1) 891 mdclrerror(&xep); 892 } 893 } 894 895 /* level 0 */ 896 /* Don't test lock flag since guaranteed to be set if in rollback */ 897 cl_sk = cl_get_setkey(sp->setno, sp->setname); 898 for (i = 0; i < node_c; i++) { 899 if (clnt_unlock_set(node_v[i], cl_sk, &xep)) 900 mdclrerror(&xep); 901 } 902 cl_set_setkey(NULL); 903 904 /* release signals back to what they were on entry */ 905 if (procsigs(FALSE, &oldsigs, &xep) < 0) 906 mdclrerror(&xep); 907 908 if ((sig_flag) && (!multi_node)) 909 
md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 910 911 return (rval); 912 } 913 914 static int 915 del_db_sidenms( 916 mdsetname_t *sp, 917 side_t sideno, 918 md_error_t *ep 919 ) 920 { 921 md_replicalist_t *rlp = NULL; 922 md_replicalist_t *rl; 923 int rval = 0; 924 925 if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) 926 return (-1); 927 928 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 929 md_replica_t *r = rl->rl_repp; 930 931 if (meta_db_delsidenm(sp, sideno, r->r_namep, r->r_blkno, ep)) { 932 rval = -1; 933 goto out; 934 } 935 } 936 937 out: 938 metafreereplicalist(rlp); 939 return (rval); 940 } 941 942 static int 943 del_drvs_from_hosts( 944 mdsetname_t *sp, 945 md_set_desc *sd, 946 md_drive_desc *dd, 947 int node_c, 948 char **node_v, 949 int oha, 950 md_error_t *ep 951 ) 952 { 953 int i; 954 md_mnnode_desc *nd; 955 956 for (i = 0; i < node_c; i++) { 957 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 958 /* 959 * During OHA mode, don't issue RPCs to 960 * non-alive nodes since there is no reason to 961 * wait for RPC timeouts. 962 */ 963 nd = sd->sd_nodelist; 964 while (nd) { 965 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 966 break; 967 nd = nd->nd_next; 968 } 969 if (nd == NULL) { 970 return (mddserror(ep, MDE_DS_NOTINMEMBERLIST, 971 sp->setno, nd->nd_nodename, 972 NULL, sp->setname)); 973 } 974 975 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 976 continue; 977 } 978 if (clnt_deldrvs(node_v[i], sp, dd, ep)) { 979 return (-1); 980 } 981 } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) { 982 /* 983 * All nodes should be alive in non-oha mode. 984 */ 985 if (clnt_deldrvs(node_v[i], sp, dd, ep)) { 986 return (-1); 987 } 988 } else { 989 /* 990 * For traditional diskset, issue the RPC and 991 * ignore RPC failure if in OHA mode. 
992 */ 993 if (clnt_deldrvs(node_v[i], sp, dd, ep)) { 994 if (oha == TRUE && mdanyrpcerror(ep)) { 995 mdclrerror(ep); 996 continue; 997 } 998 return (-1); 999 } 1000 } 1001 } 1002 1003 return (0); 1004 } 1005 1006 static int 1007 del_host_noset( 1008 mdsetname_t *sp, 1009 char **anode, 1010 md_error_t *ep 1011 ) 1012 { 1013 int rval = 0; 1014 md_setkey_t *cl_sk; 1015 md_drive_desc *dd; 1016 md_error_t xep = mdnullerror; 1017 md_set_desc *sd; 1018 1019 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1020 return (-1); 1021 1022 /* Make sure we own the set */ 1023 if (meta_check_ownership(sp, ep) != 0) 1024 return (-1); 1025 1026 /* Lock the set on our side */ 1027 if (clnt_lock_set(mynode(), sp, ep)) { 1028 rval = -1; 1029 goto out; 1030 } 1031 1032 if (clnt_delhosts(mynode(), sp, 1, anode, ep)) { 1033 rval = -1; 1034 goto out; 1035 } 1036 1037 if (!MD_MNSET_DESC(sd)) { 1038 if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), 1039 ep)) == NULL) { 1040 if (! mdisok(ep)) { 1041 rval = -1; 1042 goto out; 1043 } 1044 } 1045 1046 /* If we have drives */ 1047 if (dd != NULL) { 1048 if (clnt_del_drv_sidenms(mynode(), sp, ep)) { 1049 rval = -1; 1050 goto out; 1051 } 1052 } 1053 } 1054 1055 out: 1056 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1057 if (clnt_unlock_set(mynode(), cl_sk, &xep)) { 1058 if (rval == 0) 1059 (void) mdstealerror(ep, &xep); 1060 rval = -1; 1061 } 1062 cl_set_setkey(NULL); 1063 1064 metaflushsetname(sp); 1065 1066 return (rval); 1067 } 1068 1069 static int 1070 del_md_sidenms(mdsetname_t *sp, side_t sideno, md_error_t *ep) 1071 { 1072 mdnm_params_t nm; 1073 md_set_desc *sd; 1074 int i; 1075 1076 if (!metaislocalset(sp)) { 1077 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1078 return (-1); 1079 } 1080 /* Use rpc.mdcommd to add md side info from all nodes */ 1081 if ((! 
metaislocalset(sp)) && MD_MNSET_DESC(sd) && 1082 (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) { 1083 md_mn_result_t *resultp = NULL; 1084 md_mn_msg_meta_md_delside_t md_ds; 1085 int send_rval; 1086 1087 md_ds.msg_sideno = sideno; 1088 /* 1089 * If reconfig cycle has been started, this node is stuck in 1090 * in the return step until this command has completed. If 1091 * mdcommd is suspended, ask send_message to fail (instead of 1092 * retrying) so that metaset can finish allowing the 1093 * reconfig cycle to proceed. 1094 */ 1095 send_rval = mdmn_send_message(sp->setno, 1096 MD_MN_MSG_META_MD_DELSIDE, 1097 MD_MSGF_FAIL_ON_SUSPEND | MD_MSGF_PANIC_WHEN_INCONSISTENT, 1098 (char *)&md_ds, sizeof (md_mn_msg_meta_md_delside_t), 1099 &resultp, ep); 1100 if (send_rval != 0) { 1101 (void) mdstealerror(ep, &(resultp->mmr_ep)); 1102 if (resultp) 1103 free_result(resultp); 1104 return (-1); 1105 } 1106 if (resultp) 1107 free_result(resultp); 1108 } else { 1109 (void) memset(&nm, '\0', sizeof (nm)); 1110 nm.key = MD_KEYWILD; 1111 1112 /*CONSTCOND*/ 1113 while (1) { 1114 nm.mde = mdnullerror; 1115 nm.setno = sp->setno; 1116 nm.side = MD_SIDEWILD; 1117 if (metaioctl(MD_IOCNXTKEY_NM, &nm, &nm.mde, NULL) != 0) 1118 return (mdstealerror(ep, &nm.mde)); 1119 1120 if (nm.key == MD_KEYWILD) 1121 return (0); 1122 1123 /* 1124 * The device reference count can be greater than 1 if 1125 * more than one softpart is configured on top of the 1126 * same device. If this is the case then we want to 1127 * decrement the count to zero so the entry can be 1128 * actually removed. 
1129 */ 1130 for (i = 0; i < nm.ref_count; i++) { 1131 if (del_name(sp, sideno, nm.key, ep) == -1) 1132 return (-1); 1133 } 1134 } 1135 } 1136 return (0); 1137 } 1138 1139 static void 1140 recreate_set( 1141 mdsetname_t *sp, 1142 md_set_desc *sd 1143 ) 1144 { 1145 int i; 1146 int has_set; 1147 md_error_t xep = mdnullerror; 1148 md_mnnode_desc *nd; 1149 1150 if (MD_MNSET_DESC(sd)) { 1151 nd = sd->sd_nodelist; 1152 while (nd) { 1153 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 1154 nd = nd->nd_next; 1155 continue; 1156 } 1157 has_set = nodehasset(sp, nd->nd_nodename, 1158 NHS_NST_EQ, &xep); 1159 1160 if (has_set >= 0) { 1161 nd = nd->nd_next; 1162 continue; 1163 } 1164 1165 mdclrerror(&xep); 1166 1167 if (clnt_mncreateset(nd->nd_nodename, sp, 1168 sd->sd_nodelist, 1169 sd->sd_ctime, sd->sd_genid, 1170 sd->sd_mn_master_nodenm, 1171 sd->sd_mn_master_nodeid, &xep) == -1) 1172 mdclrerror(&xep); 1173 nd = nd->nd_next; 1174 } 1175 } else { 1176 for (i = 0; i < MD_MAXSIDES; i++) { 1177 /* Skip empty slots */ 1178 if (sd->sd_nodes[i][0] == '\0') 1179 continue; 1180 1181 has_set = nodehasset(sp, sd->sd_nodes[i], 1182 NHS_NST_EQ, &xep); 1183 1184 if (has_set >= 0) 1185 continue; 1186 1187 mdclrerror(&xep); 1188 1189 if (clnt_createset(sd->sd_nodes[i], sp, sd->sd_nodes, 1190 sd->sd_ctime, sd->sd_genid, &xep) == -1) 1191 mdclrerror(&xep); 1192 } 1193 } 1194 } 1195 1196 /* 1197 * If a MN diskset, set is already locked on all nodes via clnt_lock_set. 
1198 */ 1199 static int 1200 del_set_nodrives( 1201 mdsetname_t *sp, 1202 int node_c, 1203 char **node_v, 1204 int oha, 1205 md_error_t *ep 1206 ) 1207 { 1208 md_set_desc *sd; 1209 int i; 1210 sigset_t oldsigs; 1211 md_setkey_t *cl_sk; 1212 int rb_level = 0; 1213 ulong_t max_genid = 0; 1214 int rval = 0; 1215 md_error_t xep = mdnullerror; 1216 md_mnnode_desc *nd; 1217 int delete_end = 1; 1218 1219 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1220 return (-1); 1221 1222 if (MD_MNSET_DESC(sd)) { 1223 /* Make sure we are blocking all signals */ 1224 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1225 mdclrerror(&xep); 1226 } else { 1227 md_rb_sig_handling_on(); 1228 } 1229 1230 /* 1231 * Lock the set on current set members for traditional disksets. 1232 */ 1233 if (!(MD_MNSET_DESC(sd))) { 1234 for (i = 0; i < node_c; i++) { 1235 /* 1236 * For traditional diskset, issue the RPC and 1237 * ignore RPC failure if in OHA mode. 1238 */ 1239 if (clnt_lock_set(node_v[i], sp, ep)) { 1240 if (oha == TRUE && mdanyrpcerror(ep)) { 1241 mdclrerror(ep); 1242 continue; 1243 } 1244 rval = -1; 1245 goto out; 1246 } 1247 } 1248 } 1249 1250 1251 RB_TEST(1, "deletehosts", ep) 1252 1253 RB_PREEMPT; 1254 rb_level = 1; /* level 1 */ 1255 1256 RB_TEST(2, "deletehosts", ep) 1257 1258 /* 1259 * Mark the set record MD_SR_DEL 1260 */ 1261 for (i = 0; i < node_c; i++) { 1262 1263 RB_TEST(3, "deletehosts", ep) 1264 1265 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 1266 /* 1267 * During OHA mode, don't issue RPCs to 1268 * non-alive nodes since there is no reason to 1269 * wait for RPC timeouts. 
1270 */ 1271 nd = sd->sd_nodelist; 1272 while (nd) { 1273 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 1274 break; 1275 nd = nd->nd_next; 1276 } 1277 if (nd == NULL) { 1278 (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, 1279 sp->setno, nd->nd_nodename, 1280 NULL, sp->setname); 1281 goto rollback; 1282 } 1283 1284 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 1285 continue; 1286 } 1287 1288 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) { 1289 goto rollback; 1290 } 1291 } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) { 1292 /* 1293 * All nodes should be alive in non-oha mode. 1294 */ 1295 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) { 1296 goto rollback; 1297 } 1298 } else { 1299 /* 1300 * For traditional diskset, issue the RPC and 1301 * ignore RPC failure if in OHA mode. 1302 */ 1303 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) { 1304 if (oha == TRUE && mdanyrpcerror(ep)) { 1305 mdclrerror(ep); 1306 continue; 1307 } 1308 goto rollback; 1309 } 1310 } 1311 1312 RB_TEST(4, "deletehosts", ep) 1313 } 1314 1315 RB_TEST(5, "deletehosts", ep) 1316 1317 RB_PREEMPT; 1318 rb_level = 2; /* level 2 */ 1319 1320 RB_TEST(6, "deletehosts", ep) 1321 1322 if (sdssc_delete_begin(sp->setname) == SDSSC_ERROR) 1323 if (metad_isautotakebyname(sp->setname)) 1324 delete_end = 0; 1325 else 1326 goto rollback; 1327 1328 /* The set is OK to delete, make it so. */ 1329 for (i = 0; i < node_c; i++) { 1330 1331 RB_TEST(7, "deletehosts", ep) 1332 1333 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 1334 /* 1335 * During OHA mode, don't issue RPCs to 1336 * non-alive nodes since there is no reason to 1337 * wait for RPC timeouts. 
1338 */ 1339 nd = sd->sd_nodelist; 1340 while (nd) { 1341 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 1342 break; 1343 nd = nd->nd_next; 1344 } 1345 if (nd == NULL) { 1346 (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, 1347 sp->setno, nd->nd_nodename, 1348 NULL, sp->setname); 1349 goto rollback; 1350 } 1351 1352 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 1353 continue; 1354 } 1355 1356 if (clnt_delset(node_v[i], sp, ep) == -1) { 1357 goto rollback; 1358 } 1359 } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) { 1360 /* 1361 * All nodes should be alive in non-oha mode. 1362 */ 1363 if (clnt_delset(node_v[i], sp, ep) == -1) { 1364 goto rollback; 1365 } 1366 } else { 1367 /* 1368 * For traditional diskset, issue the RPC and 1369 * ignore RPC failure if in OHA mode. 1370 */ 1371 if (clnt_delset(node_v[i], sp, ep) == -1) { 1372 if (oha == TRUE && mdanyrpcerror(ep)) { 1373 mdclrerror(ep); 1374 continue; 1375 } 1376 goto rollback; 1377 } 1378 } 1379 1380 RB_TEST(8, "deletehosts", ep) 1381 } 1382 1383 RB_TEST(9, "deletehosts", ep) 1384 1385 out: 1386 /* 1387 * Unlock the set on current set members 1388 * for traditional disksets. 1389 */ 1390 if (!(MD_MNSET_DESC(sd))) { 1391 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1392 for (i = 0; i < node_c; i++) { 1393 /* 1394 * For traditional diskset, issue the RPC and 1395 * ignore RPC failure if in OHA mode. 1396 */ 1397 if (clnt_unlock_set(node_v[i], cl_sk, &xep)) { 1398 if (oha == TRUE && mdanyrpcerror(&xep)) { 1399 mdclrerror(&xep); 1400 continue; 1401 } 1402 if (rval == 0) 1403 (void) mdstealerror(ep, &xep); 1404 rval = -1; 1405 } 1406 } 1407 cl_set_setkey(NULL); 1408 } 1409 1410 /* 1411 * A MN diskset has the clnt_locks held by meta_set_deletehosts so 1412 * don't flush that data until meta_set_deletehosts has finished 1413 * with it. meta_set_deletehosts will handle the flush of the 1414 * setname. 
1415 */ 1416 if (!(MD_MNSET_DESC(sd))) { 1417 metaflushsetname(sp); 1418 } 1419 1420 if (delete_end && 1421 sdssc_delete_end(sp->setname, SDSSC_COMMIT) == SDSSC_ERROR) 1422 rval = -1; 1423 1424 if (MD_MNSET_DESC(sd)) { 1425 /* release signals back to what they were on entry */ 1426 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1427 mdclrerror(&xep); 1428 } else { 1429 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1430 } 1431 1432 return (rval); 1433 1434 rollback: 1435 /* all signals already blocked for MN disket */ 1436 if (!(MD_MNSET_DESC(sd))) { 1437 /* Make sure we are blocking all signals */ 1438 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1439 mdclrerror(&xep); 1440 } 1441 1442 rval = -1; 1443 1444 max_genid = sd->sd_genid; 1445 1446 /* level 2 */ 1447 if (rb_level > 1) { 1448 recreate_set(sp, sd); 1449 max_genid++; 1450 1451 if (delete_end) 1452 (void) sdssc_delete_end(sp->setname, SDSSC_CLEANUP); 1453 } 1454 1455 /* level 1 */ 1456 if (rb_level > 0) { 1457 max_genid++; 1458 resync_genid(sp, sd, max_genid, node_c, node_v); 1459 } 1460 1461 /* level 0 */ 1462 /* 1463 * Unlock the set on current set members 1464 * for traditional disksets. 1465 */ 1466 if (!(MD_MNSET_DESC(sd))) { 1467 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1468 for (i = 0; i < node_c; i++) { 1469 /* 1470 * For traditional diskset, issue the RPC and 1471 * ignore RPC failure if in OHA mode. 1472 */ 1473 if (clnt_unlock_set(node_v[i], cl_sk, &xep)) 1474 mdclrerror(&xep); 1475 } 1476 cl_set_setkey(NULL); 1477 } 1478 1479 /* release signals back to what they were on entry */ 1480 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1481 mdclrerror(&xep); 1482 1483 /* 1484 * A MN diskset has the clnt_locks held by meta_set_deletehosts so 1485 * don't flush that data until meta_set_deletehosts has finished 1486 * with it. meta_set_deletehosts will handle the flush of the 1487 * setname. 
1488 */ 1489 if (!(MD_MNSET_DESC(sd))) { 1490 metaflushsetname(sp); 1491 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1492 } 1493 1494 return (rval); 1495 } 1496 1497 /* 1498 * On entry: 1499 * procsigs already called for MN diskset. 1500 * md_rb_sig_handling already called for traditional diskset. 1501 */ 1502 static int 1503 del_set_on_hosts( 1504 mdsetname_t *sp, 1505 md_set_desc *sd, 1506 md_drive_desc *dd, 1507 int node_c, /* Number of nodes */ 1508 char **node_v, /* Nodes being deleted */ 1509 int oha, 1510 md_error_t *ep 1511 ) 1512 { 1513 int i; 1514 int j; 1515 side_t sideno; 1516 md_replicalist_t *rlp = NULL; 1517 sigset_t oldsigs; 1518 md_setkey_t *cl_sk; 1519 ulong_t max_genid = 0; 1520 int rb_level = 1; /* This is a special case */ 1521 md_error_t xep = mdnullerror; 1522 md_mnnode_desc *nd; 1523 1524 RB_PREEMPT; 1525 1526 RB_TEST(7, "deletehosts", ep) 1527 1528 if (dd != NULL) { 1529 /* 1530 * May need this to re-add sidenames on roll back. 1531 */ 1532 if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, 1533 ep) < 0) 1534 goto rollback; 1535 1536 RB_TEST(8, "deletehosts", ep) 1537 1538 RB_PREEMPT; 1539 rb_level = 2; /* level 2 */ 1540 1541 RB_TEST(9, "deletehosts", ep) 1542 1543 if (del_drvs_from_hosts(sp, sd, dd, node_c, node_v, oha, ep)) 1544 goto rollback; 1545 1546 RB_TEST(10, "deletehosts", ep) 1547 1548 RB_PREEMPT; 1549 rb_level = 3; /* level 3 */ 1550 1551 RB_TEST(11, "deletehosts", ep) 1552 1553 /* 1554 * Delete the db replica sides 1555 * This is done before the next loop, so that 1556 * the db does not get unloaded before we are finished 1557 * deleting the sides. 1558 */ 1559 if (MD_MNSET_DESC(sd)) { 1560 nd = sd->sd_nodelist; 1561 while (nd) { 1562 /* Skip hosts not being deleted */ 1563 if (! 
strinlst(nd->nd_nodename, node_c, 1564 node_v)) { 1565 nd = nd->nd_next; 1566 continue; 1567 } 1568 1569 if (del_db_sidenms(sp, nd->nd_nodeid, ep)) 1570 goto rollback; 1571 1572 RB_TEST(12, "deletehosts", ep) 1573 nd = nd->nd_next; 1574 } 1575 } else { 1576 for (sideno = 0; sideno < MD_MAXSIDES; sideno++) { 1577 /* Skip empty slots */ 1578 if (sd->sd_nodes[sideno][0] == '\0') 1579 continue; 1580 1581 /* Skip hosts not being deleted */ 1582 if (! strinlst(sd->sd_nodes[sideno], node_c, 1583 node_v)) 1584 continue; 1585 1586 if (del_db_sidenms(sp, sideno, ep)) 1587 goto rollback; 1588 1589 RB_TEST(12, "deletehosts", ep) 1590 } 1591 } 1592 1593 RB_TEST(13, "deletehosts", ep) 1594 1595 RB_PREEMPT; 1596 rb_level = 4; /* level 4 */ 1597 1598 RB_TEST(14, "deletehosts", ep) 1599 1600 /* Delete the names from the namespace */ 1601 if (MD_MNSET_DESC(sd)) { 1602 nd = sd->sd_nodelist; 1603 while (nd) { 1604 /* Skip hosts not being deleted */ 1605 if (! strinlst(nd->nd_nodename, node_c, 1606 node_v)) { 1607 nd = nd->nd_next; 1608 continue; 1609 } 1610 1611 if (del_md_sidenms(sp, nd->nd_nodeid, ep)) 1612 goto rollback; 1613 1614 RB_TEST(15, "deletehosts", ep) 1615 nd = nd->nd_next; 1616 } 1617 } else { 1618 for (sideno = 0; sideno < MD_MAXSIDES; sideno++) { 1619 /* Skip empty slots */ 1620 if (sd->sd_nodes[sideno][0] == '\0') 1621 continue; 1622 1623 /* Skip hosts not being deleted */ 1624 if (! strinlst(sd->sd_nodes[sideno], node_c, 1625 node_v)) 1626 continue; 1627 1628 if (del_md_sidenms(sp, sideno, ep)) 1629 goto rollback; 1630 1631 RB_TEST(15, "deletehosts", ep) 1632 } 1633 } 1634 } 1635 1636 RB_TEST(16, "deletehosts", ep) 1637 1638 RB_PREEMPT; 1639 rb_level = 5; /* level 6 */ 1640 1641 RB_TEST(17, "deletehosts", ep) 1642 1643 for (i = 0; i < node_c; i++) { 1644 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 1645 /* 1646 * During OHA mode, don't issue RPCs to 1647 * non-alive nodes since there is no reason to 1648 * wait for RPC timeouts. 
1649 */ 1650 nd = sd->sd_nodelist; 1651 while (nd) { 1652 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 1653 break; 1654 nd = nd->nd_next; 1655 } 1656 if (nd == NULL) { 1657 (void) mddserror(ep, MDE_DS_NOTINMEMBERLIST, 1658 sp->setno, nd->nd_nodename, 1659 NULL, sp->setname); 1660 goto rollback; 1661 } 1662 1663 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 1664 continue; 1665 } 1666 1667 if (clnt_delset(node_v[i], sp, ep) == -1) { 1668 goto rollback; 1669 } 1670 } else if (MD_MNSET_DESC(sd) && (oha == FALSE)) { 1671 /* 1672 * All nodes should be alive in non-oha mode. 1673 */ 1674 if (clnt_delset(node_v[i], sp, ep) == -1) { 1675 goto rollback; 1676 } 1677 } else { 1678 /* 1679 * For traditional diskset, issue the RPC and 1680 * ignore RPC failure if in OHA mode. 1681 */ 1682 if (clnt_delset(node_v[i], sp, ep) == -1) { 1683 if (oha == TRUE && mdanyrpcerror(ep)) { 1684 mdclrerror(ep); 1685 continue; 1686 } 1687 goto rollback; 1688 } 1689 } 1690 1691 RB_TEST(18, "deletehosts", ep) 1692 } 1693 1694 metafreereplicalist(rlp); 1695 1696 if (MD_MNSET_DESC(sd)) { 1697 /* release signals back to what they were on entry */ 1698 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1699 mdclrerror(&xep); 1700 } else { 1701 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1702 } 1703 1704 return (0); 1705 1706 rollback: 1707 /* all signals already blocked for MN disket */ 1708 if (!(MD_MNSET_DESC(sd))) { 1709 /* Make sure we are blocking all signals */ 1710 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1711 mdclrerror(&xep); 1712 } 1713 1714 max_genid = sd->sd_genid; 1715 1716 /* level 5 */ 1717 if (rb_level > 4) { 1718 recreate_set(sp, sd); 1719 max_genid++; 1720 } 1721 1722 /* level 2 */ 1723 if (rb_level > 1 && dd != NULL) { 1724 /* 1725 * See if we have to re-add the drives specified. 
1726 */ 1727 for (i = 0; i < node_c; i++) { 1728 md_set_record *sr; 1729 1730 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 1731 /* 1732 * During OHA mode, don't issue RPCs to 1733 * non-alive nodes since there is no reason to 1734 * wait for RPC timeouts. 1735 */ 1736 nd = sd->sd_nodelist; 1737 while (nd) { 1738 if (strcmp(nd->nd_nodename, node_v[i]) 1739 == 0) 1740 break; 1741 nd = nd->nd_next; 1742 } 1743 if (nd == NULL) 1744 continue; 1745 1746 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 1747 continue; 1748 } 1749 1750 /* Don't care if set record is MN or not */ 1751 if (clnt_getset(node_v[i], sp->setname, 1752 MD_SET_BAD, &sr, &xep) == -1) { 1753 mdclrerror(&xep); 1754 continue; 1755 } 1756 1757 /* Drive already added, skip to next node */ 1758 if (sr->sr_drivechain != NULL) { 1759 /* 1760 * Set record structure was allocated from RPC 1761 * routine getset so this structure is only of 1762 * size md_set_record even if the MN flag is 1763 * set. So, clear the flag so that the free 1764 * code doesn't attempt to free a structure 1765 * the size of md_mnset_record. 1766 */ 1767 sr->sr_flags &= ~MD_SR_MN; 1768 free_sr(sr); 1769 continue; 1770 } 1771 1772 if (clnt_adddrvs(node_v[i], sp, dd, 1773 sr->sr_ctime, sr->sr_genid, &xep) == -1) 1774 mdclrerror(&xep); 1775 1776 if (clnt_upd_dr_flags(node_v[i], sp, dd, 1777 MD_DR_OK, &xep) == -1) 1778 mdclrerror(&xep); 1779 1780 /* 1781 * Set record structure was allocated from RPC routine 1782 * getset so this structure is only of size 1783 * md_set_record even if the MN flag is set. So, 1784 * clear the flag so that the free code doesn't 1785 * attempt to free a structure the size of 1786 * md_mnset_record. 
1787 */ 1788 sr->sr_flags &= ~MD_SR_MN; 1789 free_sr(sr); 1790 } 1791 max_genid += 3; 1792 } 1793 1794 /* level 3 */ 1795 if (rb_level > 2 && dd != NULL) { 1796 md_replicalist_t *rl; 1797 1798 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 1799 md_replica_t *r = rl->rl_repp; 1800 1801 /* 1802 * This is not the first replica being added to the 1803 * diskset so call with ADDSIDENMS_BCAST. If this 1804 * is a traditional diskset, the bcast flag is ignored 1805 * since traditional disksets don't use the rpc.mdcommd. 1806 */ 1807 if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno, 1808 DB_ADDSIDENMS_BCAST, &xep)) 1809 mdclrerror(&xep); 1810 } 1811 } 1812 1813 /* level 4 */ 1814 if (rb_level > 3 && dd != NULL) { 1815 int nodeid_addsides = 0; 1816 /* 1817 * Add the device names for the new sides into the namespace, 1818 * on all hosts not being deleted. 1819 */ 1820 if (MD_MNSET_DESC(sd)) { 1821 nd = sd->sd_nodelist; 1822 while (nd) { 1823 /* Find a node that is not being deleted */ 1824 if (! strinlst(nd->nd_nodename, node_c, 1825 node_v)) { 1826 nodeid_addsides = nd->nd_nodeid; 1827 break; 1828 } 1829 nd = nd->nd_next; 1830 } 1831 } else { 1832 for (j = 0; j < MD_MAXSIDES; j++) { 1833 /* Skip empty slots */ 1834 if (sd->sd_nodes[j][0] == '\0') 1835 continue; 1836 1837 /* Find a node that is not being deleted */ 1838 if (! 
strinlst(sd->sd_nodes[j], node_c, 1839 node_v)) 1840 break; 1841 } 1842 nodeid_addsides = j; 1843 } 1844 1845 if (MD_MNSET_DESC(sd)) { 1846 nd = sd->sd_nodelist; 1847 while (nd) { 1848 /* Skip nodes not being deleted */ 1849 if (!strinlst(nd->nd_nodename, node_c, 1850 node_v)) { 1851 nd = nd->nd_next; 1852 continue; 1853 } 1854 1855 /* this side was just created, add the names */ 1856 if (add_md_sidenms(sp, nd->nd_nodeid, 1857 nodeid_addsides, &xep)) 1858 mdclrerror(&xep); 1859 nd = nd->nd_next; 1860 } 1861 } else { 1862 for (i = 0; i < MD_MAXSIDES; i++) { 1863 /* Skip empty slots */ 1864 if (sd->sd_nodes[i][0] == '\0') 1865 continue; 1866 1867 /* Skip nodes not being deleted */ 1868 if (!strinlst(sd->sd_nodes[i], node_c, node_v)) 1869 continue; 1870 1871 /* this side was just created, add the names */ 1872 if (add_md_sidenms(sp, i, nodeid_addsides, 1873 &xep)) 1874 mdclrerror(&xep); 1875 } 1876 } 1877 } 1878 1879 /* level 1 */ 1880 if (rb_level > 0) { 1881 max_genid++; 1882 resync_genid(sp, sd, max_genid, node_c, node_v); 1883 } 1884 1885 /* level 0 */ 1886 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1887 if (MD_MNSET_DESC(sd)) { 1888 nd = sd->sd_nodelist; 1889 while (nd) { 1890 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 1891 continue; 1892 /* To balance lock/unlock; can send to dead node */ 1893 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) 1894 mdclrerror(&xep); 1895 nd = nd->nd_next; 1896 } 1897 } else { 1898 for (i = 0; i < MD_MAXSIDES; i++) { 1899 /* Skip empty slots */ 1900 if (sd->sd_nodes[i][0] == '\0') 1901 continue; 1902 1903 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) 1904 mdclrerror(&xep); 1905 } 1906 } 1907 cl_set_setkey(NULL); 1908 1909 /* release signals back to what they were on entry */ 1910 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1911 mdclrerror(&xep); 1912 1913 metafreereplicalist(rlp); 1914 1915 if (!(MD_MNSET_DESC(sd))) { 1916 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1917 } 1918 1919 return (-1); 1920 } 1921 1922 
static int 1923 make_sideno_sidenm( 1924 mdsetname_t *sp, 1925 mddrivename_t *dnp, 1926 side_t sideno, 1927 md_error_t *ep 1928 ) 1929 { 1930 mdsidenames_t *sn, **sn_next; 1931 md_set_desc *sd; 1932 mdname_t *np; 1933 uint_t rep_slice; 1934 int err = 0; 1935 1936 assert(dnp->side_names_key != MD_KEYWILD); 1937 1938 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1939 return (-1); 1940 1941 /* find the end of the link list */ 1942 for (sn = dnp->side_names; sn->next != NULL; sn = sn->next); 1943 sn_next = &sn->next; 1944 1945 if (meta_replicaslice(dnp, &rep_slice, ep) != 0) 1946 return (-1); 1947 1948 if ((np = metaslicename(dnp, rep_slice, ep)) == NULL) 1949 return (-1); 1950 1951 sn = Zalloc(sizeof (*sn)); 1952 sn->sideno = sideno; 1953 1954 if (MD_MNSET_DESC(sd)) { 1955 /* 1956 * For MO diskset the sideno is not an index into 1957 * the array of nodes. Hence getside_devinfo is 1958 * used instead of meta_getnextside_devinfo. 1959 */ 1960 if (meta_getside_devinfo(sp, np->bname, sideno, &sn->cname, 1961 &sn->dname, &sn->mnum, ep) == -1) 1962 err = -1; 1963 } else { 1964 /* decrement sideno, to look like the previous sideno */ 1965 sideno--; 1966 if (meta_getnextside_devinfo(sp, np->bname, &sideno, &sn->cname, 1967 &sn->dname, &sn->mnum, ep) == -1) 1968 err = -1; 1969 } 1970 1971 if (err) { 1972 Free(sn); 1973 return (err); 1974 } 1975 assert(sn->sideno == sideno); 1976 1977 /* Add to the end of the linked list */ 1978 *sn_next = sn; 1979 return (0); 1980 } 1981 1982 static int 1983 validate_nodes( 1984 mdsetname_t *sp, 1985 int node_c, 1986 char **node_v, 1987 md_error_t *ep 1988 ) 1989 { 1990 char *hostname; 1991 int i; 1992 1993 1994 for (i = 0; i < node_c; i++) { 1995 if (strlen(node_v[i]) > (size_t)MD_MAX_NODENAME) 1996 return (mddserror(ep, MDE_DS_NODENAMETOOLONG, 1997 sp->setno, node_v[i], NULL, sp->setname)); 1998 if (clnt_hostname(node_v[i], &hostname, ep)) 1999 return (-1); 2000 if (strcmp(node_v[i], hostname) != 0) { 2001 Free(hostname); 2002 return 
(mddserror(ep, MDE_DS_NOTNODENAME, sp->setno, 2003 node_v[i], NULL, sp->setname)); 2004 } 2005 Free(hostname); 2006 } 2007 return (0); 2008 } 2009 2010 /* 2011 * Exported Entry Points 2012 */ 2013 2014 /* 2015 * Check the given disk set name for syntactic correctness. 2016 */ 2017 int 2018 meta_set_checkname(char *setname, md_error_t *ep) 2019 { 2020 char *cp; 2021 2022 if (strlen(setname) > (size_t)MD_MAX_SETNAME) 2023 return (mddserror(ep, MDE_DS_SETNAMETOOLONG, 2024 MD_SET_BAD, NULL, NULL, setname)); 2025 2026 for (cp = setname; *cp; cp++) 2027 if (!isprint(*cp) || strchr(INVALID_IN_NAMES, *cp) != NULL) 2028 return (mddserror(ep, MDE_DS_INVALIDSETNAME, 2029 MD_SET_BAD, NULL, NULL, setname)); 2030 return (0); 2031 } 2032 2033 /* 2034 * Add host(s) to the multi-node diskset provided in sp. 2035 * - create set if non-existent. 2036 */ 2037 static int 2038 meta_multinode_set_addhosts( 2039 mdsetname_t *sp, 2040 int multi_node, 2041 int node_c, 2042 char **node_v, 2043 int auto_take, 2044 md_error_t *ep 2045 ) 2046 { 2047 md_set_desc *sd; 2048 md_drive_desc *dd, *p; 2049 int rval = 0; 2050 int bool; 2051 int nodeindex; 2052 int i; 2053 int has_set; 2054 sigset_t oldsigs; 2055 md_setkey_t *cl_sk; 2056 int rb_level = 0; 2057 md_error_t xep = mdnullerror; 2058 md_mnnode_desc *nd, *nd_curr, *nd_prev; 2059 md_timeval32_t now; 2060 int nodecnt; 2061 mndiskset_membershiplist_t *nl, *nl2; 2062 int suspendall_flag = 0; 2063 int suspend1_flag = 0; 2064 int lock_flag = 0; 2065 int stale_flag = 0; 2066 md_mnnode_desc *saved_nd_next; 2067 int remote_sets_created = 0; 2068 2069 /* 2070 * Check membershiplist first. If there's 2071 * an error, fail to create set and pass back error. 2072 */ 2073 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { 2074 return (-1); 2075 } 2076 /* Verify that all nodes are in member list */ 2077 for (i = 0; i < node_c; i++) { 2078 /* 2079 * If node in list isn't a member of the membership, 2080 * just return error. 
2081 */ 2082 if (meta_is_member(node_v[i], NULL, nl) == 0) { 2083 meta_free_nodelist(nl); 2084 return (mddserror(ep, MDE_DS_NOTINMEMBERLIST, 2085 sp->setno, node_v[i], NULL, sp->setname)); 2086 } 2087 } 2088 /* 2089 * Node list is needed later, but there is a lot of error 2090 * checking and possible failures between here and there, so 2091 * just re-get the list later if there are no errors. 2092 */ 2093 meta_free_nodelist(nl); 2094 nl = NULL; 2095 2096 /* 2097 * Verify that list of nodes being added contains no 2098 * duplicates. 2099 */ 2100 if (nodesuniq(sp, node_c, node_v, ep)) 2101 return (-1); 2102 2103 /* 2104 * Verify that each node being added thinks that its nodename 2105 * is the same as the nodename given. 2106 */ 2107 if (validate_nodes(sp, node_c, node_v, ep)) 2108 return (-1); 2109 2110 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 2111 if (! mdiserror(ep, MDE_NO_SET)) 2112 return (-1); 2113 mdclrerror(ep); 2114 return (create_set(sp, multi_node, node_c, node_v, auto_take, 2115 ep)); 2116 } else { 2117 /* 2118 * If this node and another node were both attempting to 2119 * create the same setname at the same time, and the other 2120 * node has just created the set on this node then sd would 2121 * be non-NULL, but sp->setno would be null (setno is filled 2122 * in by the create_set). If this is true, then fail since 2123 * the other node has already won this race. 2124 */ 2125 if (sp->setno == NULL) { 2126 return (mddserror(ep, MDE_DS_NODEINSET, 2127 NULL, mynode(), NULL, sp->setname)); 2128 } 2129 } 2130 2131 /* The auto_take behavior is inconsistent with multiple hosts. */ 2132 if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) { 2133 (void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL, 2134 sp->setname); 2135 return (-1); 2136 } 2137 2138 /* 2139 * We already have the set. 
2140 */ 2141 2142 /* Make sure we own the set */ 2143 if (meta_check_ownership(sp, ep) != 0) 2144 return (-1); 2145 2146 /* 2147 * The drive and node records are stored in the local mddbs of each 2148 * node in the diskset. Each node's rpc.metad daemon reads in the set, 2149 * drive and node records from that node's local mddb and caches them 2150 * internally. Any process needing diskset information contacts its 2151 * local rpc.metad to get this information. Since each node in the 2152 * diskset is independently reading the set information from its local 2153 * mddb, the set, drive and node records in the local mddbs must stay 2154 * in-sync, so that all nodes have a consistent view of the diskset. 2155 * 2156 * For a multinode diskset, explicitly verify that all nodes in the 2157 * diskset are ALIVE (i.e. are in the API membership list). Otherwise, 2158 * fail this operation since all nodes must be ALIVE in order to add 2159 * the new node record to their local mddb. If a panic of this node 2160 * leaves the local mddbs set, node and drive records out-of-sync, the 2161 * reconfig cycle will fix the local mddbs and force them back into 2162 * synchronization. 2163 */ 2164 nd = sd->sd_nodelist; 2165 while (nd) { 2166 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 2167 return (mddserror(ep, MDE_DS_NOTINMEMBERLIST, 2168 sp->setno, nd->nd_nodename, NULL, 2169 sp->setname)); 2170 } 2171 nd = nd->nd_next; 2172 } 2173 2174 /* 2175 * Check if node is already in set. 2176 */ 2177 for (i = 0; i < node_c; i++) { 2178 /* Is node already in set? */ 2179 nd = sd->sd_nodelist; 2180 while (nd) { 2181 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 2182 break; 2183 nd = nd->nd_next; 2184 } 2185 if (nd) { 2186 return (mddserror(ep, MDE_DS_NODEINSET, 2187 sp->setno, node_v[i], NULL, 2188 sp->setname)); 2189 } 2190 } 2191 2192 /* 2193 * Lock the set on current set members. 
2194 * Set locking done much earlier for MN diskset than for traditional 2195 * diskset since lock_set and SUSPEND are used to protect against 2196 * other meta* commands running on the other nodes. 2197 */ 2198 /* Make sure we are blocking all signals */ 2199 if (procsigs(TRUE, &oldsigs, &xep) < 0) 2200 mdclrerror(&xep); 2201 2202 nd = sd->sd_nodelist; 2203 /* All nodes are guaranteed to be ALIVE */ 2204 while (nd) { 2205 if (clnt_lock_set(nd->nd_nodename, sp, ep)) { 2206 rval = -1; 2207 goto out; 2208 } 2209 lock_flag = 1; 2210 nd = nd->nd_next; 2211 } 2212 /* 2213 * Lock out other meta* commands by suspending 2214 * class 1 messages across the diskset. 2215 */ 2216 nd = sd->sd_nodelist; 2217 /* Send suspend to nodes in nodelist before addhosts call */ 2218 /* All nodes are guaranteed to be ALIVE */ 2219 while (nd) { 2220 if (clnt_mdcommdctl(nd->nd_nodename, 2221 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 2222 MD_MSCF_NO_FLAGS, ep)) { 2223 rval = -1; 2224 goto out; 2225 } 2226 suspend1_flag = 1; 2227 nd = nd->nd_next; 2228 } 2229 2230 /* Lock the set on new set members */ 2231 for (i = 0; i < node_c; i++) { 2232 /* Already verified to be alive */ 2233 if (clnt_lock_set(node_v[i], sp, ep)) { 2234 rval = -1; 2235 goto out; 2236 } 2237 lock_flag = 1; 2238 } 2239 2240 /* 2241 * Perform the required checks for new hosts 2242 */ 2243 for (i = 0; i < node_c; i++) { 2244 /* Make sure this set name is not used on the other hosts */ 2245 has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep); 2246 if (has_set < 0) { 2247 if (! 
mdiserror(ep, MDE_NO_SET)) { 2248 rval = -1; 2249 goto out; 2250 } 2251 /* Keep on truck'n */ 2252 mdclrerror(ep); 2253 } else if (has_set) { 2254 (void) mddserror(ep, MDE_DS_NODEHASSET, sp->setno, 2255 node_v[i], NULL, sp->setname); 2256 rval = -1; 2257 goto out; 2258 } 2259 2260 if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1) { 2261 rval = -1; 2262 goto out; 2263 } 2264 2265 if (bool == TRUE) { 2266 (void) mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno, 2267 node_v[i], NULL, sp->setname); 2268 rval = -1; 2269 goto out; 2270 } 2271 2272 if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1) { 2273 rval = -1; 2274 goto out; 2275 } 2276 2277 if (bool == FALSE) { 2278 (void) mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno, 2279 node_v[i], NULL, sp->setname); 2280 rval = -1; 2281 goto out; 2282 } 2283 2284 if (check_setdrvs_againstnode(sp, node_v[i], ep)) { 2285 rval = -1; 2286 goto out; 2287 } 2288 } 2289 2290 /* Get drive descriptors for the set */ 2291 if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL) { 2292 if (! mdisok(ep)) { 2293 rval = -1; 2294 goto out; 2295 } 2296 } 2297 2298 /* END CHECK CODE */ 2299 2300 RB_TEST(1, "addhosts", ep) 2301 2302 RB_PREEMPT; 2303 rb_level = 1; /* level 1 */ 2304 2305 RB_TEST(2, "addhosts", ep) 2306 2307 /* 2308 * Create the set where needed 2309 */ 2310 if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) { 2311 goto rollback; 2312 } 2313 2314 /* 2315 * Send suspend to rpc.mdcommd on nodes where a set has been 2316 * created since rpc.mdcommd must now be running on the remote nodes. 2317 */ 2318 remote_sets_created = 1; 2319 for (i = 0; i < node_c; i++) { 2320 /* 2321 * Lock out other meta* commands by suspending 2322 * class 1 messages across the diskset. 2323 */ 2324 if (clnt_mdcommdctl(node_v[i], 2325 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 2326 MD_MSCF_NO_FLAGS, ep)) { 2327 rval = -1; 2328 goto rollback; 2329 } 2330 } 2331 2332 /* 2333 * Merge the new entries into the set with the existing sides. 
2334 * Get membershiplist from API routine. If there's 2335 * an error, fail to create set and pass back error. 2336 */ 2337 if (meta_read_nodelist(&nodecnt, &nl, ep) == -1) { 2338 goto rollback; 2339 } 2340 if (meta_gettimeofday(&now) == -1) { 2341 meta_free_nodelist(nl); 2342 (void) mdsyserror(ep, errno, 2343 dgettext(TEXT_DOMAIN, "meta_gettimeofday()")); 2344 goto rollback; 2345 } 2346 for (nodeindex = 0; nodeindex < node_c; nodeindex++) { 2347 nd = Zalloc(sizeof (*nd)); 2348 (void) strcpy(nd->nd_nodename, node_v[nodeindex]); 2349 nd->nd_ctime = now; 2350 nl2 = nl; 2351 while (nl2) { 2352 if (strcmp(nl2->msl_node_name, 2353 node_v[nodeindex]) == 0) { 2354 nd->nd_nodeid = nl2->msl_node_id; 2355 (void) strcpy(nd->nd_priv_ic, 2356 nl2->msl_node_addr); 2357 break; 2358 } 2359 nl2 = nl2->next; 2360 } 2361 2362 /* 2363 * Nodelist must be kept in ascending nodeid order. 2364 */ 2365 if (sd->sd_nodelist == NULL) { 2366 /* Nothing in list, just add it */ 2367 sd->sd_nodelist = nd; 2368 } else if (nd->nd_nodeid < 2369 sd->sd_nodelist->nd_nodeid) { 2370 /* Add to head of list */ 2371 nd->nd_next = sd->sd_nodelist; 2372 sd->sd_nodelist = nd; 2373 } else { 2374 nd_curr = sd->sd_nodelist->nd_next; 2375 nd_prev = sd->sd_nodelist; 2376 /* Search for place to add it */ 2377 while (nd_curr) { 2378 if (nd->nd_nodeid < nd_curr->nd_nodeid) { 2379 /* Add before nd_curr */ 2380 nd->nd_next = nd_curr; 2381 nd_prev->nd_next = nd; 2382 break; 2383 } 2384 nd_prev = nd_curr; 2385 nd_curr = nd_curr->nd_next; 2386 } 2387 /* Add to end of list */ 2388 if (nd_curr == NULL) { 2389 nd_prev->nd_next = nd; 2390 } 2391 2392 } 2393 /* Node already verified to be in membership */ 2394 nd->nd_flags |= MD_MN_NODE_ALIVE; 2395 } 2396 meta_free_nodelist(nl); 2397 2398 /* If we have drives */ 2399 if (dd != NULL) { 2400 /* 2401 * For all the hosts being added, create a sidename structure 2402 */ 2403 nd = sd->sd_nodelist; 2404 while (nd) { 2405 /* Skip nodes not being added */ 2406 if 
(!strinlst(nd->nd_nodename, node_c, node_v)) { 2407 nd = nd->nd_next; 2408 continue; 2409 } 2410 for (p = dd; p != NULL; p = p->dd_next) { 2411 if (make_sideno_sidenm(sp, p->dd_dnp, 2412 nd->nd_nodeid, ep) != 0) 2413 goto rollback; 2414 } 2415 nd = nd->nd_next; 2416 } 2417 2418 RB_PREEMPT; 2419 rb_level = 2; /* level 2 */ 2420 2421 RB_TEST(4, "addhosts", ep) 2422 2423 /* 2424 * Add the new sidename for each drive to all the hosts 2425 * 2426 * If a multi-node diskset, each host only stores 2427 * the side information for itself. So, only send 2428 * side information to the new hosts where each host 2429 * will add the appropriate side information to its 2430 * local mddb. 2431 */ 2432 nd = sd->sd_nodelist; 2433 while (nd) { 2434 /* Skip nodes not being added */ 2435 if (!strinlst(nd->nd_nodename, node_c, 2436 node_v)) { 2437 nd = nd->nd_next; 2438 continue; 2439 } 2440 2441 /* Add side info to new hosts */ 2442 if (clnt_add_drv_sidenms(nd->nd_nodename, 2443 mynode(), sp, sd, node_c, node_v, ep)) 2444 goto rollback; 2445 2446 nd = nd->nd_next; 2447 } 2448 2449 RB_TEST(5, "addhosts", ep) 2450 2451 RB_PREEMPT; 2452 rb_level = 3; /* level 3 */ 2453 2454 RB_TEST(6, "addhosts", ep) 2455 2456 /* 2457 * Add the device names for the new sides into the namespace 2458 * for all hosts being added. This is adding the side 2459 * names to the diskset's mddb so add sidenames for all 2460 * of the new hosts. 
2461 */ 2462 nd = sd->sd_nodelist; 2463 while (nd) { 2464 /* Skip nodes not being added */ 2465 if (!strinlst(nd->nd_nodename, node_c, node_v)) { 2466 nd = nd->nd_next; 2467 continue; 2468 } 2469 2470 /* this side was just created, add the names */ 2471 if (add_md_sidenms(sp, nd->nd_nodeid, 2472 MD_SIDEWILD, ep)) 2473 goto rollback; 2474 2475 nd = nd->nd_next; 2476 } 2477 2478 RB_TEST(7, "addhosts", ep) 2479 2480 RB_PREEMPT; 2481 rb_level = 4; /* level 4 */ 2482 2483 RB_TEST(8, "addhosts", ep) 2484 2485 if (add_db_sidenms(sp, ep)) 2486 goto rollback; 2487 2488 } else { 2489 RB_PREEMPT; 2490 rb_level = 4; 2491 } 2492 2493 RB_TEST(9, "addhosts", ep) 2494 2495 RB_PREEMPT; 2496 rb_level = 5; /* level 5 */ 2497 2498 RB_TEST(10, "addhosts", ep) 2499 2500 if (dd != NULL) { 2501 /* 2502 * Notify rpc.mdcommd on all nodes of a nodelist change. 2503 * Start by suspending rpc.mdcommd (which drains it of all 2504 * messages), then change the nodelist followed by a reinit 2505 * and resume. 2506 */ 2507 nd = sd->sd_nodelist; 2508 /* Send suspend_all to nodes in nodelist (existing + new) */ 2509 /* All nodes are guaranteed to be ALIVE */ 2510 while (nd) { 2511 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_SUSPEND, 2512 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, ep)) { 2513 rval = -1; 2514 goto rollback; 2515 } 2516 suspendall_flag = 1; 2517 nd = nd->nd_next; 2518 } 2519 } 2520 2521 /* Add the node(s) to the each host that is currently in the set */ 2522 nd = sd->sd_nodelist; 2523 /* All nodes are guaranteed to be ALIVE */ 2524 while (nd) { 2525 if (clnt_addhosts(nd->nd_nodename, sp, node_c, node_v, ep)) { 2526 goto rollback; 2527 } 2528 nd = nd->nd_next; 2529 } 2530 2531 RB_TEST(11, "addhosts", ep) 2532 2533 if (dd != NULL) { 2534 /* 2535 * Mark the drives MD_DR_OK. 
2536 */ 2537 nd = sd->sd_nodelist; 2538 /* All nodes are guaranteed to be ALIVE */ 2539 while (nd) { 2540 if (clnt_upd_dr_flags(nd->nd_nodename, sp, dd, 2541 MD_DR_OK, ep) == -1) 2542 goto rollback; 2543 nd = nd->nd_next; 2544 } 2545 } 2546 2547 RB_TEST(12, "addhosts", ep) 2548 2549 RB_PREEMPT; 2550 rb_level = 6; /* level 6 */ 2551 2552 RB_TEST(13, "addhosts", ep) 2553 2554 2555 /* Add the mediator information to all hosts in the set. */ 2556 nd = sd->sd_nodelist; 2557 /* All nodes are guaranteed to be ALIVE */ 2558 while (nd) { 2559 if (clnt_updmeds(nd->nd_nodename, sp, &sd->sd_med, ep)) 2560 goto rollback; 2561 nd = nd->nd_next; 2562 } 2563 2564 RB_TEST(14, "addhosts", ep) 2565 2566 /* 2567 * If a MN diskset and there are drives in the set, 2568 * set the master on the new nodes and 2569 * automatically join the new nodes into the set. 2570 */ 2571 if (dd != NULL) { 2572 mddb_config_t c; 2573 /* 2574 * Is current set STALE? 2575 */ 2576 (void) memset(&c, 0, sizeof (c)); 2577 c.c_id = 0; 2578 c.c_setno = sp->setno; 2579 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 2580 (void) mdstealerror(ep, &c.c_mde); 2581 rval = -1; 2582 goto out; 2583 } 2584 if (c.c_flags & MDDB_C_STALE) { 2585 stale_flag = MNSET_IS_STALE; 2586 } 2587 2588 /* Set master on newly added nodes */ 2589 for (i = 0; i < node_c; i++) { 2590 if (clnt_mnsetmaster(node_v[i], sp, 2591 sd->sd_mn_master_nodenm, 2592 sd->sd_mn_master_nodeid, ep)) { 2593 goto rollback; 2594 } 2595 } 2596 /* Join newly added nodes to diskset and set OWN flag */ 2597 for (i = 0; i < node_c; i++) { 2598 if (clnt_joinset(node_v[i], sp, stale_flag, ep)) 2599 goto rollback; 2600 nd = sd->sd_nodelist; 2601 while (nd) { 2602 if (strcmp(nd->nd_nodename, node_v[i]) == 0) { 2603 nd->nd_flags |= MD_MN_NODE_OWN; 2604 /* 2605 * Also set ADD flag since this flag 2606 * is already set in rpc.metad - it's 2607 * just not in the local copy. 
2608 * Could flush local cache and call 2609 * metaget_setdesc, but this just 2610 * adds time. Since this node knows 2611 * the state of the node flags in 2612 * rpc.metad, just set the ADD 2613 * flag and save time. 2614 */ 2615 nd->nd_flags |= MD_MN_NODE_ADD; 2616 break; 2617 } 2618 nd = nd->nd_next; 2619 } 2620 } 2621 2622 /* Send new node flag list to all Owner nodes */ 2623 nd = sd->sd_nodelist; 2624 while (nd) { 2625 if (!(nd->nd_flags & MD_MN_NODE_OWN)) { 2626 nd = nd->nd_next; 2627 continue; 2628 } 2629 /* 2630 * Will effectively set OWN flag in records kept 2631 * cached in rpc.metad. The ADD flag would have 2632 * already been set by the call to clnt_addhosts. 2633 */ 2634 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 2635 sd->sd_nodelist, MD_NR_SET, NULL, ep)) { 2636 goto rollback; 2637 } 2638 nd = nd->nd_next; 2639 } 2640 } 2641 2642 /* 2643 * Mark the set record MD_SR_OK 2644 */ 2645 nd = sd->sd_nodelist; 2646 /* All nodes are guaranteed to be ALIVE */ 2647 while (nd) { 2648 if (clnt_upd_sr_flags(nd->nd_nodename, sp, MD_SR_OK, 2649 ep)) { 2650 goto rollback; 2651 } 2652 nd = nd->nd_next; 2653 } 2654 2655 /* 2656 * For MN diskset: 2657 * On each newly added node, set the node record for that node 2658 * to OK. Then set all node records for the newly added 2659 * nodes on all nodes to ok. 2660 * 2661 * By setting a node's own node record to ok first, even if 2662 * the node adding the hosts panics, the rest of the nodes can 2663 * determine the same node list during the choosing of the master 2664 * during reconfig. So, only nodes considered for mastership 2665 * are nodes that have both MD_MN_NODE_OK and MD_SR_OK set 2666 * on that node's rpc.metad. If all nodes have MD_SR_OK set, 2667 * but no node has its own MD_MN_NODE_OK set, then the set will 2668 * be removed during reconfig since a panic occurred during the 2669 * creation of the initial diskset. 
2670 */ 2671 2672 for (i = 0; i < node_c; i++) { 2673 nd = sd->sd_nodelist; 2674 /* All nodes are guaranteed to be ALIVE */ 2675 while (nd) { 2676 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 2677 break; 2678 nd = nd->nd_next; 2679 } 2680 /* Something wrong, will pick this up in next loop */ 2681 if (nd == NULL) 2682 continue; 2683 2684 /* Only changing my local cache of node list */ 2685 saved_nd_next = nd->nd_next; 2686 nd->nd_next = NULL; 2687 2688 /* Set node record for added host to ok on that host */ 2689 if (clnt_upd_nr_flags(node_v[i], sp, 2690 nd, MD_NR_OK, NULL, ep)) { 2691 nd->nd_next = saved_nd_next; 2692 goto rollback; 2693 } 2694 nd->nd_next = saved_nd_next; 2695 } 2696 2697 /* Now set all node records on all nodes to be ok */ 2698 nd = sd->sd_nodelist; 2699 /* All nodes are guaranteed to be ALIVE */ 2700 while (nd) { 2701 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 2702 sd->sd_nodelist, MD_NR_OK, NULL, ep)) { 2703 goto rollback; 2704 } 2705 nd = nd->nd_next; 2706 } 2707 2708 RB_TEST(15, "addhosts", ep) 2709 out: 2710 /* 2711 * Notify rpc.mdcommd on all nodes of a nodelist change. 2712 * Send reinit command to mdcommd which forces it to get 2713 * fresh set description. Then send resume. 2714 * Resume on class 0 will resume all classes, so can skip 2715 * doing an explicit resume of class1 (ignore suspend1_flag). 2716 */ 2717 if (suspendall_flag) { 2718 /* 2719 * Don't know if nodelist contains the nodes being added 2720 * or not, so do reinit to nodes not being added (by skipping 2721 * any nodes in the nodelist being added) and then do 2722 * reinit to nodes being added if remote_sets_created is 1. 
2723 */ 2724 nd = sd->sd_nodelist; 2725 /* All nodes are guaranteed to be ALIVE */ 2726 while (nd) { 2727 /* Skip nodes being added - handled later */ 2728 if (strinlst(nd->nd_nodename, node_c, node_v)) { 2729 nd = nd->nd_next; 2730 continue; 2731 } 2732 /* Class is ignored for REINIT */ 2733 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 2734 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 2735 if (rval == 0) 2736 (void) mdstealerror(ep, &xep); 2737 rval = -1; 2738 mde_perror(ep, dgettext(TEXT_DOMAIN, 2739 "Unable to reinit rpc.mdcommd.\n")); 2740 } 2741 nd = nd->nd_next; 2742 } 2743 /* 2744 * Send reinit to added nodes that had a set created since 2745 * rpc.mdcommd is running on the nodes with a set. 2746 */ 2747 if (remote_sets_created == 1) { 2748 for (i = 0; i < node_c; i++) { 2749 if (clnt_mdcommdctl(node_v[i], COMMDCTL_REINIT, 2750 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 2751 if (rval == 0) 2752 (void) mdstealerror(ep, &xep); 2753 rval = -1; 2754 mde_perror(ep, dgettext(TEXT_DOMAIN, 2755 "Unable to reinit rpc.mdcommd.\n")); 2756 } 2757 } 2758 } 2759 } 2760 if ((suspend1_flag) || (suspendall_flag)) { 2761 /* 2762 * Unlock diskset by resuming messages across the diskset. 2763 * Just resume all classes so that resume is the same whether 2764 * just one class was locked or all classes were locked. 2765 * 2766 * Don't know if nodelist contains the nodes being added 2767 * or not, so do resume_all to nodes not being added (by 2768 * skipping any nodes in the nodelist being added) and then do 2769 * resume_all to nodes being added if remote_sets_created is 1. 
2770 */ 2771 nd = sd->sd_nodelist; 2772 /* All nodes are guaranteed to be ALIVE */ 2773 while (nd) { 2774 /* Skip nodes being added - handled later */ 2775 if (strinlst(nd->nd_nodename, node_c, node_v)) { 2776 nd = nd->nd_next; 2777 continue; 2778 } 2779 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 2780 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 2781 if (rval == 0) 2782 (void) mdstealerror(ep, &xep); 2783 rval = -1; 2784 mde_perror(ep, dgettext(TEXT_DOMAIN, 2785 "Unable to resume rpc.mdcommd.\n")); 2786 } 2787 nd = nd->nd_next; 2788 } 2789 /* 2790 * Send resume to added nodes that had a set created since 2791 * rpc.mdcommd is be running on the nodes with a set. 2792 */ 2793 if (remote_sets_created == 1) { 2794 for (i = 0; i < node_c; i++) { 2795 /* Already verified to be alive */ 2796 if (clnt_mdcommdctl(node_v[i], COMMDCTL_RESUME, 2797 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 2798 if (rval == 0) 2799 (void) mdstealerror(ep, &xep); 2800 rval = -1; 2801 mde_perror(ep, dgettext(TEXT_DOMAIN, 2802 "Unable to resume rpc.mdcommd.\n")); 2803 } 2804 } 2805 } 2806 meta_ping_mnset(sp->setno); 2807 /* 2808 * Start a resync thread on the newly added nodes 2809 * if set is not stale. 
Also start a thread to update the 2810 * abr state of all soft partitions 2811 */ 2812 if (stale_flag != MNSET_IS_STALE) { 2813 for (i = 0; i < node_c; i++) { 2814 if (clnt_mn_mirror_resync_all(node_v[i], 2815 sp->setno, &xep)) { 2816 if (rval == 0) 2817 (void) mdstealerror(ep, &xep); 2818 rval = -1; 2819 mde_perror(ep, dgettext(TEXT_DOMAIN, 2820 "Unable to start resync " 2821 "thread.\n")); 2822 } 2823 if (clnt_mn_sp_update_abr(node_v[i], 2824 sp->setno, &xep)) { 2825 if (rval == 0) 2826 (void) mdstealerror(ep, &xep); 2827 rval = -1; 2828 mde_perror(ep, dgettext(TEXT_DOMAIN, 2829 "Unable to start sp update " 2830 "thread.\n")); 2831 } 2832 } 2833 } 2834 } 2835 cl_sk = cl_get_setkey(sp->setno, sp->setname); 2836 /* 2837 * Don't know if nodelist contains the nodes being added 2838 * or not, so do clnt_unlock_set to nodes not being added (by 2839 * skipping any nodes in the nodelist being added) and then do 2840 * clnt_unlock_set to nodes being added. 2841 */ 2842 if (lock_flag) { 2843 nd = sd->sd_nodelist; 2844 /* All nodes are guaranteed to be ALIVE */ 2845 while (nd) { 2846 /* Skip hosts we get in the next loop */ 2847 if (strinlst(nd->nd_nodename, node_c, node_v)) { 2848 nd = nd->nd_next; 2849 continue; 2850 } 2851 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { 2852 if (rval == 0) 2853 (void) mdstealerror(ep, &xep); 2854 rval = -1; 2855 } 2856 nd = nd->nd_next; 2857 } 2858 for (i = 0; i < node_c; i++) { 2859 /* Already verified to be alive */ 2860 if (clnt_unlock_set(node_v[i], cl_sk, &xep)) { 2861 if (rval == 0) 2862 (void) mdstealerror(ep, &xep); 2863 rval = -1; 2864 } 2865 } 2866 } 2867 cl_set_setkey(NULL); 2868 2869 metaflushsetname(sp); 2870 2871 /* release signals back to what they were on entry */ 2872 if (procsigs(FALSE, &oldsigs, &xep) < 0) 2873 mdclrerror(&xep); 2874 2875 return (rval); 2876 2877 rollback: 2878 rval = -1; 2879 2880 /* level 6 */ 2881 if (rb_level > 5) { 2882 /* 2883 * For each node being deleted, set DEL flag and 2884 * reset OK 
flag on that node first. 2885 * Until a node has turned off its own 2886 * rpc.metad's NODE_OK flag, that node could be 2887 * considered for master during a reconfig. 2888 */ 2889 for (i = 0; i < node_c; i++) { 2890 nd = sd->sd_nodelist; 2891 /* All nodes are guaranteed to be ALIVE */ 2892 while (nd) { 2893 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 2894 break; 2895 nd = nd->nd_next; 2896 } 2897 /* Something wrong, handle this in next loop */ 2898 if (nd == NULL) 2899 continue; 2900 2901 /* Only changing my local cache of node list */ 2902 saved_nd_next = nd->nd_next; 2903 nd->nd_next = NULL; 2904 2905 /* Set flags for del host to DEL on that host */ 2906 if (clnt_upd_nr_flags(node_v[i], sp, 2907 nd, MD_NR_DEL, NULL, &xep)) { 2908 mdclrerror(&xep); 2909 } 2910 nd->nd_next = saved_nd_next; 2911 } 2912 2913 for (i = 0; i < node_c; i++) { 2914 if (dd != NULL) { 2915 /* Reset master on newly added node */ 2916 if (clnt_mnsetmaster(node_v[i], sp, "", 2917 MD_MN_INVALID_NID, &xep)) 2918 mdclrerror(&xep); 2919 /* Withdraw set on newly added node */ 2920 if (clnt_withdrawset(node_v[i], sp, &xep)) 2921 mdclrerror(&xep); 2922 } 2923 /* 2924 * Turn off owner flag in nodes to be deleted 2925 * if there are drives in the set. 2926 * Also, turn off NODE_OK and turn on NODE_DEL 2927 * for nodes to be deleted. 2928 * These flags are used to set the node 2929 * record flags in all nodes in the set. 2930 */ 2931 nd = sd->sd_nodelist; 2932 while (nd) { 2933 if (strcmp(nd->nd_nodename, node_v[i]) == 0) { 2934 if (dd != NULL) { 2935 nd->nd_flags &= ~MD_MN_NODE_OWN; 2936 } 2937 nd->nd_flags |= MD_MN_NODE_DEL; 2938 nd->nd_flags &= ~MD_MN_NODE_OK; 2939 break; 2940 } 2941 nd = nd->nd_next; 2942 } 2943 } 2944 2945 /* 2946 * Now, reset owner and set delete flags for the deleted 2947 * nodes on all nodes. 
2948 */ 2949 nd = sd->sd_nodelist; 2950 while (nd) { 2951 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 2952 sd->sd_nodelist, MD_NR_SET, NULL, &xep)) { 2953 mdclrerror(&xep); 2954 } 2955 nd = nd->nd_next; 2956 } 2957 2958 /* 2959 * On each node being deleted, set the set record 2960 * to be in DEL state. 2961 */ 2962 for (i = 0; i < node_c; i++) { 2963 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, &xep)) { 2964 mdclrerror(&xep); 2965 } 2966 } 2967 } 2968 2969 /* level 5 */ 2970 if (rb_level > 4) { 2971 nd = sd->sd_nodelist; 2972 /* All nodes are guaranteed to be ALIVE */ 2973 while (nd) { 2974 if (clnt_delhosts(nd->nd_nodename, sp, node_c, 2975 node_v, &xep) == -1) 2976 mdclrerror(&xep); 2977 nd = nd->nd_next; 2978 } 2979 } 2980 2981 /* 2982 * Notify rpc.mdcommd on all nodes of a nodelist change. 2983 * Send reinit command to mdcommd which forces it to get 2984 * fresh set description. Then send resume. 2985 * Nodelist contains all nodes (existing + added). 2986 */ 2987 if (suspendall_flag) { 2988 /* Send reinit */ 2989 nd = sd->sd_nodelist; 2990 /* All nodes are guaranteed to be ALIVE */ 2991 /* Send reinit to nodes in nodelist before addhosts call */ 2992 while (nd) { 2993 /* 2994 * Skip nodes being added if remote sets were not 2995 * created since rpc.mdcommd may not be running 2996 * on the remote nodes. 
2997 */ 2998 if ((remote_sets_created == 0) && 2999 (strinlst(nd->nd_nodename, node_c, node_v))) { 3000 nd = nd->nd_next; 3001 continue; 3002 } 3003 /* Class is ignored for REINIT */ 3004 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 3005 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 3006 mde_perror(&xep, dgettext(TEXT_DOMAIN, 3007 "Unable to reinit rpc.mdcommd.\n")); 3008 mdclrerror(&xep); 3009 } 3010 nd = nd->nd_next; 3011 } 3012 3013 /* Send resume */ 3014 nd = sd->sd_nodelist; 3015 /* All nodes are guaranteed to be ALIVE */ 3016 while (nd) { 3017 /* 3018 * Skip nodes being added if remote sets were not 3019 * created since rpc.mdcommd may not be running 3020 * on the remote nodes. 3021 */ 3022 if ((remote_sets_created == 0) && 3023 (strinlst(nd->nd_nodename, node_c, node_v))) { 3024 nd = nd->nd_next; 3025 continue; 3026 } 3027 /* 3028 * Resume all classes but class 1 so that lock is held 3029 * against meta* commands. 3030 * Send resume_all_but_1 to nodes in nodelist 3031 * before addhosts call. 3032 */ 3033 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 3034 sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1, 3035 &xep)) { 3036 mde_perror(&xep, dgettext(TEXT_DOMAIN, 3037 "Unable to resume rpc.mdcommd.\n")); 3038 mdclrerror(&xep); 3039 } 3040 nd = nd->nd_next; 3041 } 3042 meta_ping_mnset(sp->setno); 3043 } 3044 3045 /* level 4 */ 3046 /* Nodelist may or may not contain nodes being added. */ 3047 if (rb_level > 3 && dd != NULL) { 3048 nd = sd->sd_nodelist; 3049 while (nd) { 3050 /* Skip nodes not being added */ 3051 if (!strinlst(nd->nd_nodename, node_c, node_v)) { 3052 nd = nd->nd_next; 3053 continue; 3054 } 3055 3056 if (del_db_sidenms(sp, nd->nd_nodeid, &xep)) 3057 mdclrerror(&xep); 3058 nd = nd->nd_next; 3059 } 3060 } 3061 3062 /* level 3 */ 3063 /* Nodelist may or may not contain nodes being added. 
*/ 3064 if (rb_level > 2 && dd != NULL) { 3065 nd = sd->sd_nodelist; 3066 while (nd) { 3067 /* Skip nodes not being added */ 3068 if (!strinlst(nd->nd_nodename, node_c, node_v)) { 3069 nd = nd->nd_next; 3070 continue; 3071 } 3072 3073 if (del_md_sidenms(sp, nd->nd_nodeid, &xep)) 3074 mdclrerror(&xep); 3075 nd = nd->nd_next; 3076 } 3077 } 3078 3079 /* level 1 */ 3080 if (rb_level > 0) { 3081 if (dd != NULL) { 3082 /* delete the drive records */ 3083 for (i = 0; i < node_c; i++) { 3084 if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1) 3085 mdclrerror(&xep); 3086 } 3087 } 3088 3089 /* delete the set record */ 3090 for (i = 0; i < node_c; i++) { 3091 if (clnt_delset(node_v[i], sp, &xep) == -1) 3092 mdclrerror(&xep); 3093 } 3094 } 3095 3096 /* level 0 */ 3097 cl_sk = cl_get_setkey(sp->setno, sp->setname); 3098 /* Don't test lock flag since guaranteed to be set if in rollback */ 3099 /* Nodelist may or may not contain nodes being added. */ 3100 /* 3101 * Unlock diskset by resuming messages across the diskset. 3102 * Just resume all classes so that resume is the same whether 3103 * just one class was locked or all classes were locked. 3104 */ 3105 if ((suspend1_flag) || (suspendall_flag)) { 3106 /* All nodes are guaranteed to be ALIVE */ 3107 nd = sd->sd_nodelist; 3108 while (nd) { 3109 /* 3110 * Skip nodes being added since remote sets 3111 * were either created and then deleted or 3112 * were never created. Either way - rpc.mdcommd 3113 * may not be running on the remote node. 
3114 				 */
3115 			if (strinlst(nd->nd_nodename, node_c, node_v)) {
3116 				nd = nd->nd_next;
3117 				continue;
3118 			}
3119 			if (clnt_mdcommdctl(nd->nd_nodename,
3120 			    COMMDCTL_RESUME, sp, MD_MSG_CLASS0,
3121 			    MD_MSCF_NO_FLAGS, &xep)) {
3122 				mde_perror(&xep, dgettext(TEXT_DOMAIN,
3123 				    "Unable to resume rpc.mdcommd.\n"));
3124 				mdclrerror(&xep);
3125 			}
3126 			nd = nd->nd_next;
3127 		}
3128 		meta_ping_mnset(sp->setno);
3129 	}
3130 	nd = sd->sd_nodelist;
3131 	/* All nodes are guaranteed to be ALIVE */
3132 	while (nd) {
3133 		/* Skip hosts we get in the next loop */
3134 		if (strinlst(nd->nd_nodename, node_c, node_v)) {
3135 			nd = nd->nd_next;
3136 			continue;
3137 		}
3138
3139 		if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep))
3140 			mdclrerror(&xep);
3141 		nd = nd->nd_next;
3142 	}
3143
3144 	for (i = 0; i < node_c; i++)
3145 		if (clnt_unlock_set(node_v[i], cl_sk, &xep))
3146 			mdclrerror(&xep);
3147 	cl_set_setkey(NULL);
3148
3149 	/* release signals back to what they were on entry */
3150 	if (procsigs(FALSE, &oldsigs, &xep) < 0)
3151 		mdclrerror(&xep);
3152
3153 	metaflushsetname(sp);
3154
3155 	return (rval);
3156 }
3157
3158 /*
3159  * Add host(s) to the traditional diskset provided in sp.
3160  *	- create set if non-existent.
      *
      * Arguments:
      *	sp		set to add the hosts to
      *	multi_node	passed through to create_set() and
      *			create_set_on_hosts()
      *	node_c, node_v	count and names of the hosts being added
      *	auto_take	passed to create_set() when the set does not exist;
      *			for an existing set a non-zero value (or
      *			MD_SR_AUTO_TAKE already set in sd_flags) is rejected
      *			with MDE_DS_SINGLEHOST
      *	ep		receives the error on failure
      *
      * Structure:
      *	1. Check code (up to "END CHECK CODE"): nothing has been changed
      *	   yet, so every failure simply returns.
      *	2. Locking: the set is locked on the current members and then on
      *	   the new hosts.  rb_level is still 0, so a lock failure jumps to
      *	   "out", not to "rollback".
      *	3. Staged update: rb_level is raised from 1 to 6 as each stage
      *	   completes.  A failure jumps to "rollback", which undoes the
      *	   completed stages in descending order.  Every rollback call
      *	   reports into xep and clears it, so the error already stored in
      *	   ep is the one the caller sees.
      *
      * RB_TEST and RB_PREEMPT are macros defined outside this view.
      *
      * Returns 0 on success.  The "out" and "rollback" paths return rval,
      * which is -1 on any failure.  Failures in the check code return -1,
      * the value of mddserror(), or (set does not exist) the value of
      * create_set().
3161  */
3162 static int
3163 meta_traditional_set_addhosts(
3164 	mdsetname_t	*sp,
3165 	int		multi_node,
3166 	int		node_c,
3167 	char		**node_v,
3168 	int		auto_take,
3169 	md_error_t	*ep
3170 )
3171 {
3172 	md_set_desc	*sd;
3173 	md_drive_desc	*dd, *p;
3174 	med_rec_t	medr;
3175 	med_rec_t	rb_medr;
3176 	int		rval = 0;
3177 	int		bool;
3178 	int		nodeindex;
3179 	int		i;
3180 	int		has_set;
3181 	int		numsides;
3182 	sigset_t	oldsigs;
3183 	md_setkey_t	*cl_sk;
3184 	int		rb_level = 0;
3185 	md_error_t	xep = mdnullerror;
3186 	int		max_meds;
3187
3188 	if (nodesuniq(sp, node_c, node_v, ep))
3189 		return (-1);
3190
3191 	if (validate_nodes(sp, node_c, node_v, ep))
3192 		return (-1);
3193
     	/*
     	 * No set descriptor: the only tolerated error is MDE_NO_SET, in
     	 * which case the error is cleared and the set is created from
     	 * scratch; the result of create_set() is returned directly.
     	 */
3194 	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
3195 		if (! mdiserror(ep, MDE_NO_SET))
3196 			return (-1);
3197 		mdclrerror(ep);
3198 		return (create_set(sp, multi_node, node_c, node_v, auto_take,
3199 		    ep));
3200 	}
3201
3202 	/* The auto_take behavior is inconsistent with multiple hosts. */
3203 	if (auto_take || sd->sd_flags & MD_SR_AUTO_TAKE) {
3204 		(void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, NULL,
3205 		    sp->setname);
3206 		return (-1);
3207 	}
3208
3209 	/*
3210 	 * We already have the set.
3211 	 */
3212
3213 	/* Make sure we own the set */
3214 	if (meta_check_ownership(sp, ep) != 0)
3215 		return (-1);
3216
3217 	/*
3218 	 * Perform the required checks for new hosts
     	 *
     	 * For each new host: it must not already have a side in this
     	 * set, must not already have a set of this name, the set number
     	 * must not be busy there, the set name must be acceptable there,
     	 * and check_setdrvs_againstnode() must succeed.
3219 	 */
3220 	for (i = 0; i < node_c; i++) {
3221 		if (getnodeside(node_v[i], sd) != MD_SIDEWILD)
3222 			return (mddserror(ep, MDE_DS_NODEINSET, sp->setno,
3223 			    node_v[i], NULL, sp->setname));
3224
3225 		/* Make sure this set name is not used on the other hosts */
3226 		has_set = nodehasset(sp, node_v[i], NHS_N_EQ, ep);
3227 		if (has_set < 0) {
3228 			if (! mdiserror(ep, MDE_NO_SET))
3229 				return (-1);
3230 			/* MDE_NO_SET is the expected answer here; carry on */
3231 			mdclrerror(ep);
3232 		} else if (has_set)
3233 			return (mddserror(ep, MDE_DS_NODEHASSET, sp->setno,
3234 			    node_v[i], NULL, sp->setname));
3235
3236 		if (clnt_setnumbusy(node_v[i], sp->setno, &bool, ep) == -1)
3237 			return (-1);
3238
3239 		if (bool == TRUE)
3240 			return (mddserror(ep, MDE_DS_SETNUMBUSY, sp->setno,
3241 			    node_v[i], NULL, sp->setname));
3242
3243 		if (clnt_setnameok(node_v[i], sp, &bool, ep) == -1)
3244 			return (-1);
3245
3246 		if (bool == FALSE)
3247 			return (mddserror(ep, MDE_DS_SETNAMEBUSY, sp->setno,
3248 			    node_v[i], NULL, sp->setname));
3249
3250 		if (check_setdrvs_againstnode(sp, node_v[i], ep))
3251 			return (-1);
3252 	}
3253
3254 	/* Count the number of occupied slots */
3255 	numsides = 0;
3256 	for (i = 0; i < MD_MAXSIDES; i++) {
3257 		/* Count occupied slots */
3258 		if (sd->sd_nodes[i][0] != '\0')
3259 			numsides++;
3260 	}
3261
3262 	/* Make sure that we have space to add the new sides */
3263 	if ((numsides + node_c) > MD_MAXSIDES) {
3264 		(void) mddserror(ep, MDE_DS_SIDENUMNOTAVAIL, sp->setno, NULL,
3265 		    NULL, sp->setname);
3266 		return (-1);
3267 	}
3268
3269 	/*
     	 * Get drive descriptors for the set.  A NULL result with ep still
     	 * ok is not treated as an error; the code below uses dd == NULL
     	 * to mean that the set has no drives.
     	 */
3270 	if ((dd = metaget_drivedesc(sp, MD_FULLNAME_ONLY, ep)) == NULL)
3271 		if (! mdisok(ep))
3272 			return (-1);
3273
3274 	/*
     	 * Setup the mediator record roll-back structure.  rb_medr is
     	 * built from sd->sd_nodes[] as it is before any host is merged
     	 * in.  It is the template for the updated record (medr) and is
     	 * what rollback level 6 sends back to the mediator hosts.
     	 */
3275 	(void) memset(&rb_medr, '\0', sizeof (med_rec_t));
3276 	rb_medr.med_rec_mag = MED_REC_MAGIC;
3277 	rb_medr.med_rec_rev = MED_REC_REV;
3278 	rb_medr.med_rec_fl = 0;
3279 	rb_medr.med_rec_sn = sp->setno;
3280 	(void) strcpy(rb_medr.med_rec_snm, sp->setname);
3281 	for (i = 0; i < MD_MAXSIDES; i++)
3282 		(void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]);
3283 	rb_medr.med_rec_meds = sd->sd_med;	/* structure assignment */
3284 	(void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t));
3285 	rb_medr.med_rec_foff = 0;
3286 	crcgen(&rb_medr, &rb_medr.med_rec_cks, sizeof (med_rec_t), NULL);
3287
     	/* A max_meds of 0 is treated as a failure of get_max_meds() */
3288 	if ((max_meds = get_max_meds(ep)) == 0)
3289 		return (-1);
3290
3291 	/* END CHECK CODE */
3292
     	/* Paired with md_rb_sig_handling_off() on both exit paths below */
3293 	md_rb_sig_handling_on();
3294
3295 	/*
     	 * Lock the set on current set members.  rb_level is still 0, so
     	 * a failure here (and in the next loop) goes to out, not rollback.
     	 */
3296 	for (i = 0; i < MD_MAXSIDES; i++) {
3297 		/* Skip empty slots */
3298 		if (sd->sd_nodes[i][0] == '\0')
3299 			continue;
3300
3301 		if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) {
3302 			rval = -1;
3303 			goto out;
3304 		}
3305 	}
3306
3307 	/* Lock the set on new set members */
3308 	for (i = 0; i < node_c; i++) {
3309 		if (clnt_lock_set(node_v[i], sp, ep)) {
3310 			rval = -1;
3311 			goto out;
3312 		}
3313 	}
3314
3315 	RB_TEST(1, "addhosts", ep)
3316
3317 	RB_PREEMPT;
3318 	rb_level = 1;	/* level 1 */
3319
3320 	RB_TEST(2, "addhosts", ep)
3321
3322 	/*
3323 	 * Add the new hosts to the existing set record on the existing hosts
3324 	 */
3325 	for (i = 0; i < MD_MAXSIDES; i++) {
3326 		/* skip empty slots */
3327 		if (sd->sd_nodes[i][0] == '\0')
3328 			continue;
3329
3330 		if (clnt_addhosts(sd->sd_nodes[i], sp, node_c, node_v, ep))
3331 			goto rollback;
3332 	}
3333
3334 	RB_PREEMPT;
3335 	rb_level = 2;	/* level 2 */
3336
3337 	RB_TEST(3, "addhosts", ep);
3338
3339 	/*
     	 * Merge the new entries into the set with the existing sides.
     	 * The names in node_v[] are copied, in order, into the first
     	 * empty slots of the local sd->sd_nodes[]; from here on, loops
     	 * over sd->sd_nodes[] see the new hosts as well.
     	 * NOTE(review): node_v[nodeindex] is read before nodeindex is
     	 * compared with node_c, so this presumably relies on
     	 * node_c >= 1 -- confirm against callers.
     	 */
3340 	nodeindex = 0;
3341 	for (i = 0; i < MD_MAXSIDES; i++) {
3342 		/* Skip full slots */
3343 		if (sd->sd_nodes[i][0] != '\0')
3344 			continue;
3345
3346 		(void) strcpy(sd->sd_nodes[i], node_v[nodeindex++]);
3347 		if (nodeindex == node_c)
3348 			break;
3349 	}
3350
3351 	/* If we have drives */
3352 	if (dd != NULL) {
3353 		/*
3354 		 * For all the hosts being added, create a sidename structure
     		 * (one make_sideno_sidenm() call per drive, using the slot
     		 * index i as the side number).
3355 		 */
3356 		for (i = 0; i < MD_MAXSIDES; i++) {
3357 			/* Skip empty slots */
3358 			if (sd->sd_nodes[i][0] == '\0')
3359 				continue;
3360
3361 			/* Skip nodes not being added */
3362 			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
3363 				continue;
3364
3365 			for (p = dd; p != NULL; p = p->dd_next) {
3366 				if (make_sideno_sidenm(sp, p->dd_dnp, i,
3367 				    ep) != 0)
3368 					goto rollback;
3369 			}
3370 		}
3371
3372 		/*
3373 		 * Add the new sidename for each drive to the existing hosts
3374 		 */
3375 		for (i = 0; i < MD_MAXSIDES; i++) {
3376 			/* Skip empty slots */
3377 			if (sd->sd_nodes[i][0] == '\0')
3378 				continue;
3379
3380 			/* Skip nodes being added */
3381 			if (strinlst(sd->sd_nodes[i], node_c, node_v))
3382 				continue;
3383
3384 			if (clnt_add_drv_sidenms(sd->sd_nodes[i], mynode(), sp,
3385 			    sd, node_c, node_v, ep)) {
3386 				goto rollback;
3387 			}
3388 		}
3389
3390 		RB_TEST(4, "addhosts", ep)
3391
3392 		RB_PREEMPT;
3393 		rb_level = 3;	/* level 3 */
3394
3395 		RB_TEST(5, "addhosts", ep)
3396
3397 		if (add_db_sidenms(sp, ep)) {
3398 			goto rollback;
3399 		}
3400
3401 	} else {
     		/* No drives: nothing to do for this stage, just advance */
3402 		RB_PREEMPT;
3403 		rb_level = 3;
3404 	}
3405
3406 	RB_TEST(6, "addhosts", ep)
3407
3408 	RB_PREEMPT;
3409 	rb_level = 4;	/* level 4 */
3410
3411 	RB_TEST(7, "addhosts", ep)
3412
3413
3414 	/*
     	 * create the set on the new nodes, this adds the drives as well.
     	 * Note that a literal 0 is passed in the position after node_v,
     	 * not this function's auto_take argument.
     	 */
3415 	if (create_set_on_hosts(sp, multi_node, node_c, node_v, 0, ep)) {
3416 		goto rollback;
3417 	}
3418
3419 	RB_TEST(8, "addhosts", ep)
3420
3421 	RB_PREEMPT;
3422 	rb_level = 5;	/* level 5 */
3423
3424 	RB_TEST(9, "addhosts", ep)
3425
3426 	if (dd != NULL) {
3427
3428 		/*
3429 		 * Add the device entries for the new sides into the namespace.
3430 		 */
3431 		for (i = 0; i < MD_MAXSIDES; i++) {
3432 			/* Skip empty slots */
3433 			if (sd->sd_nodes[i][0] == '\0')
3434 				continue;
3435
3436 			/* Skip nodes not being added */
3437 			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
3438 				continue;
3439
3440 			if (add_md_sidenms(sp, i, MD_SIDEWILD, ep))
3441 				goto rollback;
3442 		}
3443 	}
3444
3445 	RB_TEST(10, "addhosts", ep)
3446
3447 	RB_PREEMPT;
3448 	rb_level = 6;	/* level 6 */
3449
3450 	RB_TEST(11, "addhosts", ep);
3451
3452 	if (dd != NULL) {
3453 		/*
3454 		 * Mark the drives MD_DR_OK.
     		 * This is done on every occupied slot, i.e. on the
     		 * existing hosts and the new ones.
3455 		 */
3456 		for (i = 0; i < MD_MAXSIDES; i++) {
3457 			/* Skip empty slots */
3458 			if (sd->sd_nodes[i][0] == '\0')
3459 				continue;
3460
3461 			if (clnt_upd_dr_flags(sd->sd_nodes[i], sp, dd,
3462 			    MD_DR_OK, ep) == -1) {
3463 				goto rollback;
3464 			}
3465 		}
3466 	}
3467
3468 	RB_TEST(12, "addhosts", ep)
3469
3470 	/*
     	 * Bring the mediator record up to date with the set record:
     	 * start from the saved record, replace the node names with the
     	 * merged sd->sd_nodes[] and regenerate the checksum.
     	 */
3471 	medr = rb_medr;	/* structure assignment */
3472 	for (i = 0; i < MD_MAXSIDES; i++)
3473 		(void) strcpy(medr.med_rec_nodes[i], sd->sd_nodes[i]);
3474 	crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL);
3475
3476 	/* Inform the mediator hosts of the new node list */
3477 	for (i = 0; i < max_meds; i++) {
     		/* Skip mediator entries with a zero a_cnt */
3478 		if (sd->sd_med.n_lst[i].a_cnt == 0)
3479 			continue;
3480
3481 		if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, &medr, ep))
3482 			goto rollback;
3483 	}
3484
3485 	/* Add the mediator information to all hosts in the set */
3486 	for (i = 0; i < MD_MAXSIDES; i++) {
3487 		/* Skip empty slots */
3488 		if (sd->sd_nodes[i][0] == '\0')
3489 			continue;
3490
3491 		if (clnt_updmeds(sd->sd_nodes[i], sp, &sd->sd_med, ep))
3492 			goto rollback;
3493 	}
3494
3495 	RB_TEST(13, "addhosts", ep)
3496
3497 	/*
3498 	 * Mark the set record MD_SR_OK
3499 	 */
3500 	for (i = 0; i < MD_MAXSIDES; i++) {
3501 		/* Skip empty slots */
3502 		if (sd->sd_nodes[i][0] == '\0')
3503 			continue;
3504
3505 		if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, MD_SR_OK, ep))
3506 			goto rollback;
3507 	}
3508
3509 	RB_TEST(14, "addhosts", ep)
3510
     /*
      * out: reached by falling through on success and by the lock failures
      * above (rval already -1).  The first unlock error is moved into ep
      * only when no earlier error has been recorded (rval == 0).
      */
3511 out:
3512 	cl_sk = cl_get_setkey(sp->setno, sp->setname);
3513 	for (i = 0; i < MD_MAXSIDES; i++) {
3514 		/* Skip empty slots */
3515 		if (sd->sd_nodes[i][0] == '\0')
3516 			continue;
3517
3518 		/* Skip hosts we get in the next loop */
3519 		if (strinlst(sd->sd_nodes[i], node_c, node_v))
3520 			continue;
3521
3522 		if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) {
3523 			if (rval == 0)
3524 				(void) mdstealerror(ep, &xep);
3525 			rval = -1;
3526 		}
3527 	}
3528
     	/*
     	 * The new hosts are unlocked only if everything so far succeeded.
     	 * NOTE(review): if clnt_lock_set() failed on a later new host,
     	 * or an unlock above failed, any new hosts that were locked
     	 * are not unlocked on this path -- confirm that this is
     	 * acceptable.
     	 */
3529 	if (rval == 0) {
3530 		for (i = 0; i < node_c; i++)
3531 			if (clnt_unlock_set(node_v[i], cl_sk, &xep)) {
3532 				if (rval == 0)
3533 					(void) mdstealerror(ep, &xep);
3534 				rval = -1;
3535 			}
3536 	}
3537 	cl_set_setkey(NULL);
3538
3539 	metaflushsetname(sp);
3540
3541 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
3542
3543 	return (rval);
3544
     /*
      * rollback: undo the completed stages, highest level first.  Each
      * "level N" section below runs when rb_level >= N.  All calls report
      * into xep, which is cleared after every failure, so ep keeps the
      * error that caused the rollback.
      */
3545 rollback:
3546 	/* Make sure we are blocking all signals */
3547 	if (procsigs(TRUE, &oldsigs, &xep) < 0)
3548 		mdclrerror(&xep);
3549
3550 	rval = -1;
3551
3552 	/*
     	 * level 6: put the saved mediator record back on the mediator
     	 * hosts and, if there are drives, delete the md side names of
     	 * the sides that were being added.
     	 */
3553 	if (rb_level > 5) {
3554 		for (i = 0; i < max_meds; i++) {
3555 			if (sd->sd_med.n_lst[i].a_cnt == 0)
3556 				continue;
3557
3558 			if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp,
3559 			    &rb_medr, &xep))
3560 				mdclrerror(&xep);
3561 		}
3562 		if (dd != NULL) {
3563 			for (i = 0; i < MD_MAXSIDES; i++) {
3564 				/* Skip empty slots */
3565 				if (sd->sd_nodes[i][0] == '\0')
3566 					continue;
3567
3568 				/* Skip nodes not being added */
3569 				if (! strinlst(sd->sd_nodes[i], node_c, node_v))
3570 					continue;
3571
3572 				if (del_md_sidenms(sp, i, &xep))
3573 					mdclrerror(&xep);
3574 			}
3575 		}
3576 	}
3577
3578 	/* level 5: remove what create_set_on_hosts() put on the new hosts */
3579 	if (rb_level > 4) {
3580 		if (dd != NULL) {
3581 			/* delete the drive records */
3582 			for (i = 0; i < node_c; i++) {
3583 				if (clnt_deldrvs(node_v[i], sp, dd, &xep) == -1)
3584 					mdclrerror(&xep);
3585 			}
3586 		}
3587 		/* delete the set record on the 'new' hosts */
3588 		for (i = 0; i < node_c; i++) {
3589 			if (clnt_delset(node_v[i], sp, &xep) == -1)
3590 				mdclrerror(&xep);
3591 		}
3592 	}
3593
3594 	/* level 4: delete the db side names of the sides being added */
3595 	if (rb_level > 3 && dd != NULL) {
3596 		for (i = 0; i < MD_MAXSIDES; i++) {
3597 			/* Skip empty slots */
3598 			if (sd->sd_nodes[i][0] == '\0')
3599 				continue;
3600
3601 			/* Skip nodes not being added */
3602 			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
3603 				continue;
3604
3605 			if (del_db_sidenms(sp, i, &xep))
3606 				mdclrerror(&xep);
3607 		}
3608 	}
3609
3610 	/*
     	 * level 3: delete the drive side names.
     	 * NOTE(review): the forward step called clnt_add_drv_sidenms()
     	 * on the existing hosts (it skipped the hosts being added),
     	 * whereas this loop calls clnt_del_drv_sidenms() on the hosts
     	 * being added and skips the existing ones -- confirm that this
     	 * asymmetry is intended.
     	 */
3611 	if (rb_level > 2 && dd != NULL) {
3612 		for (i = 0; i < MD_MAXSIDES; i++) {
3613 			/* Skip empty slots */
3614 			if (sd->sd_nodes[i][0] == '\0')
3615 				continue;
3616
3617 			/* Skip nodes not being added */
3618 			if (! strinlst(sd->sd_nodes[i], node_c, node_v))
3619 				continue;
3620
3621 			if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp,
3622 			    &xep) == -1)
3623 				mdclrerror(&xep);
3624 		}
3625 	}
3626
3627 	/*
     	 * level 2: remove the new hosts from the set record.  This runs
     	 * over every occupied slot of the local sd->sd_nodes[], which
     	 * includes the new hosts once the merge above has been done.
     	 */
3628 	if (rb_level > 1) {
3629 		for (i = 0; i < MD_MAXSIDES; i++) {
3630 			/* Skip empty slots */
3631 			if (sd->sd_nodes[i][0] == '\0')
3632 				continue;
3633
3634 			if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v,
3635 			    &xep) == -1)
3636 				mdclrerror(&xep);
3637 		}
3638 	}
3639
3640 	/* level 1: release the set locks, existing hosts first */
3641 	if (rb_level > 0) {
3642 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
3643 		for (i = 0; i < MD_MAXSIDES; i++) {
3644 			/* Skip empty slots */
3645 			if (sd->sd_nodes[i][0] == '\0')
3646 				continue;
3647
3648 			/* Skip hosts we get in the next loop */
3649 			if (strinlst(sd->sd_nodes[i], node_c, node_v))
3650 				continue;
3651
3652 			if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep))
3653 				mdclrerror(&xep);
3654 		}
3655
3656 		for (i = 0; i < node_c; i++)
3657 			if (clnt_unlock_set(node_v[i], cl_sk, &xep))
3658 				mdclrerror(&xep);
3659 		cl_set_setkey(NULL);
3660 	}
3661
3662 	/* release signals back to what they were on entry */
3663 	if (procsigs(FALSE, &oldsigs, &xep) < 0)
3664 		mdclrerror(&xep);
3665
3666 	metaflushsetname(sp);
3667
3668 	md_rb_sig_handling_off(md_got_sig(), md_which_sig());
3669
3670 	return (rval);
3671 }
3672
3673 /*
3674  * Add host(s) to the diskset provided in sp.
3675  *	- create set if non-existent.
      *
      * Thin dispatcher: a non-zero multi_node selects
      * meta_multinode_set_addhosts(), zero selects
      * meta_traditional_set_addhosts().  All arguments are passed through
      * unchanged and the selected function's return value is returned.
3676  */
3677 int
3678 meta_set_addhosts(
3679 	mdsetname_t	*sp,
3680 	int		multi_node,
3681 	int		node_c,
3682 	char		**node_v,
3683 	int		auto_take,
3684 	md_error_t	*ep
3685 )
3686 {
3687 	if (multi_node)
3688 		return (meta_multinode_set_addhosts(sp, multi_node, node_c,
3689 		    node_v, auto_take, ep));
3690 	else
3691 		return (meta_traditional_set_addhosts(sp, multi_node, node_c,
3692 		    node_v, auto_take, ep));
3693 }
3694
3695 /*
3696  * Delete host(s) from the diskset provided in sp.
3697  *	- destroy set if last host in set is removed.
3698 */ 3699 int 3700 meta_set_deletehosts( 3701 mdsetname_t *sp, 3702 int node_c, 3703 char **node_v, 3704 int forceflg, 3705 md_error_t *ep 3706 ) 3707 { 3708 md_set_desc *sd; 3709 md_drive_desc *dd; 3710 med_rec_t medr; 3711 med_rec_t rb_medr; 3712 int i, j; 3713 int has_set; 3714 int numsides = 0; 3715 int oha = FALSE; 3716 sigset_t oldsigs; 3717 mhd_mhiargs_t mhiargs; 3718 md_replicalist_t *rlp = NULL; 3719 md_setkey_t *cl_sk; 3720 ulong_t max_genid = 0; 3721 int rval = 0; 3722 int rb_level = 0; 3723 int max_meds = 0; 3724 md_error_t xep = mdnullerror; 3725 md_mnnode_desc *nd; 3726 md_mnnode_record *nr; 3727 int delete_master = 0; 3728 int suspendall_flag = 0, suspendall_flag_rb = 0; 3729 int suspend1_flag = 0; 3730 int lock_flag = 0; 3731 int stale_flag = 0; 3732 int *node_id_list = NULL; 3733 int remote_sets_deleted = 0; 3734 3735 if ((sd = metaget_setdesc(sp, ep)) == NULL) 3736 return (-1); 3737 3738 /* 3739 * Verify that list of nodes being deleted contains no 3740 * duplicates. 3741 */ 3742 if (nodesuniq(sp, node_c, node_v, ep)) 3743 return (-1); 3744 3745 /* Make sure we own the set */ 3746 if (meta_check_ownership(sp, ep) != 0) 3747 return (-1); 3748 3749 /* 3750 * The drive and node records are stored in the local mddbs of each 3751 * node in the diskset. Each node's rpc.metad daemon reads in the set, 3752 * drive and node records from that node's local mddb and caches them 3753 * internally. Any process needing diskset information contacts its 3754 * local rpc.metad to get this information. Since each node in the 3755 * diskset is independently reading the set information from its local 3756 * mddb, the set, drive and node records in the local mddbs must stay 3757 * in-sync, so that all nodes have a consistent view of the diskset. 3758 * 3759 * For a multinode diskset, explicitly verify that all nodes in the 3760 * diskset are ALIVE (i.e. are in the API membership list) if the 3761 * forceflag is FALSE. 
(The case of forceflag being TRUE is handled 3762 * in OHA check above.) 3763 * 3764 * If forceflag is FALSE and a node in the diskset is not in 3765 * the membership list, then fail this operation since all nodes must 3766 * be ALIVE in order to delete the node record from their local mddb. 3767 * If a panic of this node leaves the local mddbs set, node and drive 3768 * records out-of-sync, the reconfig cycle will fix the local mddbs 3769 * and force them back into synchronization. 3770 */ 3771 if ((forceflg == FALSE) && (MD_MNSET_DESC(sd))) { 3772 nd = sd->sd_nodelist; 3773 while (nd) { 3774 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 3775 return (mddserror(ep, MDE_DS_NOTINMEMBERLIST, 3776 sp->setno, nd->nd_nodename, 3777 NULL, sp->setname)); 3778 } 3779 nd = nd->nd_next; 3780 } 3781 } 3782 3783 3784 /* 3785 * Lock the set on current set members. 3786 * Set locking done much earlier for MN diskset than for traditional 3787 * diskset since lock_set and SUSPEND are used to protect against 3788 * other meta* commands running on the other nodes. 3789 */ 3790 if (MD_MNSET_DESC(sd)) { 3791 /* Make sure we are blocking all signals */ 3792 if (procsigs(TRUE, &oldsigs, &xep) < 0) 3793 mdclrerror(&xep); 3794 3795 nd = sd->sd_nodelist; 3796 while (nd) { 3797 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 3798 nd = nd->nd_next; 3799 continue; 3800 } 3801 3802 if (clnt_lock_set(nd->nd_nodename, sp, ep)) { 3803 rval = -1; 3804 goto out2; 3805 } 3806 lock_flag = 1; 3807 nd = nd->nd_next; 3808 } 3809 /* 3810 * Lock out other meta* commands by suspending 3811 * class 1 messages across the diskset. 
3812 */ 3813 nd = sd->sd_nodelist; 3814 while (nd) { 3815 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 3816 nd = nd->nd_next; 3817 continue; 3818 } 3819 if (clnt_mdcommdctl(nd->nd_nodename, 3820 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 3821 MD_MSCF_NO_FLAGS, ep)) { 3822 rval = -1; 3823 goto out2; 3824 } 3825 suspend1_flag = 1; 3826 nd = nd->nd_next; 3827 } 3828 } 3829 3830 for (i = 0; i < node_c; i++) 3831 if (getnodeside(node_v[i], sd) == MD_SIDEWILD) { 3832 (void) mddserror(ep, MDE_DS_NODENOTINSET, sp->setno, 3833 node_v[i], NULL, sp->setname); 3834 rval = -1; 3835 goto out2; 3836 } 3837 3838 /* 3839 * Count the number of nodes currently in the set. 3840 */ 3841 if (MD_MNSET_DESC(sd)) { 3842 nd = sd->sd_nodelist; 3843 while (nd) { 3844 numsides++; 3845 nd = nd->nd_next; 3846 } 3847 } else { 3848 for (i = 0; i < MD_MAXSIDES; i++) 3849 /* Count full slots */ 3850 if (sd->sd_nodes[i][0] != '\0') 3851 numsides++; 3852 } 3853 3854 /* 3855 * OHA mode == -f -h <hostname> 3856 * OHA is One Host Administration that occurs when the forceflag (-f) 3857 * is set and at least one host in the diskset isn't responding 3858 * to RPC requests. 3859 * 3860 * When in OHA mode, a node cannot delete itself from a diskset. 3861 * When in OHA mode, a node can delete a list of nodes from a diskset 3862 * even if some of the nodes in the diskset are unresponsive. 3863 * 3864 * For multinode diskset, only allow OHA mode when the nodes that 3865 * aren't responding in the diskset are not in the membership list 3866 * (i.e. nodes that aren't responding are not marked ALIVE). 3867 * Nodes that aren't in the membership list will be rejoining 3868 * the diskset through a reconfig cycle and the local mddb set 3869 * and node records can be reconciled during the reconfig cycle. 3870 * 3871 * If a node isn't responding, but is still in the membership list, 3872 * fail the request since the node may not be responding because 3873 * rpc.metad died and is restarting. 
In this case, no reconfig 3874 * cycle will be started, so there's no way to recover if 3875 * the host delete operation was allowed. 3876 * 3877 * NOTE: if nodes that weren't in the membership when the OHA host 3878 * delete occurred are now the only nodes in membership list, 3879 * those nodes will see the old view of the diskset. As soon as 3880 * a node re-enters the cluster that was present in the cluster 3881 * during the host deletion, the diskset will reflect the host 3882 * deletion on all nodes presently in the cluster. 3883 */ 3884 if (forceflg == TRUE) { 3885 if (MD_MNSET_DESC(sd)) { 3886 nd = sd->sd_nodelist; 3887 while (nd) { 3888 /* 3889 * If a node isn't ALIVE (in member list), 3890 * then allow a force-able delete in OHA mode. 3891 */ 3892 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 3893 oha = TRUE; 3894 break; 3895 } 3896 /* 3897 * Don't test for clnt_nullproc since already 3898 * tested the RPC connections by clnt_lock_set. 3899 */ 3900 nd = nd->nd_next; 3901 } 3902 } else { 3903 for (i = 0; i < MD_MAXSIDES; i++) { 3904 /* Skip empty slots */ 3905 if (sd->sd_nodes[i][0] == '\0') 3906 continue; 3907 3908 if (clnt_nullproc(sd->sd_nodes[i], ep) == -1) { 3909 /* 3910 * If we timeout to at least one 3911 * client, then we can allow OHA mode, 3912 * otherwise, we are in normal mode. 3913 */ 3914 if (mdanyrpcerror(ep)) { 3915 mdclrerror(ep); 3916 if (strinlst(sd->sd_nodes[i], 3917 node_c, node_v)) { 3918 oha = TRUE; 3919 break; 3920 } 3921 } 3922 } 3923 } 3924 } 3925 } 3926 3927 /* 3928 * Don't allow this for MN diskset since meta_set_destroy of 1 node 3929 * does NOT remove this node's node record from the other node's set 3930 * records in their local mddb. This leaves a MN diskset in a very 3931 * messed up state. 
3932 */ 3933 if (!(MD_MNSET_DESC(sd))) { 3934 /* Destroy set */ 3935 if (forceflg == TRUE && node_c == 1 && 3936 strcmp(mynode(), node_v[0]) == 0) { 3937 /* Can return since !MN diskset so nothing to unlock */ 3938 return (meta_set_destroy(sp, TRUE, ep)); 3939 } 3940 } 3941 3942 3943 /* 3944 * In multinode diskset, can only delete self if this 3945 * is the last node in the set or if all nodes in 3946 * the set are being deleted. The traditional diskset code 3947 * allows a node to delete itself (when there are other nodes 3948 * in the diskset) when using the force flag, but that code 3949 * path doesn't have the node remove itself from 3950 * the set node list on the other nodes. Since this isn't 3951 * satisfactory for the multinode diskset, just don't 3952 * allow this operation. 3953 */ 3954 if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) && 3955 strinlst(mynode(), node_c, node_v)) { 3956 (void) mddserror(ep, MDE_DS_MNCANTDELSELF, sp->setno, 3957 mynode(), NULL, sp->setname); 3958 rval = -1; 3959 goto out2; 3960 } 3961 3962 /* 3963 * In multinode diskset, don't allow deletion of master node unless 3964 * this is the only node left or unless all nodes are being 3965 * deleted since there is no way to switch 3966 * master ownership (unless via a cluster reconfig cycle). 3967 */ 3968 delete_master = strinlst(sd->sd_mn_master_nodenm, node_c, node_v); 3969 if (MD_MNSET_DESC(sd) && (numsides > 1) && (node_c != numsides) && 3970 delete_master) { 3971 (void) mddserror(ep, MDE_DS_CANTDELMASTER, sp->setno, 3972 sd->sd_mn_master_nodenm, NULL, sp->setname); 3973 rval = -1; 3974 goto out2; 3975 } 3976 3977 3978 /* Deleting self w/o forceflg */ 3979 if (forceflg == FALSE && numsides > 1 && 3980 strinlst(mynode(), node_c, node_v)) { 3981 (void) mddserror(ep, MDE_DS_CANTDELSELF, sp->setno, 3982 mynode(), NULL, sp->setname); 3983 rval = -1; 3984 goto out2; 3985 } 3986 3987 /* 3988 * Setup the mediator record roll-back structure for a trad diskset. 
3989 * 3990 * For a MN diskset, the deletion of a host in the diskset 3991 * does not cause an update of the mediator record. If the 3992 * host deletion will cause the diskset to be removed (this is 3993 * the last host being removed or all hosts are being removed) 3994 * then the mediator record must have already been removed by the 3995 * user or this delete host operation will fail (a check for 3996 * this is done later in this routine). 3997 */ 3998 if (!(MD_MNSET_DESC(sd))) { 3999 (void) memset(&rb_medr, '\0', sizeof (med_rec_t)); 4000 rb_medr.med_rec_mag = MED_REC_MAGIC; 4001 rb_medr.med_rec_rev = MED_REC_REV; 4002 rb_medr.med_rec_fl = 0; 4003 rb_medr.med_rec_sn = sp->setno; 4004 (void) strcpy(rb_medr.med_rec_snm, sp->setname); 4005 for (i = 0; i < MD_MAXSIDES; i++) 4006 (void) strcpy(rb_medr.med_rec_nodes[i], sd->sd_nodes[i]); 4007 rb_medr.med_rec_meds = sd->sd_med; /* structure assigment */ 4008 (void) memset(&rb_medr.med_rec_data, '\0', sizeof (med_data_t)); 4009 rb_medr.med_rec_foff = 0; 4010 crcgen(&rb_medr, &rb_medr.med_rec_cks, 4011 sizeof (med_rec_t), NULL); 4012 4013 /* Bring the mediator record up to date with the set record */ 4014 medr = rb_medr; /* structure assignment */ 4015 4016 if ((max_meds = get_max_meds(ep)) == 0) { 4017 rval = -1; 4018 goto out2; 4019 } 4020 } 4021 4022 /* 4023 * For traditional diskset: 4024 * Check to see if all the hosts we are trying to delete the set from 4025 * have a set "setname" that is the same as ours, i.e. - same name, 4026 * same time stamp, same genid. We only do this if forceflg is not 4027 * specified or we are in OHA mode. 
4028 */ 4029 if (!(MD_MNSET_DESC(sd)) && (forceflg == FALSE || oha == TRUE)) { 4030 int fix_node_v = FALSE; 4031 int j; 4032 4033 for (i = 0; i < node_c; i++) { 4034 /* We skip this side */ 4035 if (strcmp(mynode(), node_v[i]) == 0) 4036 continue; 4037 4038 has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep); 4039 4040 if (has_set < 0) { 4041 char *anode[1]; 4042 4043 /* 4044 * Can't talk to the host only allowed in OHA 4045 * mode. 4046 */ 4047 if (oha == TRUE && mdanyrpcerror(ep)) { 4048 mdclrerror(ep); 4049 continue; 4050 } 4051 4052 /* 4053 * We got an error we do not, or are not, 4054 * prepared to handle. 4055 */ 4056 if (! mdiserror(ep, MDE_NO_SET) && 4057 ! mdismddberror(ep, MDE_DB_NODB)) { 4058 rval = -1; 4059 goto out2; 4060 } 4061 mdclrerror(ep); 4062 4063 /* 4064 * If we got here: both hosts are up; a host in 4065 * our set record does not have the set. So we 4066 * delete the host from our set and invalidate 4067 * the node. 4068 */ 4069 anode[0] = Strdup(node_v[i]); 4070 4071 rval = del_host_noset(sp, anode, ep); 4072 4073 /* 4074 * If we delete a host, make sure the mediator 4075 * hosts are made aware of this. 4076 */ 4077 for (j = 0; j < MD_MAXSIDES; j++) { 4078 if (strcmp(medr.med_rec_nodes[j], 4079 node_v[i]) != 0) 4080 continue; 4081 (void) memset(&medr.med_rec_nodes[j], 4082 '\0', sizeof (md_node_nm_t)); 4083 } 4084 crcgen(&medr, &medr.med_rec_cks, 4085 sizeof (med_rec_t), NULL); 4086 4087 rb_medr = medr; /* struct assignment */ 4088 4089 Free(anode[0]); 4090 4091 if (rval == -1) 4092 goto out2; 4093 4094 node_v[i][0] = '\0'; 4095 fix_node_v = TRUE; 4096 continue; 4097 } 4098 4099 /* 4100 * If we can talk to the host, and they do not have the 4101 * exact set, then we disallow the operation. 4102 */ 4103 if (has_set == FALSE) { 4104 (void) mddserror(ep, MDE_DS_NODENOSET, 4105 sp->setno, node_v[i], NULL, sp->setname); 4106 rval = -1; 4107 goto out2; 4108 } 4109 } 4110 4111 /* 4112 * Here we prune the node_v's that were invalidated above. 
4113 */ 4114 if (fix_node_v == TRUE) { 4115 i = 0; 4116 while (i < node_c) { 4117 if (node_v[i][0] == '\0') { 4118 for (j = i; (j + 1) < node_c; j++) 4119 node_v[j] = node_v[j + 1]; 4120 node_c--; 4121 } 4122 i++; 4123 } 4124 /* 4125 * If we are left with no nodes, then we have 4126 * compeleted the operation. 4127 */ 4128 if (node_c == 0) { 4129 /* 4130 * Inform the mediator hosts of the new node 4131 * list 4132 */ 4133 for (i = 0; i < max_meds; i++) { 4134 if (sd->sd_med.n_lst[i].a_cnt == 0) 4135 continue; 4136 4137 if (clnt_med_upd_rec( 4138 &sd->sd_med.n_lst[i], sp, &medr, 4139 ep)) 4140 mdclrerror(ep); 4141 } 4142 rval = 0; 4143 goto out2; 4144 } 4145 } 4146 } 4147 4148 /* 4149 * For multinode diskset: 4150 * If forceflag is FALSE then check to see if all the hosts we 4151 * are trying to delete the set from have a set "setname" that 4152 * is the same as ours, i.e. - same name, same time stamp, same genid. 4153 * If forceflag is TRUE, then we don't care if the hosts being 4154 * deleted have the same set information or not since user is forcing 4155 * those hosts to be deleted. 4156 */ 4157 if ((MD_MNSET_DESC(sd)) && (forceflg == FALSE)) { 4158 for (i = 0; i < node_c; i++) { 4159 /* We skip this node since comparing against it */ 4160 if (strcmp(mynode(), node_v[i]) == 0) 4161 continue; 4162 4163 has_set = nodehasset(sp, node_v[i], NHS_NSTG_EQ, ep); 4164 4165 if (has_set < 0) { 4166 rval = -1; 4167 goto out2; 4168 } 4169 4170 /* 4171 * If we can talk to the host, and they do not have the 4172 * exact set, then we disallow the operation. 4173 */ 4174 if (has_set == FALSE) { 4175 (void) mddserror(ep, MDE_DS_NODENOSET, 4176 sp->setno, node_v[i], NULL, sp->setname); 4177 rval = -1; 4178 goto out2; 4179 } 4180 } 4181 } 4182 4183 /* 4184 * For traditional diskset: 4185 * Can't allow user to delete their node (without deleting all nodes) 4186 * out of a set in OHA mode, would leave a real mess. 4187 * This action was already failed above for a MN diskset. 
4188 */ 4189 if (!(MD_MNSET_DESC(sd)) && (oha == TRUE) && 4190 strinlst(mynode(), node_c, node_v)) { 4191 /* Can directly return since !MN diskset; nothing to unlock */ 4192 return (mddserror(ep, MDE_DS_OHACANTDELSELF, sp->setno, 4193 mynode(), NULL, sp->setname)); 4194 } 4195 4196 4197 /* Get the drive descriptors for this set */ 4198 if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), 4199 ep)) == NULL) { 4200 if (! mdisok(ep)) { 4201 rval = -1; 4202 goto out2; 4203 } 4204 } 4205 4206 /* 4207 * We have been asked to delete all the hosts in the set, i.e. - delete 4208 * the whole set. 4209 */ 4210 if (node_c == numsides) { 4211 /* 4212 * This is only a valid operation if all drives have been 4213 * removed first. 4214 */ 4215 4216 if (dd != NULL) { 4217 (void) mddserror(ep, MDE_DS_HASDRIVES, sp->setno, 4218 NULL, NULL, sp->setname); 4219 rval = -1; 4220 goto out2; 4221 } 4222 4223 /* 4224 * If a mediator is currently associated with this set, 4225 * fail the deletion of the last host(s). 4226 */ 4227 if (sd->sd_med.n_cnt != 0) { 4228 (void) mddserror(ep, MDE_DS_HASMED, sp->setno, 4229 NULL, NULL, sp->setname); 4230 rval = -1; 4231 goto out2; 4232 } 4233 4234 if (! mdisok(ep)) { 4235 rval = -1; 4236 goto out2; 4237 } 4238 4239 rval = del_set_nodrives(sp, node_c, node_v, oha, ep); 4240 remote_sets_deleted = 1; 4241 goto out2; 4242 } 4243 4244 /* 4245 * Get timeout values in case we need to roll back 4246 */ 4247 (void) memset(&mhiargs, '\0', sizeof (mhiargs)); 4248 if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) { 4249 rval = -1; 4250 goto out2; 4251 } 4252 4253 if (dd != NULL) { 4254 /* 4255 * We need this around for re-adding DB side names later. 4256 */ 4257 if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) { 4258 rval = -1; 4259 goto out2; 4260 } 4261 4262 /* 4263 * Alloc nodeid list if drives are present in diskset. 4264 * nodeid list is used to reset mirror owners if the 4265 * owner is a deleted node. 
4266 */ 4267 if (MD_MNSET_DESC(sd)) { 4268 node_id_list = Zalloc(sizeof (int) * node_c); 4269 } 4270 } 4271 4272 /* Lock the set on current set members */ 4273 if (!(MD_MNSET_DESC(sd))) { 4274 md_rb_sig_handling_on(); 4275 for (i = 0; i < MD_MAXSIDES; i++) { 4276 /* Skip empty slots */ 4277 if (sd->sd_nodes[i][0] == '\0') 4278 continue; 4279 4280 if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { 4281 if (oha == TRUE && mdanyrpcerror(ep)) { 4282 mdclrerror(ep); 4283 continue; 4284 } 4285 rval = -1; 4286 goto out2; 4287 } 4288 lock_flag = 1; 4289 } 4290 } 4291 4292 RB_TEST(1, "deletehosts", ep) 4293 4294 RB_PREEMPT; 4295 rb_level = 1; /* level 1 */ 4296 4297 RB_TEST(2, "deletehosts", ep) 4298 4299 if (MD_MNSET_DESC(sd)) { 4300 md_mnnode_desc *saved_nd_next; 4301 mddb_config_t c; 4302 4303 if (dd != NULL) { 4304 /* 4305 * Notify rpc.mdcommd on all nodes of a nodelist change. 4306 * Start by suspending rpc.mdcommd (which drains it of 4307 * all messages), then change the nodelist followed 4308 * by a reinit and resume. 4309 */ 4310 nd = sd->sd_nodelist; 4311 while (nd) { 4312 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 4313 nd = nd->nd_next; 4314 continue; 4315 } 4316 if (clnt_mdcommdctl(nd->nd_nodename, 4317 COMMDCTL_SUSPEND, sp, 4318 MD_MSG_CLASS0, 4319 MD_MSCF_NO_FLAGS, ep)) { 4320 rval = -1; 4321 goto out2; 4322 } 4323 suspendall_flag = 1; 4324 nd = nd->nd_next; 4325 } 4326 /* 4327 * Is current set STALE? 4328 * Need to know this if delete host fails and node 4329 * is re-joined to diskset. 4330 */ 4331 (void) memset(&c, 0, sizeof (c)); 4332 c.c_id = 0; 4333 c.c_setno = sp->setno; 4334 if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) { 4335 (void) mdstealerror(ep, &c.c_mde); 4336 rval = -1; 4337 goto out2; 4338 } 4339 if (c.c_flags & MDDB_C_STALE) { 4340 stale_flag = MNSET_IS_STALE; 4341 } 4342 } 4343 4344 /* 4345 * For each node being deleted, set DEL flag and 4346 * reset OK flag on that node first. 
4347 * Until a node has turned off its own 4348 * rpc.metad's NODE_OK flag, that node could be 4349 * considered for master during a reconfig. 4350 */ 4351 for (i = 0; i < node_c; i++) { 4352 /* 4353 * During OHA mode, don't issue RPCs to 4354 * non-alive nodes since there is no reason to 4355 * wait for RPC timeouts. 4356 */ 4357 nd = sd->sd_nodelist; 4358 while (nd) { 4359 if (strcmp(nd->nd_nodename, node_v[i]) == 0) 4360 break; 4361 nd = nd->nd_next; 4362 } 4363 /* Something wrong, handle this in next loop */ 4364 if (nd == NULL) 4365 continue; 4366 4367 /* If node_id_list is alloc'd, fill in for later use */ 4368 if (node_id_list) 4369 node_id_list[i] = nd->nd_nodeid; 4370 4371 /* All nodes are guaranteed to be ALIVE unless OHA */ 4372 if ((oha == TRUE) && 4373 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4374 continue; 4375 } 4376 4377 /* Only changing my local cache of node list */ 4378 saved_nd_next = nd->nd_next; 4379 nd->nd_next = NULL; 4380 4381 /* Set flags for del host to DEL on that host */ 4382 if (clnt_upd_nr_flags(node_v[i], sp, 4383 nd, MD_NR_DEL, NULL, ep)) { 4384 nd->nd_next = saved_nd_next; 4385 goto rollback; 4386 } 4387 nd->nd_next = saved_nd_next; 4388 } 4389 for (i = 0; i < node_c; i++) { 4390 /* 4391 * Turn off owner flag in nodes to be deleted 4392 * if this node has been joined. 4393 * Also, turn off NODE_OK and turn on NODE_DEL 4394 * for nodes to be deleted. 4395 * These flags are used to set the node 4396 * record flags in all nodes in the set. 4397 * Only withdraw nodes that are joined. 4398 */ 4399 nd = sd->sd_nodelist; 4400 while (nd) { 4401 /* 4402 * Don't communicate with non-ALIVE node if 4403 * in OHA - but set flags in master list so 4404 * alive nodes are updated correctly. 
4405 */ 4406 if (strcmp(nd->nd_nodename, node_v[i]) == 0) { 4407 if ((oha == TRUE) && 4408 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4409 nd->nd_flags |= MD_MN_NODE_DEL; 4410 nd->nd_flags &= ~MD_MN_NODE_OK; 4411 nd = nd->nd_next; 4412 continue; 4413 } 4414 if (nd->nd_flags & MD_MN_NODE_OWN) { 4415 /* 4416 * Going to set locally cached node 4417 * flags to rollback join so in case 4418 * of error, the rollback code knows 4419 * which nodes to re-join. 4420 * rpc.metad ignores the RB_JOIN flag. 4421 */ 4422 nd->nd_flags |= MD_MN_NODE_RB_JOIN; 4423 nd->nd_flags &= ~MD_MN_NODE_OWN; 4424 4425 /* 4426 * Be careful in ordering of following 4427 * steps so that recovery from a panic 4428 * between the steps is viable. 4429 * Only reset master info in rpc.metad 4430 * - don't reset local cached info 4431 * which will be used to set master 4432 * info back if failure (rollback). 4433 */ 4434 if (clnt_withdrawset(nd->nd_nodename, 4435 sp, ep)) 4436 goto rollback; 4437 4438 /* Reset master on deleted node */ 4439 if (clnt_mnsetmaster(node_v[i], sp, "", 4440 MD_MN_INVALID_NID, ep)) 4441 goto rollback; 4442 } 4443 4444 nd->nd_flags |= MD_MN_NODE_DEL; 4445 nd->nd_flags &= ~MD_MN_NODE_OK; 4446 } 4447 nd = nd->nd_next; 4448 } 4449 } 4450 4451 /* 4452 * Now, reset owner and set delete flags for the 4453 * deleted nodes on all nodes. 4454 */ 4455 nd = sd->sd_nodelist; 4456 while (nd) { 4457 /* Skip non-ALIVE node if in OHA */ 4458 if ((oha == TRUE) && 4459 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4460 nd = nd->nd_next; 4461 continue; 4462 } 4463 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 4464 sd->sd_nodelist, MD_NR_SET, NULL, ep)) { 4465 goto rollback; 4466 } 4467 nd = nd->nd_next; 4468 } 4469 /* 4470 * Notify rpc.mdcommd on all nodes of a nodelist change. 4471 * Send reinit command to mdcommd which forces it to get 4472 * fresh set description. 
4473 */ 4474 if (suspendall_flag) { 4475 /* Send reinit */ 4476 nd = sd->sd_nodelist; 4477 while (nd) { 4478 if ((oha == TRUE) && 4479 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4480 nd = nd->nd_next; 4481 continue; 4482 } 4483 /* Class is ignored for REINIT */ 4484 if (clnt_mdcommdctl(nd->nd_nodename, 4485 COMMDCTL_REINIT, 4486 sp, NULL, MD_MSCF_NO_FLAGS, ep)) { 4487 mde_perror(ep, dgettext(TEXT_DOMAIN, 4488 "Unable to reinit rpc.mdcommd.\n")); 4489 goto rollback; 4490 } 4491 nd = nd->nd_next; 4492 } 4493 /* Send resume */ 4494 nd = sd->sd_nodelist; 4495 while (nd) { 4496 if ((oha == TRUE) && 4497 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4498 nd = nd->nd_next; 4499 continue; 4500 } 4501 if (clnt_mdcommdctl(nd->nd_nodename, 4502 COMMDCTL_RESUME, sp, MD_MSG_CLASS0, 4503 MD_MSCF_DONT_RESUME_CLASS1, ep)) { 4504 mde_perror(ep, dgettext(TEXT_DOMAIN, 4505 "Unable to resume rpc.mdcommd.\n")); 4506 goto rollback; 4507 } 4508 nd = nd->nd_next; 4509 } 4510 meta_ping_mnset(sp->setno); 4511 } 4512 } 4513 4514 4515 /* 4516 * Mark the set record MD_SR_DEL on the hosts we are deleting 4517 * If a MN diskset and OHA mode, don't issue RPC to nodes that 4518 * are not ALIVE. 4519 * If a MN diskset and not in OHA mode, then all nodes must respond 4520 * to RPC (be alive) or this routine will return failure. 4521 * If a traditional diskset, all RPC failures if in OHA mode. 4522 */ 4523 for (i = 0; i < node_c; i++) { 4524 4525 RB_TEST(3, "deletehosts", ep) 4526 4527 if ((MD_MNSET_DESC(sd)) && (oha == TRUE)) { 4528 /* 4529 * During OHA mode, don't issue RPCs to 4530 * non-alive nodes since there is no reason to 4531 * wait for RPC timeouts. 
4532 */ 4533 nd = sd->sd_nodelist; 4534 while (nd) { 4535 if (strcmp(nd->nd_nodename, node_v[i]) == 0) { 4536 break; 4537 } 4538 nd = nd->nd_next; 4539 } 4540 if (nd == NULL) { 4541 (void) mddserror(ep, MDE_DS_NODENOTINSET, 4542 sp->setno, node_v[i], NULL, sp->setname); 4543 goto rollback; 4544 } else if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 4545 /* Skip non-ALIVE node if in OHA mode */ 4546 continue; 4547 } else { 4548 if (clnt_upd_sr_flags(node_v[i], sp, 4549 MD_SR_DEL, ep)) { 4550 goto rollback; 4551 } 4552 } 4553 } else if ((MD_MNSET_DESC(sd)) && (oha == FALSE)) { 4554 /* 4555 * All nodes should be alive in non-oha mode. 4556 */ 4557 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) { 4558 goto rollback; 4559 } 4560 } else { 4561 /* 4562 * For traditional diskset, issue the RPC and 4563 * ignore RPC failure if in OHA mode. 4564 */ 4565 if (clnt_upd_sr_flags(node_v[i], sp, MD_SR_DEL, ep)) { 4566 if (oha == TRUE && mdanyrpcerror(ep)) { 4567 mdclrerror(ep); 4568 continue; 4569 } 4570 goto rollback; 4571 } 4572 } 4573 4574 RB_TEST(4, "deletehosts", ep) 4575 } 4576 4577 RB_TEST(5, "deletehosts", ep) 4578 4579 RB_PREEMPT; 4580 rb_level = 2; /* level 2 */ 4581 4582 RB_TEST(6, "deletehosts", ep) 4583 4584 /* Delete the set on the hosts we are deleting */ 4585 if (del_set_on_hosts(sp, sd, dd, node_c, node_v, oha, ep)) { 4586 if (node_id_list) 4587 Free(node_id_list); 4588 /* 4589 * Failure during del_set_on_hosts would have recreated 4590 * the diskset on the remote hosts, but for multi-owner 4591 * disksets need to set node flags properly and REINIT and 4592 * RESUME rpc.mdcommd, so just let the rollback code 4593 * do this. 
4594 */ 4595 if (MD_MNSET_DESC(sd)) 4596 goto rollback; 4597 return (-1); 4598 } 4599 remote_sets_deleted = 1; 4600 4601 RB_TEST(19, "deletehosts", ep) 4602 4603 RB_PREEMPT; 4604 rb_level = 3; /* level 3 */ 4605 4606 RB_TEST(20, "deletehosts", ep) 4607 4608 /* Delete the host from sets on hosts not being deleted */ 4609 if (MD_MNSET_DESC(sd)) { 4610 nd = sd->sd_nodelist; 4611 /* All nodes are guaranteed to be ALIVE unless in oha mode */ 4612 while (nd) { 4613 /* 4614 * During OHA mode, don't issue RPCs to 4615 * non-alive nodes since there is no reason to 4616 * wait for RPC timeouts. 4617 */ 4618 if ((oha == TRUE) && 4619 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4620 nd = nd->nd_next; 4621 continue; 4622 } 4623 4624 /* Skip nodes being deleted */ 4625 if (strinlst(nd->nd_nodename, node_c, node_v)) { 4626 nd = nd->nd_next; 4627 continue; 4628 } 4629 if (clnt_delhosts(nd->nd_nodename, sp, node_c, node_v, 4630 ep) == -1) { 4631 goto rollback; 4632 } 4633 4634 RB_TEST(21, "deletehosts", ep) 4635 nd = nd->nd_next; 4636 } 4637 } else { 4638 for (i = 0; i < MD_MAXSIDES; i++) { 4639 /* Skip empty slots */ 4640 if (sd->sd_nodes[i][0] == '\0') 4641 continue; 4642 4643 /* Skip nodes being deleted */ 4644 if (strinlst(sd->sd_nodes[i], node_c, node_v)) 4645 continue; 4646 4647 if (clnt_delhosts(sd->sd_nodes[i], sp, node_c, node_v, 4648 ep) == -1) { 4649 if (oha == TRUE && mdanyrpcerror(ep)) { 4650 mdclrerror(ep); 4651 continue; 4652 } 4653 goto rollback; 4654 } 4655 4656 RB_TEST(21, "deletehosts", ep) 4657 } 4658 } 4659 4660 /* We have drives */ 4661 if (dd != NULL) { 4662 RB_TEST(22, "deletehosts", ep) 4663 4664 RB_PREEMPT; 4665 rb_level = 4; /* level 4 */ 4666 4667 RB_TEST(23, "deletehosts", ep) 4668 4669 /* 4670 * Delete the old sidename for each drive on all the hosts. 4671 * If a multi-node diskset, each host only stores 4672 * the side information for itself. So, a multi-node 4673 * diskset doesn't delete the old sidename for 4674 * an old host. 
4675 * 4676 * If a MN diskset, reset owners of mirrors that are 4677 * owned by the deleted nodes. 4678 */ 4679 if (!(MD_MNSET_DESC(sd))) { 4680 for (i = 0; i < MD_MAXSIDES; i++) { 4681 /* Skip empty slots */ 4682 if (sd->sd_nodes[i][0] == '\0') 4683 continue; 4684 4685 /* Skip nodes being deleted */ 4686 if (strinlst(sd->sd_nodes[i], node_c, node_v)) 4687 continue; 4688 4689 if (clnt_del_drv_sidenms(sd->sd_nodes[i], sp, 4690 ep)) { 4691 if (oha == TRUE && mdanyrpcerror(ep)) { 4692 mdclrerror(ep); 4693 continue; 4694 } 4695 metaflushsetname(sp); 4696 goto rollback; 4697 } 4698 4699 RB_TEST(24, "deletehosts", ep) 4700 } 4701 } else { 4702 nd = sd->sd_nodelist; 4703 /* All nodes guaranteed to be ALIVE unless in oha mode */ 4704 while (nd) { 4705 /* 4706 * If mirror owner was set to a deleted node, then 4707 * each existing node resets mirror owner to NULL. 4708 * 4709 * During OHA mode, don't issue RPCs to 4710 * non-alive nodes since there is no reason to 4711 * wait for RPC timeouts. 4712 */ 4713 if ((oha == TRUE) && 4714 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4715 nd = nd->nd_next; 4716 continue; 4717 } 4718 4719 /* Skip nodes being deleted */ 4720 if (strinlst(nd->nd_nodename, node_c, node_v)) { 4721 nd = nd->nd_next; 4722 continue; 4723 } 4724 4725 /* 4726 * If mirror owner is a deleted node, reset mirror 4727 * owners to NULL. If an error occurs, print a 4728 * warning and continue. Don't fail metaset 4729 * because of mirror owner reset problem since next 4730 * node to grab mirror will resolve this issue. 4731 * Before next node grabs mirrors, metaset will show 4732 * the deleted node as owner which is why an attempt 4733 * to reset the mirror owner is made. 
4734 */ 4735 if (clnt_reset_mirror_owner(nd->nd_nodename, sp, 4736 node_c, &node_id_list[0], &xep) == -1) { 4737 mde_perror(&xep, dgettext(TEXT_DOMAIN, 4738 "Unable to reset mirror owner on" 4739 " node %s\n"), nd->nd_nodename); 4740 mdclrerror(&xep); 4741 } 4742 4743 RB_TEST(21, "deletehosts", ep) 4744 nd = nd->nd_next; 4745 } 4746 } 4747 } 4748 4749 RB_TEST(25, "deletehosts", ep) 4750 4751 RB_PREEMPT; 4752 rb_level = 4; /* level 4 */ 4753 4754 RB_TEST(26, "deletehosts", ep) 4755 4756 /* 4757 * Bring the mediator record up to date with the set record for 4758 * traditional diskset. 4759 */ 4760 if (!(MD_MNSET_DESC(sd))) { 4761 medr = rb_medr; /* structure assignment */ 4762 for (i = 0; i < MD_MAXSIDES; i++) { 4763 if (strinlst(sd->sd_nodes[i], node_c, node_v)) 4764 (void) memset(&medr.med_rec_nodes[i], 4765 '\0', sizeof (md_node_nm_t)); 4766 else 4767 (void) strcpy(medr.med_rec_nodes[i], 4768 sd->sd_nodes[i]); 4769 } 4770 crcgen(&medr, &medr.med_rec_cks, sizeof (med_rec_t), NULL); 4771 4772 /* Inform the mediator hosts of the new node list */ 4773 for (i = 0; i < max_meds; i++) { 4774 if (sd->sd_med.n_lst[i].a_cnt == 0) 4775 continue; 4776 4777 if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, 4778 &medr, ep)) { 4779 if (oha == TRUE && mdanyrpcerror(ep)) { 4780 mdclrerror(ep); 4781 continue; 4782 } 4783 goto rollback; 4784 } 4785 } 4786 } 4787 4788 RB_TEST(27, "deletehosts", ep) 4789 4790 /* 4791 * For traditional diskset: 4792 * We are deleting ourselves out of the set and we have drives to 4793 * consider; so we need to halt the set, release the drives and 4794 * reset the timeout. **** THIS IS A ONE WAY TICKET, NO ROLL BACK 4795 * IS POSSIBLE AS SOON AS THE HALT SET COMPLETES, SO THIS IS DONE 4796 * WITH ALL SIGNALS BLOCKED AND LAST **** 4797 * 4798 * This situation cannot occur in a MN diskset since a node can't 4799 * delete itself unless all nodes are being deleted and a diskset 4800 * cannot contain any drives if all nodes are being deleted. 
4801 * So, don't even test for this if a MN diskset. 4802 */ 4803 if (!(MD_MNSET_DESC(sd)) && (dd != NULL) && 4804 strinlst(mynode(), node_c, node_v)) { 4805 /* Make sure we are blocking all signals */ 4806 if (procsigs(TRUE, &oldsigs, ep) < 0) { 4807 rval = -1; 4808 goto out1; 4809 } 4810 4811 if (halt_set(sp, ep)) { 4812 rval = -1; 4813 goto out1; 4814 } 4815 4816 if (rel_own_bydd(sp, dd, FALSE, ep)) 4817 rval = -1; 4818 4819 out1: 4820 /* release signals back to what they were on entry */ 4821 if (procsigs(FALSE, &oldsigs, &xep) < 0) { 4822 if (rval == 0) 4823 (void) mdstealerror(ep, &xep); 4824 rval = -1; 4825 } 4826 } 4827 4828 out2: 4829 /* 4830 * Unlock diskset by resuming messages across the diskset. 4831 * Just resume all classes so that resume is the same whether 4832 * just one class was locked or all classes were locked. 4833 */ 4834 if ((suspend1_flag) || (suspendall_flag)) { 4835 /* Send resume */ 4836 nd = sd->sd_nodelist; 4837 while (nd) { 4838 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 4839 nd = nd->nd_next; 4840 continue; 4841 } 4842 /* 4843 * Skip nodes being deleted if remote set 4844 * was deleted since rpc.mdcommd may no longer 4845 * be running on remote node. 4846 */ 4847 if ((remote_sets_deleted == 1) && 4848 (strinlst(nd->nd_nodename, node_c, node_v))) { 4849 nd = nd->nd_next; 4850 continue; 4851 } 4852 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 4853 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 4854 if (rval == 0) 4855 (void) mdstealerror(ep, &xep); 4856 rval = -1; 4857 mde_perror(ep, dgettext(TEXT_DOMAIN, 4858 "Unable to resume rpc.mdcommd.\n")); 4859 } 4860 nd = nd->nd_next; 4861 } 4862 meta_ping_mnset(sp->setno); 4863 } 4864 4865 cl_sk = cl_get_setkey(sp->setno, sp->setname); 4866 if (lock_flag) { 4867 if (MD_MNSET_DESC(sd)) { 4868 nd = sd->sd_nodelist; 4869 while (nd) { 4870 /* 4871 * During OHA mode, don't issue RPCs to 4872 * non-alive nodes since there is no reason to 4873 * wait for RPC timeouts. 
4874 */ 4875 if ((oha == TRUE) && 4876 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 4877 nd = nd->nd_next; 4878 continue; 4879 } 4880 if (clnt_unlock_set(nd->nd_nodename, 4881 cl_sk, &xep)) { 4882 if (rval == 0) 4883 (void) mdstealerror(ep, &xep); 4884 rval = -1; 4885 } 4886 nd = nd->nd_next; 4887 } 4888 } else { 4889 for (i = 0; i < MD_MAXSIDES; i++) { 4890 /* Skip empty slots */ 4891 if (sd->sd_nodes[i][0] == '\0') 4892 continue; 4893 4894 if (clnt_unlock_set(sd->sd_nodes[i], 4895 cl_sk, &xep)) { 4896 if (oha == TRUE && 4897 mdanyrpcerror(&xep)) { 4898 mdclrerror(&xep); 4899 continue; 4900 } 4901 if (rval == 0) 4902 (void) mdstealerror(ep, &xep); 4903 rval = -1; 4904 } 4905 } 4906 } 4907 } 4908 cl_set_setkey(NULL); 4909 4910 out3: 4911 metafreereplicalist(rlp); 4912 if (node_id_list) 4913 Free(node_id_list); 4914 4915 metaflushsetname(sp); 4916 4917 if (MD_MNSET_DESC(sd)) { 4918 /* release signals back to what they were on entry */ 4919 if (procsigs(FALSE, &oldsigs, &xep) < 0) 4920 mdclrerror(&xep); 4921 } else { 4922 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 4923 } 4924 4925 4926 return (rval); 4927 4928 rollback: 4929 /* all signals already blocked for MN disket */ 4930 if (!(MD_MNSET_DESC(sd))) { 4931 if (procsigs(TRUE, &oldsigs, &xep) < 0) 4932 mdclrerror(&xep); 4933 } 4934 4935 rval = -1; 4936 4937 max_genid = sd->sd_genid; 4938 4939 4940 /* 4941 * Send reinit command to rpc.mdcommd which forces it to get 4942 * fresh set description and resume all classes but class 0. 4943 * Don't send any commands to rpc.mdcommd if set on that node 4944 * has been removed. 4945 */ 4946 if (suspendall_flag) { 4947 /* Send reinit */ 4948 nd = sd->sd_nodelist; 4949 while (nd) { 4950 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 4951 nd = nd->nd_next; 4952 continue; 4953 } 4954 /* 4955 * If the remote set was deleted, rpc.mdcommd 4956 * may no longer be running so send nothing to it. 
4957 */ 4958 if ((remote_sets_deleted == 1) && 4959 (strinlst(nd->nd_nodename, node_c, node_v))) { 4960 nd = nd->nd_next; 4961 continue; 4962 } 4963 /* Class is ignored for REINIT */ 4964 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 4965 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 4966 mde_perror(&xep, dgettext(TEXT_DOMAIN, 4967 "Unable to reinit rpc.mdcommd.\n")); 4968 mdclrerror(&xep); 4969 } 4970 nd = nd->nd_next; 4971 } 4972 /* Send resume */ 4973 nd = sd->sd_nodelist; 4974 while (nd) { 4975 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 4976 nd = nd->nd_next; 4977 continue; 4978 } 4979 /* 4980 * If the remote set was deleted, rpc.mdcommd 4981 * may no longer be running so send nothing to it. 4982 */ 4983 if ((remote_sets_deleted == 1) && 4984 (strinlst(nd->nd_nodename, node_c, node_v))) { 4985 nd = nd->nd_next; 4986 continue; 4987 } 4988 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 4989 sp, MD_MSG_CLASS0, MD_MSCF_DONT_RESUME_CLASS1, 4990 &xep)) { 4991 mde_perror(&xep, dgettext(TEXT_DOMAIN, 4992 "Unable to resume rpc.mdcommd.\n")); 4993 mdclrerror(&xep); 4994 } 4995 nd = nd->nd_next; 4996 } 4997 meta_ping_mnset(sp->setno); 4998 } 4999 5000 /* level 2 */ 5001 if (rb_level > 1) { 5002 md_set_record *sr; 5003 md_replicalist_t *rl; 5004 5005 recreate_set(sp, sd); 5006 5007 /* 5008 * Lock out other meta* commands on nodes with the newly 5009 * re-created sets by suspending class 1 messages 5010 * across the diskset. 
5011 */ 5012 nd = sd->sd_nodelist; 5013 while (nd) { 5014 /* Skip nodes not being deleted */ 5015 if (!(strinlst(nd->nd_nodename, node_c, node_v))) { 5016 nd = nd->nd_next; 5017 continue; 5018 } 5019 /* Suspend commd on nodes with re-created sets */ 5020 if (clnt_mdcommdctl(nd->nd_nodename, 5021 COMMDCTL_SUSPEND, sp, MD_MSG_CLASS1, 5022 MD_MSCF_NO_FLAGS, &xep)) { 5023 mde_perror(&xep, dgettext(TEXT_DOMAIN, 5024 "Unable to suspend rpc.mdcommd.\n")); 5025 mdclrerror(&xep); 5026 } 5027 nd = nd->nd_next; 5028 } 5029 5030 max_genid++; 5031 5032 /* 5033 * See if we have to re-add the drives specified. 5034 */ 5035 for (i = 0; i < node_c; i++) { 5036 if (MD_MNSET_DESC(sd) && (oha == TRUE)) { 5037 /* 5038 * During OHA mode, don't issue RPCs to 5039 * non-alive nodes since there is no reason to 5040 * wait for RPC timeouts. 5041 */ 5042 nd = sd->sd_nodelist; 5043 while (nd) { 5044 if (strcmp(nd->nd_nodename, node_v[i]) 5045 == 0) { 5046 break; 5047 } 5048 nd = nd->nd_next; 5049 } 5050 if (nd == 0) 5051 continue; 5052 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) 5053 continue; 5054 } 5055 5056 /* Don't care if set record is MN or not */ 5057 if (clnt_getset(node_v[i], sp->setname, MD_SET_BAD, &sr, 5058 &xep) == -1) { 5059 mdclrerror(&xep); 5060 continue; 5061 } 5062 5063 /* Drive already added, skip to next node */ 5064 if (sr->sr_drivechain != NULL) { 5065 /* 5066 * Set record structure was allocated from RPC 5067 * routine getset so this structure is only of 5068 * size md_set_record even if the MN flag is 5069 * set. So, clear the flag so that the free 5070 * code doesn't attempt to free a structure 5071 * the size of md_mnset_record. 
5072 */ 5073 sr->sr_flags &= ~MD_SR_MN; 5074 free_sr(sr); 5075 continue; 5076 } 5077 5078 if (clnt_adddrvs(node_v[i], sp, dd, sr->sr_ctime, 5079 sr->sr_genid, &xep) == -1) 5080 mdclrerror(&xep); 5081 5082 if (clnt_upd_dr_flags(node_v[i], sp, dd, MD_DR_OK, 5083 &xep) == -1) 5084 mdclrerror(&xep); 5085 5086 /* 5087 * Set record structure was allocated from RPC routine 5088 * getset so this structure is only of size 5089 * md_set_record even if the MN flag is set. So, 5090 * clear the flag so that the free code doesn't 5091 * attempt to free a structure the size of 5092 * md_mnset_record. 5093 */ 5094 sr->sr_flags &= ~MD_SR_MN; 5095 free_sr(sr); 5096 } 5097 max_genid += 3; 5098 5099 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 5100 md_replica_t *r = rl->rl_repp; 5101 /* 5102 * This is not the first replica being added to the 5103 * diskset so call with ADDSIDENMS_BCAST. If this 5104 * is a traditional diskset, the bcast flag is ignored 5105 * since traditional disksets don't use the rpc.mdcommd. 5106 */ 5107 if (meta_db_addsidenms(sp, r->r_namep, r->r_blkno, 5108 DB_ADDSIDENMS_BCAST, &xep)) 5109 mdclrerror(&xep); 5110 } 5111 5112 /* 5113 * Add the device names for the new sides into the namespace, 5114 * on all hosts not being deleted. 
5115 */ 5116 if (MD_MNSET_DESC(sd)) { 5117 nd = sd->sd_nodelist; 5118 while (nd) { 5119 /* Find a node that is not being deleted */ 5120 if (!strinlst(nd->nd_nodename, node_c, 5121 node_v)) { 5122 j = nd->nd_nodeid; 5123 break; 5124 } 5125 nd = nd->nd_next; 5126 } 5127 } else { 5128 for (j = 0; j < MD_MAXSIDES; j++) { 5129 /* Skip empty slots */ 5130 if (sd->sd_nodes[j][0] == '\0') 5131 continue; 5132 5133 /* Find a node that is not being deleted */ 5134 if (!strinlst(sd->sd_nodes[j], node_c, node_v)) 5135 break; 5136 } 5137 } 5138 5139 if (MD_MNSET_DESC(sd)) { 5140 nd = sd->sd_nodelist; 5141 while (nd) { 5142 /* Skip nodes not being deleted */ 5143 if (!strinlst(nd->nd_nodename, node_c, 5144 node_v)) { 5145 nd = nd->nd_next; 5146 continue; 5147 } 5148 5149 /* this side was just created, add the names */ 5150 if (add_md_sidenms(sp, nd->nd_nodeid, j, &xep)) 5151 mdclrerror(&xep); 5152 nd = nd->nd_next; 5153 } 5154 } else { 5155 for (i = 0; i < MD_MAXSIDES; i++) { 5156 /* Skip empty slots */ 5157 if (sd->sd_nodes[i][0] == '\0') 5158 continue; 5159 5160 /* Skip nodes not being deleted */ 5161 if (!strinlst(sd->sd_nodes[i], node_c, node_v)) 5162 continue; 5163 5164 /* this side was just created, add the names */ 5165 if (add_md_sidenms(sp, i, j, &xep)) 5166 mdclrerror(&xep); 5167 } 5168 } 5169 } 5170 5171 /* level 4 */ 5172 if (rb_level > 3 && dd != NULL) { 5173 /* 5174 * Add the new sidename for each drive to all the hosts 5175 * Multi-node disksets only store the sidename for 5176 * that host, so there is nothing to re-add. 
5177 */ 5178 if (!(MD_MNSET_DESC(sd))) { 5179 for (j = 0; j < MD_MAXSIDES; j++) { 5180 /* Skip empty slots */ 5181 if (sd->sd_nodes[j][0] == '\0') 5182 continue; 5183 5184 /* Skip nodes not being deleted */ 5185 if (!strinlst(sd->sd_nodes[j], node_c, node_v)) 5186 break; 5187 } 5188 for (i = 0; i < MD_MAXSIDES; i++) { 5189 /* Skip empty slots */ 5190 if (sd->sd_nodes[i][0] == '\0') 5191 continue; 5192 5193 if (clnt_add_drv_sidenms(sd->sd_nodes[i], 5194 sd->sd_nodes[j], sp, sd, node_c, node_v, 5195 &xep)) 5196 mdclrerror(&xep); 5197 } 5198 } 5199 5200 } 5201 5202 /* level 5 */ 5203 if ((rb_level > 4) && (!(MD_MNSET_DESC(sd)))) { 5204 /* rollback the mediator record */ 5205 for (i = 0; i < max_meds; i++) { 5206 if (sd->sd_med.n_lst[i].a_cnt == 0) 5207 continue; 5208 5209 if (clnt_med_upd_rec(&sd->sd_med.n_lst[i], sp, 5210 &rb_medr, &xep)) 5211 mdclrerror(&xep); 5212 } 5213 } 5214 5215 /* level 3 */ 5216 if (rb_level > 2) { 5217 md_set_record *sr; 5218 md_mnset_record *mnsr; 5219 5220 if (MD_MNSET_DESC(sd)) { 5221 nd = sd->sd_nodelist; 5222 /* 5223 * During OHA mode, don't issue RPCs to 5224 * non-alive nodes since there is no reason to 5225 * wait for RPC timeouts. 
5226 */ 5227 while (nd) { 5228 if ((oha == TRUE) && 5229 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 5230 nd = nd->nd_next; 5231 continue; 5232 } 5233 /* Record should be for a multi-node diskset */ 5234 if (clnt_mngetset(nd->nd_nodename, sp->setname, 5235 MD_SET_BAD, &mnsr, &xep) == -1) { 5236 mdclrerror(&xep); 5237 nd = nd->nd_next; 5238 continue; 5239 } 5240 5241 has_set = 1; 5242 5243 nr = mnsr->sr_nodechain; 5244 while (nr) { 5245 if (nd->nd_nodeid == nr->nr_nodeid) { 5246 break; 5247 } 5248 nr = nr->nr_next; 5249 } 5250 if (nr == NULL) 5251 has_set = 0; 5252 5253 free_sr((struct md_set_record *)mnsr); 5254 if (has_set) { 5255 nd = nd->nd_next; 5256 continue; 5257 } 5258 5259 if (clnt_addhosts(nd->nd_nodename, sp, node_c, 5260 node_v, &xep) == -1) 5261 mdclrerror(&xep); 5262 5263 nd = nd->nd_next; 5264 } 5265 } else { 5266 for (i = 0; i < MD_MAXSIDES; i++) { 5267 /* Skip empty slots */ 5268 if (sd->sd_nodes[i][0] == '\0') 5269 continue; 5270 5271 /* Record should be for a non-multi-node set */ 5272 if (clnt_getset(sd->sd_nodes[i], sp->setname, 5273 MD_SET_BAD, &sr, &xep) == -1) { 5274 mdclrerror(&xep); 5275 continue; 5276 } 5277 5278 /* 5279 * Set record structure was allocated from RPC 5280 * routine getset so this structure is only of 5281 * size md_set_record even if the MN flag is 5282 * set. So, clear the flag so that the free 5283 * code doesn't attempt to free a structure 5284 * the size of md_mnset_record. 
5285 */ 5286 if (MD_MNSET_REC(sr)) { 5287 sr->sr_flags &= ~MD_SR_MN; 5288 free_sr(sr); 5289 continue; 5290 } 5291 5292 has_set = 1; 5293 for (j = 0; j < MD_MAXSIDES; j++) { 5294 /* Skip empty slots */ 5295 if (sd->sd_nodes[j][0] == '\0') 5296 continue; 5297 5298 if (sr->sr_nodes[j][0] == '\0') { 5299 has_set = 0; 5300 break; 5301 } 5302 } 5303 5304 free_sr(sr); 5305 if (has_set) 5306 continue; 5307 5308 if (clnt_addhosts(sd->sd_nodes[i], sp, node_c, 5309 node_v, &xep) == -1) 5310 mdclrerror(&xep); 5311 } 5312 } 5313 max_genid++; 5314 } 5315 5316 /* level 1 */ 5317 if (rb_level > 0) { 5318 max_genid++; 5319 /* Sets MD_SR_OK on given nodes. */ 5320 resync_genid(sp, sd, max_genid, node_c, node_v); 5321 5322 /* 5323 * For MN diskset: 5324 * On each newly re-added node, set the node record for that 5325 * node to OK. Then set all node records for the newly added 5326 * nodes on all nodes to ok. 5327 * 5328 * By setting a node's own node record to ok first, even if 5329 * the node re-adding the hosts panics, the rest of the nodes 5330 * can determine the same node list during the choosing of the 5331 * master during reconfig. So, only nodes considered for 5332 * mastership are nodes that have both MD_MN_NODE_OK and 5333 * MD_SR_OK set on that node's rpc.metad. If all nodes have 5334 * MD_SR_OK set, but no node has its own MD_MN_NODE_OK set, 5335 * then the set will be removed during reconfig since a panic 5336 * occurred during the re-creation of the deletion of 5337 * the initial diskset. 5338 */ 5339 if (MD_MNSET_DESC(sd)) { 5340 md_mnnode_desc *saved_nd_next; 5341 if (dd != NULL) { 5342 /* 5343 * Notify rpc.mdcommd on all nodes of a 5344 * nodelist change. Start by suspending 5345 * rpc.mdcommd (which drains it of all 5346 * messages), then change the nodelist 5347 * followed by a reinit and resume. 
5348 */ 5349 nd = sd->sd_nodelist; 5350 while (nd) { 5351 if (!(nd->nd_flags & 5352 MD_MN_NODE_ALIVE)) { 5353 nd = nd->nd_next; 5354 continue; 5355 } 5356 if (clnt_mdcommdctl(nd->nd_nodename, 5357 COMMDCTL_SUSPEND, sp, 5358 MD_MSG_CLASS0, 5359 MD_MSCF_NO_FLAGS, &xep)) { 5360 mde_perror(&xep, 5361 dgettext(TEXT_DOMAIN, 5362 "Unable to suspend " 5363 "rpc.mdcommd.\n")); 5364 mdclrerror(&xep); 5365 } 5366 suspendall_flag_rb = 1; 5367 nd = nd->nd_next; 5368 } 5369 } 5370 for (i = 0; i < node_c; i++) { 5371 /* 5372 * During OHA mode, don't issue RPCs to 5373 * non-alive nodes since there is no reason to 5374 * wait for RPC timeouts. 5375 */ 5376 nd = sd->sd_nodelist; 5377 while (nd) { 5378 if (strcmp(nd->nd_nodename, node_v[i]) 5379 == 0) 5380 break; 5381 nd = nd->nd_next; 5382 } 5383 /* Something wrong, finish this in next loop */ 5384 if (nd == NULL) 5385 continue; 5386 5387 if ((oha == TRUE) && 5388 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 5389 continue; 5390 } 5391 5392 if (dd != NULL) { 5393 /* Set master on re-joining node. */ 5394 if (clnt_mnsetmaster(node_v[i], sp, 5395 sd->sd_mn_master_nodenm, 5396 sd->sd_mn_master_nodeid, &xep)) { 5397 mdclrerror(&xep); 5398 } 5399 5400 /* 5401 * Re-join set to same state as 5402 * before - stale or non-stale. 5403 */ 5404 if (clnt_joinset(node_v[i], sp, 5405 stale_flag, &xep)) { 5406 mdclrerror(&xep); 5407 } 5408 } 5409 5410 /* Only changing my local cache of node list */ 5411 saved_nd_next = nd->nd_next; 5412 nd->nd_next = NULL; 5413 5414 /* Set record for host to ok on that host */ 5415 if (clnt_upd_nr_flags(node_v[i], sp, 5416 nd, MD_NR_OK, NULL, &xep)) { 5417 mdclrerror(&xep); 5418 } 5419 nd->nd_next = saved_nd_next; 5420 } 5421 5422 /* Now set all node records on all nodes to be ok */ 5423 nd = sd->sd_nodelist; 5424 while (nd) { 5425 /* 5426 * During OHA mode, don't issue RPCs to 5427 * non-alive nodes since there is no reason to 5428 * wait for RPC timeouts. 
5429 */ 5430 if ((oha == TRUE) && 5431 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 5432 nd = nd->nd_next; 5433 continue; 5434 } 5435 if (clnt_upd_nr_flags(nd->nd_nodename, sp, 5436 sd->sd_nodelist, MD_NR_OK, NULL, &xep)) { 5437 mdclrerror(&xep); 5438 } 5439 nd = nd->nd_next; 5440 } 5441 } 5442 } 5443 5444 /* 5445 * Notify rpc.mdcommd on all nodes of a nodelist change. 5446 * Send reinit command to mdcommd which forces it to get 5447 * fresh set description. 5448 */ 5449 if (suspendall_flag_rb) { 5450 /* Send reinit */ 5451 nd = sd->sd_nodelist; 5452 while (nd) { 5453 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 5454 nd = nd->nd_next; 5455 continue; 5456 } 5457 5458 /* Class is ignored for REINIT */ 5459 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_REINIT, 5460 sp, NULL, MD_MSCF_NO_FLAGS, &xep)) { 5461 mde_perror(&xep, dgettext(TEXT_DOMAIN, 5462 "Unable to reinit rpc.mdcommd.\n")); 5463 mdclrerror(&xep); 5464 } 5465 nd = nd->nd_next; 5466 } 5467 } 5468 5469 /* 5470 * Unlock diskset by resuming messages across the diskset. 5471 * Just resume all classes so that resume is the same whether 5472 * just one class was locked or all classes were locked. 5473 */ 5474 if ((suspend1_flag) || (suspendall_flag) || (suspendall_flag_rb)) { 5475 /* Send resume */ 5476 nd = sd->sd_nodelist; 5477 while (nd) { 5478 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 5479 nd = nd->nd_next; 5480 continue; 5481 } 5482 if (clnt_mdcommdctl(nd->nd_nodename, COMMDCTL_RESUME, 5483 sp, MD_MSG_CLASS0, MD_MSCF_NO_FLAGS, &xep)) { 5484 mde_perror(&xep, dgettext(TEXT_DOMAIN, 5485 "Unable to resume rpc.mdcommd.\n")); 5486 } 5487 nd = nd->nd_next; 5488 } 5489 meta_ping_mnset(sp->setno); 5490 } 5491 5492 /* 5493 * Start a resync thread on the re-added nodes 5494 * if set is not stale. 
Also start a thread to update the 5495 * abr state of all soft partitions 5496 */ 5497 if (stale_flag != MNSET_IS_STALE) { 5498 for (i = 0; i < node_c; i++) { 5499 /* 5500 * During OHA mode, don't issue RPCs to 5501 * non-alive nodes since there is no reason to 5502 * wait for RPC timeouts. 5503 */ 5504 nd = sd->sd_nodelist; 5505 while (nd) { 5506 if (strcmp(nd->nd_nodename, node_v[i]) 5507 == 0) 5508 break; 5509 nd = nd->nd_next; 5510 } 5511 if (nd == NULL) 5512 continue; 5513 5514 if ((oha == TRUE) && 5515 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 5516 continue; 5517 } 5518 5519 if (dd != 0) { 5520 if (clnt_mn_mirror_resync_all(node_v[i], 5521 sp->setno, &xep)) { 5522 mde_perror(ep, dgettext(TEXT_DOMAIN, 5523 "Unable to start resync " 5524 "thread.\n")); 5525 } 5526 if (clnt_mn_sp_update_abr(node_v[i], 5527 sp->setno, &xep)) { 5528 mde_perror(ep, dgettext(TEXT_DOMAIN, 5529 "Unable to start sp update " 5530 "thread.\n")); 5531 } 5532 } 5533 } 5534 } 5535 5536 /* level 0 */ 5537 cl_sk = cl_get_setkey(sp->setno, sp->setname); 5538 /* Don't test lock flag since guaranteed to be set if in rollback */ 5539 if (MD_MNSET_DESC(sd)) { 5540 nd = sd->sd_nodelist; 5541 while (nd) { 5542 /* 5543 * During OHA mode, don't issue RPCs to 5544 * non-alive nodes since there is no reason to 5545 * wait for RPC timeouts. 
5546 */ 5547 if ((oha == TRUE) && 5548 (!(nd->nd_flags & MD_MN_NODE_ALIVE))) { 5549 nd = nd->nd_next; 5550 continue; 5551 } 5552 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) 5553 mdclrerror(&xep); 5554 nd = nd->nd_next; 5555 } 5556 } else { 5557 for (i = 0; i < MD_MAXSIDES; i++) { 5558 /* Skip empty slots */ 5559 if (sd->sd_nodes[i][0] == '\0') 5560 continue; 5561 5562 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) 5563 mdclrerror(&xep); 5564 } 5565 } 5566 cl_set_setkey(NULL); 5567 5568 /* release signals back to what they were on entry */ 5569 if (procsigs(FALSE, &oldsigs, &xep) < 0) 5570 mdclrerror(&xep); 5571 5572 metafreereplicalist(rlp); 5573 if (node_id_list) 5574 Free(node_id_list); 5575 5576 metaflushsetname(sp); 5577 5578 if (!(MD_MNSET_DESC(sd))) { 5579 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 5580 } 5581 5582 return (rval); 5583 } 5584 5585 int 5586 meta_set_auto_take( 5587 mdsetname_t *sp, 5588 int take_val, 5589 md_error_t *ep 5590 ) 5591 { 5592 int i; 5593 md_set_desc *sd; 5594 int rval = 0; 5595 md_setkey_t *cl_sk; 5596 md_error_t xep = mdnullerror; 5597 char *hostname; 5598 md_drive_desc *dd; 5599 5600 if ((sd = metaget_setdesc(sp, ep)) == NULL) 5601 return (-1); 5602 5603 /* Make sure we own the set */ 5604 if (meta_check_ownership(sp, ep) != 0) 5605 return (-1); 5606 5607 hostname = mynode(); 5608 5609 /* Lock the set on our side */ 5610 if (clnt_lock_set(hostname, sp, ep)) { 5611 rval = -1; 5612 goto out; 5613 } 5614 5615 if (take_val) { 5616 /* enable auto_take but only if it is not already set */ 5617 if (! 
(sd->sd_flags & MD_SR_AUTO_TAKE)) { 5618 /* verify that we're the only host in the set */ 5619 for (i = 0; i < MD_MAXSIDES; i++) { 5620 if (sd->sd_nodes[i] == NULL || sd->sd_nodes[i][0] == '\0') 5621 continue; 5622 5623 if (strcmp(sd->sd_nodes[i], hostname) != 0) { 5624 (void) mddserror(ep, MDE_DS_SINGLEHOST, sp->setno, NULL, 5625 NULL, sp->setname); 5626 rval = -1; 5627 goto out; 5628 } 5629 } 5630 5631 if (clnt_enable_sr_flags(hostname, sp, MD_SR_AUTO_TAKE, ep)) 5632 rval = -1; 5633 5634 /* Disable SCSI reservations */ 5635 if (sd->sd_flags & MD_SR_MB_DEVID) 5636 dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, 5637 &xep); 5638 else 5639 dd = metaget_drivedesc(sp, MD_BASICNAME_OK, &xep); 5640 if (! mdisok(&xep)) 5641 mdclrerror(&xep); 5642 5643 if (dd != NULL) { 5644 if (rel_own_bydd(sp, dd, TRUE, &xep)) 5645 mdclrerror(&xep); 5646 } 5647 } 5648 5649 } else { 5650 /* disable auto_take, if set, or error */ 5651 if (sd->sd_flags & MD_SR_AUTO_TAKE) { 5652 if (clnt_disable_sr_flags(hostname, sp, MD_SR_AUTO_TAKE, ep)) 5653 rval = -1; 5654 5655 /* Enable SCSI reservations */ 5656 if (sd->sd_flags & MD_SR_MB_DEVID) 5657 dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, 5658 &xep); 5659 else 5660 dd = metaget_drivedesc(sp, MD_BASICNAME_OK, &xep); 5661 if (! mdisok(&xep)) 5662 mdclrerror(&xep); 5663 5664 if (dd != NULL) { 5665 mhd_mhiargs_t mhiargs = defmhiargs; 5666 5667 if (tk_own_bydd(sp, dd, &mhiargs, TRUE, &xep)) 5668 mdclrerror(&xep); 5669 } 5670 5671 } else { 5672 (void) mddserror(ep, MDE_DS_AUTONOTSET, sp->setno, NULL, NULL, 5673 sp->setname); 5674 rval = -1; 5675 } 5676 } 5677 5678 out: 5679 cl_sk = cl_get_setkey(sp->setno, sp->setname); 5680 if (clnt_unlock_set(hostname, cl_sk, &xep)) { 5681 if (rval == 0) 5682 (void) mdstealerror(ep, &xep); 5683 rval = -1; 5684 } 5685 cl_set_setkey(NULL); 5686 5687 return (rval); 5688 } 5689