1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Metadevice diskset interfaces 30 */ 31 32 #include "meta_set_prv.h" 33 #include <sys/lvm/md_crc.h> 34 35 extern char *blkname(char *); 36 37 static int 38 upd_dr_dbinfo( 39 mdsetname_t *sp, 40 md_set_desc *sd, 41 md_drive_desc *dd, 42 md_replicalist_t *rlp, 43 int forceflg, 44 md_error_t *ep 45 ) 46 { 47 md_drive_desc *p; 48 md_replica_t *r; 49 md_replicalist_t *rl; 50 int i; 51 int dbcnt; 52 int rval = 0; 53 daddr_t nblks = 0; 54 md_setkey_t *cl_sk; 55 md_error_t xep = mdnullerror; 56 md_mnnode_desc *nd; 57 ddi_devid_t devid; 58 59 /* find the smallest existing replica */ 60 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 61 r = rl->rl_repp; 62 nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); 63 } 64 65 if (nblks <= 0) 66 nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE; 67 68 for (p = dd; p != NULL; p = p->dd_next) { 69 dbcnt = 0; 70 for (rl = rlp; rl != NULL; rl = rl->rl_next) { 71 r = rl->rl_repp; 72 73 /* 74 * Before we bump up the dbcnt, if we're 75 * running with device ids in disksets, let's 76 * compare the device ids otherwise we compare 77 * the ctd names. 78 * 79 * There is a possibility the device ids might 80 * have changed. To account for that case, we 81 * fallback to comparing the ctd names if the 82 * device id comparison fails. If we aren't running 83 * in device id mode and a disk has moved, the ctd's 84 * won't match. 85 */ 86 if ((p->dd_dnp->devid != NULL) && 87 (r->r_devid != NULL) && (!MD_MNSET_DESC(sd))) { 88 (void) devid_str_decode(p->dd_dnp->devid, 89 &devid, NULL); 90 if ((devid_compare(devid, r->r_devid) == 0) || 91 (strcmp(r->r_namep->drivenamep->cname, 92 p->dd_dnp->cname) == 0)) 93 dbcnt++; 94 devid_free(devid); 95 } else { 96 if (strcmp(r->r_namep->drivenamep->cname, 97 p->dd_dnp->cname) == 0) 98 dbcnt++; 99 } 100 } 101 p->dd_dbcnt = dbcnt; 102 p->dd_dbsize = dbcnt > 0 ? nblks : 0; 103 } 104 105 /* Lock the set on current set members */ 106 if (MD_MNSET_DESC(sd)) { 107 nd = sd->sd_nodelist; 108 while (nd) { 109 /* If this is forced, don't lock other sides */ 110 if (forceflg && strcmp(mynode(), nd->nd_nodename) 111 != 0) { 112 nd = nd->nd_next; 113 continue; 114 } 115 116 /* We already locked this side in the caller */ 117 if (strcmp(mynode(), nd->nd_nodename) == 0) { 118 nd = nd->nd_next; 119 continue; 120 } 121 122 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 123 nd = nd->nd_next; 124 continue; 125 } 126 127 if (clnt_lock_set(nd->nd_nodename, sp, ep)) { 128 rval = -1; 129 goto out; 130 } 131 nd = nd->nd_next; 132 } 133 } else { 134 for (i = 0; i < MD_MAXSIDES; i++) { 135 /* Skip empty slots */ 136 if (sd->sd_nodes[i][0] == '\0') 137 continue; 138 139 /* If this is forced, don't lock other sides */ 140 if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) 141 continue; 142 143 /* We already locked this side in the caller */ 144 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 145 continue; 146 147 if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { 148 rval = -1; 149 goto out; 150 } 151 } 152 } 153 154 if (MD_MNSET_DESC(sd)) { 155 nd = sd->sd_nodelist; 156 while (nd) { 157 /* If this is forced, then only care about this node */ 158 if (forceflg && strcmp(mynode(), nd->nd_nodename) 159 != 0) { 160 nd = nd->nd_next; 161 continue; 162 } 163 164 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 165 nd = nd->nd_next; 166 continue; 167 } 168 169 if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, 170 ep) == -1) { 171 if (! mdiserror(ep, MDE_NO_SET) && 172 ! mdismddberror(ep, MDE_DB_NODB)) { 173 rval = -1; 174 break; 175 } 176 mdclrerror(ep); 177 } 178 nd = nd->nd_next; 179 } 180 } else { 181 for (i = 0; i < MD_MAXSIDES; i++) { 182 /* Skip empty slots */ 183 if (sd->sd_nodes[i][0] == '\0') 184 continue; 185 186 /* If this is forced, then only care about this node */ 187 if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) 188 continue; 189 190 if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, 191 ep) == -1) { 192 if (! mdiserror(ep, MDE_NO_SET) && 193 ! mdismddberror(ep, MDE_DB_NODB)) { 194 rval = -1; 195 break; 196 } 197 mdclrerror(ep); 198 } 199 } 200 } 201 202 out: 203 cl_sk = cl_get_setkey(sp->setno, sp->setname); 204 if (MD_MNSET_DESC(sd)) { 205 nd = sd->sd_nodelist; 206 while (nd) { 207 /* If this is forced, don't unlock other sides */ 208 if (forceflg && strcmp(mynode(), nd->nd_nodename) 209 != 0) { 210 nd = nd->nd_next; 211 continue; 212 } 213 214 /* We will unlocked this side in the caller */ 215 if (strcmp(mynode(), nd->nd_nodename) == 0) { 216 nd = nd->nd_next; 217 continue; 218 } 219 220 if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { 221 nd = nd->nd_next; 222 continue; 223 } 224 225 if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { 226 if (rval == 0) 227 (void) mdstealerror(ep, &xep); 228 rval = -1; 229 } 230 nd = nd->nd_next; 231 } 232 } else { 233 for (i = 0; i < MD_MAXSIDES; i++) { 234 /* Skip empty slots */ 235 if (sd->sd_nodes[i][0] == '\0') 236 continue; 237 238 /* If this is forced, don't unlock other sides */ 239 if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) 240 continue; 241 242 /* We will unlocked this side in the caller */ 243 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 244 continue; 245 246 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { 247 if (rval == 0) 248 (void) mdstealerror(ep, &xep); 249 rval = -1; 250 } 251 } 252 } 253 /* Do not clear the key, via cl_set_setkey(NULL) this is nested */ 254 255 return (rval); 256 } 257 258 static int 259 usetag_take(set_t setno, int usetag, md_error_t *ep) 260 { 261 mddb_dtag_use_parm_t dtup; 262 263 (void) memset(&dtup, '\0', sizeof (mddb_dtag_use_parm_t)); 264 dtup.dtup_id = usetag; 265 dtup.dtup_setno = setno; 266 267 if (metaioctl(MD_MED_USE_TAG, &dtup, &dtup.dtup_mde, NULL) != 0) 268 return (mdstealerror(ep, &dtup.dtup_mde)); 269 270 return (0); 271 } 272 273 static int 274 useit_take(set_t setno, md_error_t *ep) 275 { 276 mddb_accept_parm_t accp; 277 278 (void) memset(&accp, '\0', sizeof (mddb_accept_parm_t)); 279 accp.accp_setno = setno; 280 281 if (metaioctl(MD_MED_ACCEPT, &accp, &accp.accp_mde, NULL) != 0) 282 return (mdstealerror(ep, &accp.accp_mde)); 283 284 return (0); 285 } 286 287 /* 288 * Update the master block with the device id information for the disks 289 * in the diskset. The device id information will be consumed by the 290 * diskset import code in case of remotely replicated disksets. 291 * 292 * For the drives that have a valid diskset mddb on them, we add the 293 * device id for the drive to the unused portion of the mddb. 294 * 295 * For the drives that don't have a diskset mddb on them, we add a dummy 296 * master block that contains the device id for the drive. A dummy master 297 * block is signified by changing the master block magic number, mb_magic, 298 * to MDDB_MAGIC_DU. 299 * 300 * This code is responsible primarily for adding the appropriate device id 301 * information to diskset disks that didn't have the information. This would 302 * typically occur when the OS has been upgraded from an OS release prior to 303 * Solaris 10 304 * 305 * The error path in this routine is defined as - if an error occurs while 306 * updating the mddb for one disk in the diskset, don't bother updating *any* 307 * of the mddbs because it's game over anyways as far as disaster recovery for 308 * that diskset is concerned. 309 * 310 * This code will need to be revisited if and when support for importing 311 * partial disksets is added. 312 * 313 * NOTE: This code relies heavily on the meta_repartition() working correctly 314 * and reformatting a drive, so that there's enough room for a dummy master 315 * block, every time a drive is added to a diskset. Should 316 * the meta_repartition() code change in future, this code will have to be 317 * revisited. 318 * 319 * Returns 0 on success and -1 on failure 320 */ 321 int 322 meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep) 323 { 324 uint_t sliceno, offset; 325 void *mb; 326 mddb_mb_t *mbp; 327 int fd = -1; 328 ddi_devid_t devid = NULL; 329 md_drive_desc *dd; 330 mddrivename_t *dnp; 331 mdname_t *rsp; 332 int dbcnt; 333 int dbsize; 334 size_t len; 335 md_set_desc *sd; 336 337 /* 338 * Don't do anything for MN diskset for now. 339 */ 340 if (! metaislocalset(sp)) { 341 if ((sd = metaget_setdesc(sp, ep)) == NULL) 342 return (-1); 343 344 if (MD_MNSET_DESC(sd)) 345 return (0); 346 } 347 348 mb = Malloc(DEV_BSIZE); 349 mbp = (mddb_mb_t *)mb; 350 351 /* 352 * For every drive in the drive descriptor, iterate through all 353 * the mddbs present on it and check to see if mb_devid_magic is 354 * set. If it isn't, then update the master block with the correct 355 * device id information 356 */ 357 for (dd = drivedesc; dd != NULL; dd = dd->dd_next) { 358 int i = 0; 359 360 dnp = dd->dd_dnp; 361 dbcnt = dd->dd_dbcnt; 362 dbsize = dd->dd_dbsize; 363 364 /* 365 * When the import support for remotely replicated 366 * disksets gets implemented, we probably want to 367 * inform the user that the disks won't be self 368 * identifying if any of these calls fails 369 */ 370 if (meta_replicaslice(dnp, &sliceno, ep) != 0) 371 return (-1); 372 373 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) 374 return (-1); 375 376 if ((fd = open(rsp->rname, O_RDWR)) < 0) 377 goto cleanup; 378 379 /* if devid_str_decode fails, make sure devid is null */ 380 if (devid_str_decode(dnp->devid, &devid, NULL) != 0) { 381 devid = NULL; 382 } 383 384 do { 385 int push = 0; 386 387 offset = (i * dbsize + 16); 388 ++i; 389 390 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) 391 goto cleanup; 392 393 if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE) 394 goto cleanup; 395 396 if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum, 397 (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) 398 goto cleanup; 399 400 /* 401 * If the disk is one of the ones that doesn't 402 * have a shared mddb on it, we put a dummy 403 * master block on it. 404 */ 405 if (mbp->mb_devid_magic != MDDB_MAGIC_DE) { 406 if (dbcnt == 0) { 407 meta_mkdummymaster(sp, fd, 16); 408 break; 409 } 410 } 411 412 /* 413 * if mb_setcreatetime is 0, this field was never 414 * filled in so do it now. 415 */ 416 if ((mbp->mb_setcreatetime.tv_sec == 0) && 417 (mbp->mb_setcreatetime.tv_usec == 0)) { 418 mbp->mb_setcreatetime = 419 meta_get_lb_inittime(sp, ep); 420 push = 1; 421 } 422 423 /* 424 * If MDDB_MAGIC_DE is set in the 425 * mb_devid_magic field then we know we 426 * have a valid device id and we don't 427 * need to add it to the master block. 428 * 429 * This would have to be revisited if device 430 * ids change as a result of device id 431 * algorithms changing or somesuch. 432 */ 433 if (mbp->mb_devid_magic != MDDB_MAGIC_DE) { 434 if (devid != NULL) { 435 len = devid_sizeof(devid); 436 if (len <= (DEV_BSIZE - 437 sizeof (mddb_mb_t))) { 438 /* 439 * there's enough space to 440 * store the devid 441 */ 442 mbp->mb_devid_magic = 443 MDDB_MAGIC_DE; 444 mbp->mb_devid_len = len; 445 (void) memcpy(mbp->mb_devid, 446 (char *)devid, len); 447 push = 1; 448 } 449 } 450 } 451 452 /* 453 * write out (push) any changes we have to the mb 454 */ 455 if (push) { 456 crcgen((uchar_t *)mbp, 457 (uint_t *)&mbp->mb_checksum, 458 (uint_t)DEV_BSIZE, (crc_skip_t *)NULL); 459 460 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) 461 < 0) 462 goto cleanup; 463 464 if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE) 465 goto cleanup; 466 } 467 if (devid) 468 devid_free(devid); 469 } while (i < dbcnt); 470 (void) close(fd); 471 } 472 /* success */ 473 return (0); 474 475 cleanup: 476 if (fd != -1) 477 (void) close(fd); 478 if (devid) 479 devid_free(devid); 480 return (-1); 481 } 482 483 extern int *replicated_disk_list_built; 484 extern int replicated_disk_list_built_pass1; 485 /* 486 * Exported Entry Points 487 */ 488 int 489 meta_set_take( 490 mdsetname_t *sp, 491 mhd_mhiargs_t *mhiargsp, 492 int flags, 493 int usetag, 494 md_error_t *ep 495 ) 496 { 497 md_set_desc *sd; 498 md_drive_desc *dd; 499 md_drive_desc *d = NULL; 500 char *owner = NULL; 501 int rval = 0; 502 int pathname_return = 0; 503 int i; 504 int has_set; 505 int matches = 0; 506 int numsides = 0; 507 md_replicalist_t *rlp = NULL; 508 sigset_t oldsigs; 509 md_setkey_t *cl_sk; 510 int rb_level = 0; 511 md_error_t xep = mdnullerror; 512 mdsetname_t *local_sp = NULL; 513 side_t side; 514 int ret = 0; 515 char *newname = NULL; 516 mdkey_t side_names_key; 517 int unrslv_replicated = 0; 518 mddrivenamelist_t *dnlp = NULL; 519 int retake_flag = 0; 520 521 if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) { 522 if (flags & TAKE_USETAG) { 523 if (usetag_take(sp->setno, usetag, ep)) 524 return (-1); 525 } else { 526 if (useit_take(sp->setno, ep)) 527 return (-1); 528 } 529 530 if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0) 531 mdclrerror(ep); 532 } 533 534 /* Do we own the set? */ 535 i = own_set(sp, &owner, (flags & TAKE_FORCE), ep); 536 if (! mdisok(ep)) { 537 if (owner != NULL) 538 Free(owner); 539 return (-1); 540 } 541 542 if (i == MD_SETOWNER_NO) { 543 (void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL, 544 sp->setname); 545 if (owner != NULL) 546 Free(owner); 547 return (-1); 548 } 549 550 if (owner != NULL) { 551 Free(owner); 552 owner = NULL; 553 } 554 555 /* We already own it, we are done. */ 556 if (i == MD_SETOWNER_YES) 557 return (0); 558 559 if ((sd = metaget_setdesc(sp, &xep)) == NULL) 560 return (-1); 561 562 /* You can not take ownership of a set that has no drives */ 563 if (sd->sd_flags & MD_SR_MB_DEVID) 564 dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep); 565 else 566 dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep); 567 568 if (dd == NULL) { 569 if (! mdisok(ep)) 570 return (-1); 571 return (0); 572 } 573 574 /* END CHECK CODE */ 575 576 md_rb_sig_handling_on(); 577 578 /* Lock the set on our side */ 579 if (clnt_lock_set(mynode(), sp, ep)) { 580 rval = -1; 581 goto out; 582 } 583 584 /* 585 * Find the "side" value so that it can be used to deal with 586 * the devids. 587 */ 588 side = getnodeside(mynode(), sd); 589 590 if (side == MD_SIDEWILD) { 591 (void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(), 592 NULL, mynode()); 593 rval = -1; 594 goto out; 595 } 596 597 /* 598 * A local sets' side 0 references records associated with 599 * that node's local set. As this is a non-local set, "side" 600 * must be modified (by adding a SKEW) before we reference 601 * records in the local set [setno = 0] for the non-local set 602 * [setno = 1..n]. 603 */ 604 side += SKEW; 605 606 /* 607 * If this set had been previously imported as a partial replicated 608 * diskset, then must attempt to updated any unresolved drive 609 * records in diskset with new devid information. Must set 610 * flags in drivedesc list before loading up set so that the 611 * md driver will fix up names and devids correctly in the 612 * locator block. 613 */ 614 if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) { 615 md_im_names_t cnames = { 0, NULL}; 616 ddi_devid_t old_devid, new_devid; 617 char *search_path = "/dev"; 618 devid_nmlist_t *nmlist; 619 int indx; 620 mddrivenamelist_t **dnlpp = &dnlp; 621 622 if (meta_list_disks(ep, &cnames) != 0) { 623 rval = -1; 624 goto out; 625 } 626 627 for (indx = 0; indx < cnames.min_count; ++indx) { 628 mddrivename_t *dnp; 629 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 630 int fd = -1; 631 ddi_devid_t devid1; 632 char *cdevidp; 633 int len; 634 char *fp; 635 636 /* 637 * We may have name collision here so we need to get 638 * the dnp using the devid and not the name. 639 */ 640 len = strlen(cnames.min_names[indx]) + strlen("s0"); 641 if ((fp = (char *)Malloc(len+1)) == NULL) { 642 (void) mdsyserror(ep, ENOMEM, NULL); 643 rval = -1; 644 goto out; 645 } 646 (void) snprintf(fp, len + 1, "%ss0", 647 cnames.min_names[indx]); 648 if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) { 649 (void) mdsyserror(ep, EIO, fp); 650 rval = -1; 651 goto out; 652 } 653 Free(fp); 654 /* if no device id, what error?) */ 655 if (devid_get(fd, &devid1) != 0) { 656 (void) mdsyserror(ep, EIO, fp); 657 rval = -1; 658 goto out; 659 } 660 if (close(fd) < 0) { 661 (void) mdsyserror(ep, EIO, fp); 662 rval = -1; 663 goto out; 664 } 665 cdevidp = devid_str_encode(devid1, NULL); 666 if (cdevidp == NULL) { 667 (void) mdsyserror(ep, EIO, fp); 668 rval = -1; 669 goto out; 670 } 671 devid_free(devid1); 672 dnp = metadrivenamebydevid(&sp, cdevidp, 673 cnames.min_names[indx], ep); 674 devid_str_free(cdevidp); 675 if (dnp == NULL) { 676 /* 677 * Assuming we're interested in knowing about 678 * whatever error occurred, but not in stopping. 679 */ 680 mde_perror(ep, cnames.min_names[indx]); 681 mdclrerror(ep); 682 continue; 683 } 684 685 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp); 686 } 687 /* Reget sd and dd since freed by meta_prune_cnames. */ 688 if ((sd = metaget_setdesc(sp, ep)) == NULL) { 689 rval = -1; 690 goto out; 691 } 692 693 if (sd->sd_flags & MD_SR_MB_DEVID) 694 dd = metaget_drivedesc(sp, 695 MD_BASICNAME_OK | PRINT_FAST, ep); 696 else 697 dd = metaget_drivedesc(sp, 698 MD_BASICNAME_OK, ep); 699 /* If ep has error, then there was a failure, set rval */ 700 if (!mdisok(ep)) { 701 rval = -1; 702 goto out; 703 } 704 705 /* Builds global replicated disk list */ 706 replicated_disk_list_built = &replicated_disk_list_built_pass1; 707 708 /* If success, then clear error structure */ 709 if (build_replicated_disks_list(ep, dnlp) == 1) 710 mdclrerror(ep); 711 /* If ep has error, then there was a failure, set rval */ 712 if (! mdisok(ep)) { 713 rval = -1; 714 goto out; 715 } 716 717 for (d = dd; d != NULL; d = d->dd_next) { 718 if (d->dd_flags & MD_DR_UNRSLV_REPLICATED) { 719 /* Get old devid from drive record */ 720 (void) devid_str_decode(d->dd_dnp->devid, 721 &old_devid, NULL); 722 723 /* 724 * If the devid stored in the drive record 725 * (old_devid) matches a devid known by 726 * the system, then this disk has already 727 * been partially resolved. This situation 728 * could occur if a panic happened during a 729 * previous take of this diskset. 730 * Set flag to later handle fixing the master 731 * block on disk and turning off the unresolved 732 * replicated flag. 733 */ 734 if (meta_deviceid_to_nmlist(search_path, 735 (ddi_devid_t)old_devid, 736 DEVID_MINOR_NAME_ALL, 737 &nmlist) == 0) { 738 d->dd_flags |= MD_DR_FIX_MB_DID; 739 retake_flag = 1; 740 continue; 741 } 742 743 /* 744 * If the devid stored in the drive record 745 * is on the list of replicated disks found 746 * during a system scan then set both flags 747 * so that the locator block, namespaces 748 * (diskset and local set), master block 749 * and unresolved replicated flag are updated. 750 */ 751 new_devid = replicated_list_lookup( 752 devid_sizeof((ddi_devid_t)old_devid), 753 old_devid); 754 devid_free(old_devid); 755 756 /* 757 * If devid stored in the drive record is 758 * not found then set flag to mark 759 * that set is still unresolved and 760 * continue to next drive record. 761 */ 762 if (new_devid == NULL) { 763 unrslv_replicated = 1; 764 continue; 765 } 766 767 /* 768 * Set flags to fix up the master block, 769 * locator block of the diskset, diskset 770 * namespace and the local set namespace. 771 */ 772 d->dd_flags |= (MD_DR_FIX_MB_DID | 773 MD_DR_FIX_LB_NM_DID); 774 retake_flag = 1; 775 } 776 } 777 778 } 779 780 /* 781 * Check the local devid namespace to see if the disks 782 * have been moved. Use the local set first of all as this contains 783 * entries for the disks in the set. 784 * 785 * This is being done before the tk_own_bydd because the disks 786 * in the dd list could be wrong! But it should be done with the lock 787 * held for the set. 788 */ 789 local_sp = metasetname(MD_LOCAL_NAME, ep); 790 for (d = dd; d != NULL; d = d->dd_next) { 791 /* 792 * Actually do the check of the disks. 793 */ 794 ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp, &newname, 795 ep); 796 797 if ((ret == METADEVADM_ERR) || 798 (ret == METADEVADM_DSKNAME_ERR)) { 799 /* check failed in some unknown manner */ 800 rval = -1; 801 goto out; 802 } else if (ret == METADEVADM_DISKMOVE) { 803 804 /* 805 * Update the dd namelist so that the rpc.metamhd 806 * gets the correct disks to reserve - it is the rname 807 * we are interested in. 808 */ 809 if (newname != NULL) { 810 char *save_devid; 811 /* 812 * Need to save the side names key as this 813 * points to the namespace entry that will 814 * need to be updated. In addition the call 815 * to meta_make_sidenmlist does not actually 816 * set the namespace key. 817 */ 818 side_names_key = d->dd_dnp->side_names_key; 819 820 /* 821 * There is the possibility that there 822 * will be multiple disks with the same 823 * name but different devids in the 824 * drivelist. Because of this, we need 825 * to look for a new dnp based on devid 826 * and not name. 827 */ 828 save_devid = Strdup(d->dd_dnp->devid); 829 metafreedrivename(d->dd_dnp); 830 d->dd_dnp = metadrivenamebydevid(&sp, 831 save_devid, newname, ep); 832 Free(save_devid); 833 Free(newname); 834 /* 835 * null newname so we are reset for next time 836 * through 837 */ 838 newname = NULL; 839 ret = meta_make_sidenmlist(sp, 840 d->dd_dnp, 0, NULL, ep); 841 d->dd_dnp->side_names_key = side_names_key; 842 if (ret == -1) { 843 rval = -1; 844 goto out; 845 } 846 } 847 } 848 } 849 850 851 RB_TEST(1, "take", ep) 852 853 RB_PREEMPT; 854 rb_level = 1; /* level 1 */ 855 856 RB_TEST(2, "take", ep) 857 858 if (!MD_ATSET_DESC(sd)) { 859 if (tk_own_bydd(sp, dd, mhiargsp, 860 flags & MD_IM_PARTIAL_DISKSET, ep)) 861 goto rollback; 862 } 863 864 RB_TEST(3, "take", ep) 865 866 RB_PREEMPT; 867 rb_level = 2; /* level 2 */ 868 869 RB_TEST(4, "take", ep) 870 871 if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1) 872 goto rollback; 873 874 if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) { 875 if (! mdismddberror(ep, MDE_DB_ACCOK) && 876 ! mdismddberror(ep, MDE_DB_TAGDATA)) 877 goto rollback; 878 mdclrerror(ep); 879 } 880 881 RB_TEST(5, "take", ep) 882 883 RB_PREEMPT; 884 rb_level = 3; /* level 3 */ 885 886 RB_TEST(6, "take", ep) 887 888 /* Snarf set of traditional diskset doesn't use stale information */ 889 if (snarf_set(sp, FALSE, ep)) { 890 if (mdismddberror(ep, MDE_DB_STALE) || 891 mdismddberror(ep, MDE_DB_ACCOK) || 892 mdismddberror(ep, MDE_DB_TAGDATA)) { 893 rval = -1; 894 goto out; 895 } 896 897 if (! mdismddberror(ep, MDE_DB_NODB) && 898 ! mdismddberror(ep, MDE_DB_NOTOWNER)) 899 goto rollback; 900 901 /* 902 * Look at the set on all other hosts, if every other host 903 * has the same set with a larger genid, then we destroy this 904 * copy. 905 */ 906 for (i = 0; i < MD_MAXSIDES; i++) { 907 /* Skip empty slots */ 908 if (sd->sd_nodes[i][0] == '\0') 909 continue; 910 911 /* Skip this node */ 912 if (strcmp(sd->sd_nodes[i], mynode()) == 0) 913 continue; 914 915 numsides++; 916 917 has_set = nodehasset(sp, sd->sd_nodes[i], 918 NHS_NST_EQ_G_GT, &xep); 919 920 if (has_set < 0) { 921 if (! mdiserror(&xep, MDE_NO_SET) && 922 ! mdismddberror(&xep, MDE_DB_NODB)) 923 goto rollback; 924 matches++; 925 mdclrerror(&xep); 926 continue; 927 } 928 929 if (has_set) 930 matches++; 931 } 932 933 /* Destroy the set */ 934 if (numsides > 0 && (numsides - matches) == 0) { 935 if (meta_set_destroy(sp, FALSE, &xep)) 936 mdclrerror(&xep); 937 (void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno, 938 sp->setname, NULL, mynode()); 939 rval = -1; 940 } 941 goto rollback; 942 } 943 944 /* 945 * If an unresolved replicated diskset, fix up diskset 946 * and local namespaces, master block and drive record 947 * with the new devid. If all drives in diskset are 948 * now resolved, then clear set unresolved replicated flag. 949 * If an error is encountered, don't fail the take, but 950 * don't proceed any further in resolving the replicated disks. 951 */ 952 if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) { 953 /* Fix up diskset and local namespaces with new devids */ 954 meta_unrslv_replicated_nm(sp, dd, dnlp, ep); 955 if (mdisok(ep)) { 956 /* Fix up master block with new devids */ 957 meta_unrslv_replicated_mb(sp, dd, dnlp, ep); 958 } 959 960 /* If all drives are resolved, set OK flag in set record. */ 961 if (mdisok(ep) && (unrslv_replicated == 0)) { 962 /* Ignore failure since no bad effect. */ 963 (void) clnt_upd_sr_flags(mynode(), sp, MD_SR_OK, ep); 964 } 965 mdclrerror(ep); 966 967 } 968 969 pathname_return = pathname_reload(&sp, sp->setno, ep); 970 if ((pathname_return == METADEVADM_ERR) || 971 (pathname_return == METADEVADM_DSKNAME_ERR)) { 972 goto rollback; 973 } 974 975 976 if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) 977 goto rollback; 978 979 if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) { 980 metafreereplicalist(rlp); 981 goto rollback; 982 } 983 984 metafreereplicalist(rlp); 985 986 /* 987 * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e 988 * the drives in the set don't have the device id information, 989 * then stick it in if possible. 990 * 991 * If updating the master block fails for whatever reason, it's 992 * okay. It just means the disk(s) in the diskset won't be self 993 * identifying. 994 */ 995 if (!(sd->sd_flags & MD_SR_MB_DEVID)) { 996 /* Lock the set on current set members */ 997 for (i = 0; i < MD_MAXSIDES; i++) { 998 /* Skip empty slots */ 999 if (sd->sd_nodes[i][0] == '\0') 1000 continue; 1001 1002 /* We already locked this side */ 1003 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 1004 continue; 1005 1006 if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { 1007 rval = -1; 1008 goto out; 1009 } 1010 } 1011 rb_level = 4; /* level 4 */ 1012 1013 if (meta_update_mb(sp, dd, ep) == 0) 1014 /* update the sr_flags on all hosts */ 1015 for (i = 0; i < MD_MAXSIDES; i++) { 1016 /* Skip empty slots */ 1017 if (sd->sd_nodes[i][0] == '\0') 1018 continue; 1019 1020 if (clnt_upd_sr_flags(sd->sd_nodes[i], 1021 sp, (sd->sd_flags | MD_SR_MB_DEVID), ep)) 1022 goto rollback; 1023 } 1024 1025 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1026 for (i = 0; i < MD_MAXSIDES; i++) { 1027 /* Skip empty slots */ 1028 if (sd->sd_nodes[i][0] == '\0') 1029 continue; 1030 1031 /* Unlocked of this side is done later */ 1032 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 1033 continue; 1034 1035 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { 1036 if (rval == 0) 1037 (void) mdstealerror(ep, &xep); 1038 rval = -1; 1039 } 1040 } 1041 } 1042 1043 /* 1044 * If we get here, we need to unlock the set before the resync 1045 * gets called, otherwise the "daemon" will hold the set lock 1046 * until the resync is done! 1047 */ 1048 1049 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1050 if (clnt_unlock_set(mynode(), cl_sk, &xep)) { 1051 if (rval == 0) 1052 (void) mdstealerror(ep, &xep); 1053 rval = -1; 1054 } 1055 cl_set_setkey(NULL); 1056 1057 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1058 1059 /* We try to get things resync'ed, but this can fail */ 1060 mdclrerror(&xep); 1061 if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) { 1062 if (rval == 0) 1063 (void) mdstealerror(ep, &xep); 1064 rval = -1; 1065 } 1066 1067 RB_TEST(7, "take", ep) 1068 1069 /* 1070 * In order to resolve the namespace major driver names and 1071 * to have the subdrivers attempt to re-associate devts from 1072 * the newly resolved replicated device ids, return a '2'. 1073 * This instructs metaset to release the diskset and re-take. 1074 * 1075 * Return a 2 if 1076 * - no error was detected on the take 1077 * - a replicated unresolved devid was resolved during take 1078 * - take isn't being called during an import 1079 * - this isn't already a re-take situation 1080 */ 1081 if ((rval == 0) && (retake_flag == 1) && 1082 ((flags & (TAKE_RETAKE | TAKE_IMP)) == 0)) { 1083 rval = 2; 1084 } 1085 1086 return (rval); 1087 1088 out: 1089 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1090 if (clnt_unlock_set(mynode(), cl_sk, &xep)) { 1091 if (rval == 0) 1092 (void) mdstealerror(ep, &xep); 1093 rval = -1; 1094 } 1095 if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) { 1096 for (i = 0; i < MD_MAXSIDES; i++) { 1097 /* Skip empty slots */ 1098 if (sd->sd_nodes[i][0] == '\0') 1099 continue; 1100 1101 /* We already unlocked this side */ 1102 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 1103 continue; 1104 1105 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { 1106 if (rval == 0) 1107 (void) mdstealerror(ep, &xep); 1108 rval = -1; 1109 } 1110 } 1111 } 1112 cl_set_setkey(NULL); 1113 1114 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1115 1116 return (rval); 1117 1118 rollback: 1119 /* Make sure we are blocking all signals */ 1120 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1121 mdclrerror(&xep); 1122 1123 rval = -1; 1124 1125 /* level 4 */ 1126 if (rb_level > 3) { 1127 if (sd->sd_flags & MD_SR_MB_DEVID) { 1128 /* update the sr_flags on all hosts */ 1129 for (i = 0; i < MD_MAXSIDES; i++) { 1130 /* Skip empty slots */ 1131 if (sd->sd_nodes[i][0] == '\0') 1132 continue; 1133 1134 if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, 1135 (sd->sd_flags & ~MD_SR_MB_DEVID), &xep)) 1136 mdclrerror(&xep); 1137 } 1138 } 1139 1140 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1141 for (i = 0; i < MD_MAXSIDES; i++) { 1142 /* Skip empty slots */ 1143 if (sd->sd_nodes[i][0] == '\0') 1144 continue; 1145 1146 /* We will unlocked this side below */ 1147 if (strcmp(mynode(), sd->sd_nodes[i]) == 0) 1148 continue; 1149 1150 if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) 1151 mdclrerror(&xep); 1152 } 1153 } 1154 1155 /* level 3 */ 1156 if (rb_level > 2) { 1157 if (halt_set(sp, &xep)) 1158 mdclrerror(&xep); 1159 } 1160 1161 /* level 2 */ 1162 if (rb_level > 1) { 1163 if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1) 1164 mdclrerror(&xep); 1165 } 1166 1167 /* level 1 */ 1168 if (rb_level > 0) { 1169 if (!MD_ATSET_DESC(sd)) { 1170 if (rel_own_bydd(sp, dd, FALSE, &xep)) 1171 mdclrerror(&xep); 1172 } 1173 } 1174 1175 /* level 0 */ 1176 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1177 if (clnt_unlock_set(mynode(), cl_sk, &xep)) 1178 mdclrerror(&xep); 1179 cl_set_setkey(NULL); 1180 1181 /* release signals back to what they were on entry */ 1182 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1183 mdclrerror(&xep); 1184 1185 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1186 1187 return (rval); 1188 } 1189 1190 int 1191 meta_set_release( 1192 mdsetname_t *sp, 1193 md_error_t *ep 1194 ) 1195 { 1196 int rval = 0; 1197 md_drive_desc *dd; 1198 mhd_mhiargs_t mhiargs; 1199 sigset_t oldsigs; 1200 md_setkey_t *cl_sk; 1201 int rb_level = 0; 1202 md_error_t xep = mdnullerror; 1203 1204 /* Make sure we own the set */ 1205 if (meta_check_ownership(sp, ep) != 0) 1206 return (-1); 1207 1208 /* Get the drive descriptors */ 1209 if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), 1210 ep)) == NULL) 1211 if (! mdisok(ep)) 1212 return (-1); 1213 1214 /* Get timeout values in case we need to roll back this release */ 1215 (void) memset(&mhiargs, '\0', sizeof (mhiargs)); 1216 if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) 1217 return (-1); 1218 1219 /* END CHECK CODE */ 1220 1221 md_rb_sig_handling_on(); 1222 1223 /* Lock the set on our side */ 1224 if (clnt_lock_set(mynode(), sp, ep)) { 1225 rval = -1; 1226 goto out; 1227 } 1228 1229 RB_TEST(1, "release", ep) 1230 1231 RB_PREEMPT; 1232 rb_level = 1; /* level 1 */ 1233 1234 RB_TEST(2, "release", ep) 1235 1236 if (halt_set(sp, ep)) 1237 goto rollback; 1238 1239 RB_TEST(3, "release", ep) 1240 1241 RB_PREEMPT; 1242 rb_level = 2; /* level 2 */ 1243 1244 RB_TEST(4, "release", ep) 1245 1246 if (rel_own_bydd(sp, dd, FALSE, ep)) 1247 goto rollback; 1248 1249 RB_TEST(5, "release", ep) 1250 1251 RB_PREEMPT; 1252 rb_level = 3; /* level 3 */ 1253 1254 RB_TEST(6, "release", ep) 1255 1256 if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1) 1257 goto rollback; 1258 1259 RB_TEST(7, "release", ep) 1260 1261 out: 1262 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1263 if (clnt_unlock_set(mynode(), cl_sk, &xep)) { 1264 if (rval == 0) 1265 (void) mdstealerror(ep, &xep); 1266 rval = -1; 1267 } 1268 cl_set_setkey(NULL); 1269 1270 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1271 1272 return (rval); 1273 1274 rollback: 1275 /* Make sure we are blocking all signals */ 1276 if (procsigs(TRUE, &oldsigs, &xep) < 0) 1277 mdclrerror(&xep); 1278 1279 rval = -1; 1280 1281 /* level 3 */ 1282 if (rb_level > 2) { 1283 if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1) 1284 mdclrerror(&xep); 1285 } 1286 1287 /* level 2 */ 1288 if (rb_level > 1) { 1289 if (tk_own_bydd(sp, dd, &mhiargs, FALSE, &xep)) 1290 mdclrerror(&xep); 1291 } 1292 1293 /* level 1 */ 1294 if (rb_level > 0) { 1295 if (setup_db_bydd(sp, dd, TRUE, &xep) == -1) 1296 mdclrerror(&xep); 1297 1298 /* Snarf set of trad diskset doesn't use stale information */ 1299 if (snarf_set(sp, FALSE, &xep)) 1300 mdclrerror(&xep); 1301 } 1302 1303 /* level 0 */ 1304 cl_sk = cl_get_setkey(sp->setno, sp->setname); 1305 if (clnt_unlock_set(mynode(), cl_sk, &xep)) 1306 mdclrerror(&xep); 1307 cl_set_setkey(NULL); 1308 1309 /* release signals back to what they were on entry */ 1310 if (procsigs(FALSE, &oldsigs, &xep) < 0) 1311 mdclrerror(&xep); 1312 1313 md_rb_sig_handling_off(md_got_sig(), md_which_sig()); 1314 1315 return (rval); 1316 } 1317