1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <ctype.h> 30 #include <libdevinfo.h> 31 #include <mdiox.h> 32 #include <meta.h> 33 #include "meta_repartition.h" 34 #include "meta_set_prv.h" 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <strings.h> 38 #include <sys/lvm/md_mddb.h> 39 #include <sys/lvm/md_names.h> 40 #include <sys/lvm/md_crc.h> 41 #include <sys/lvm/md_convert.h> 42 43 typedef struct did_list { 44 void *rdid; /* real did if replicated set */ 45 void *did; /* did stored in lb */ 46 char *devname; 47 dev_t dev; 48 uint_t did_index; 49 char *minor_name; 50 char *driver_name; 51 int available; 52 struct did_list *next; 53 } did_list_t; 54 55 typedef struct replicated_disk { 56 void *old_devid; 57 void *new_devid; 58 struct replicated_disk *next; 59 } replicated_disk_t; 60 61 /* 62 * The current implementation limits the max device id length to 256 bytes. 63 * Should the max device id length be increased, this definition would have to 64 * be bumped up accordingly 65 */ 66 #define MAX_DEVID_LEN 256 67 68 /* 69 * We store a global list of all the replicated disks in the system. In 70 * order to prevent us from performing a linear search on this list, we 71 * store the disks in a two dimensional sparse array. The disks are bucketed 72 * based on the length of their device ids. 73 */ 74 static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL}; 75 76 /* 77 * The list of replicated disks is built just once and this flag is set 78 * once it's done 79 */ 80 int replicated_disk_list_built_pass1 = 0; 81 int replicated_disk_list_built_pass2 = 0; 82 int *replicated_disk_list_built; 83 84 static void free_did_list(did_list_t *did_listp); 85 86 /* 87 * Map logical blk to physical 88 * 89 * This is based on the routine of the same name in the md kernel module (see 90 * file md_mddb.c), with the following caveats: 91 * 92 * - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this 93 * routine works instead on the mddb_mb_t read directly from the disk 94 */ 95 daddr_t 96 getphysblk( 97 mddb_block_t blk, 98 mddb_mb_t *mbp 99 ) 100 { 101 /* 102 * Sanity check: is the block within range? If so, we then assume 103 * that the block range map in the master block is valid and 104 * consistent with the block count. Unfortunately, there is no 105 * reliable way to validate this assumption. 106 */ 107 if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive) 108 return ((daddr_t)-1); 109 110 return (mbp->mb_blkmap.m_firstblk + blk); 111 } 112 113 114 115 /* 116 * drive_append() 117 * 118 * Append to tail of linked list of md_im_drive_info_t. 119 * 120 * Will allocate space for new node and copy args into new space. 121 * 122 * Returns pointer to new node. 123 */ 124 static md_im_drive_info_t * 125 drive_append( 126 md_im_drive_info_t **midpp, 127 mddrivename_t *dnp, 128 did_list_t *nonrep_did_listp, 129 minor_t mnum, 130 md_timeval32_t timestamp, 131 md_im_replica_info_t *mirp 132 ) 133 { 134 md_im_drive_info_t *midp; 135 int o_devid_sz; 136 int devid_sz; 137 138 for (; (*midpp != NULL); midpp = &((*midpp)->mid_next)) 139 ; 140 141 midp = *midpp = Zalloc(sizeof (md_im_drive_info_t)); 142 143 midp->mid_dnp = dnp; 144 145 /* 146 * If rdid is not NULL then we know we are dealing with 147 * replicated diskset case. 'devid_sz' will always be the 148 * size of a valid devid which can be 'did' or 'rdid' 149 */ 150 151 if (nonrep_did_listp->rdid) { 152 devid_sz = devid_sizeof(nonrep_did_listp->rdid); 153 midp->mid_devid = (void *)Malloc(devid_sz); 154 (void) memcpy(midp->mid_devid, nonrep_did_listp->rdid, 155 devid_sz); 156 /* 157 * Also need to store the 'other' devid 158 */ 159 o_devid_sz = devid_sizeof((ddi_devid_t)(nonrep_did_listp->did)); 160 midp->mid_o_devid = (void *)Malloc(o_devid_sz); 161 (void) memcpy(midp->mid_o_devid, nonrep_did_listp->did, 162 o_devid_sz); 163 midp->mid_o_devid_sz = o_devid_sz; 164 } else { 165 devid_sz = devid_sizeof(nonrep_did_listp->did); 166 midp->mid_devid = (void *)Malloc(devid_sz); 167 /* 168 * In the case of regular diskset, midp->mid_o_devid 169 * will be a NULL pointer 170 */ 171 (void) memcpy(midp->mid_devid, nonrep_did_listp->did, devid_sz); 172 } 173 174 midp->mid_devid_sz = devid_sz; 175 midp->mid_setcreatetimestamp = timestamp; 176 midp->mid_available = nonrep_did_listp->available; 177 if (nonrep_did_listp->minor_name) { 178 (void) strlcpy(midp->mid_minor_name, 179 nonrep_did_listp->minor_name, MDDB_MINOR_NAME_MAX); 180 } 181 midp->mid_mnum = mnum; 182 if (nonrep_did_listp->driver_name) 183 midp->mid_driver_name = Strdup(nonrep_did_listp->driver_name); 184 midp->mid_replicas = mirp; 185 if (nonrep_did_listp->devname) 186 midp->mid_devname = Strdup(nonrep_did_listp->devname); 187 return (midp); 188 } 189 190 191 192 /* 193 * drive_append_wrapper() 194 * 195 * Constant time append wrapper; the append function will always walk the list, 196 * this will take a tail argument and use the append function on just the tail 197 * node, doing the appropriate old-tail-next-pointer bookkeeping. 198 */ 199 static md_im_drive_info_t ** 200 drive_append_wrapper( 201 md_im_drive_info_t **tailpp, 202 mddrivename_t *dnp, 203 did_list_t *nonrep_did_listp, 204 minor_t mnum, 205 md_timeval32_t timestamp, 206 md_im_replica_info_t *mirp 207 ) 208 { 209 (void) drive_append(tailpp, dnp, nonrep_did_listp, mnum, timestamp, 210 mirp); 211 212 if ((*tailpp)->mid_next == NULL) 213 return (tailpp); 214 215 return (&((*tailpp)->mid_next)); 216 } 217 218 219 220 /* 221 * replica_append() 222 * 223 * Append to tail of linked list of md_im_replica_info_t. 224 * 225 * Will allocate space for new node and copy args into new space. 226 * 227 * Returns pointer to new node. 228 */ 229 static md_im_replica_info_t * 230 replica_append( 231 md_im_replica_info_t **mirpp, 232 int flags, 233 daddr32_t offset, 234 daddr32_t length, 235 md_timeval32_t timestamp 236 ) 237 { 238 md_im_replica_info_t *mirp; 239 240 for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next)) 241 ; 242 243 mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t)); 244 245 mirp->mir_flags = flags; 246 mirp->mir_offset = offset; 247 mirp->mir_length = length; 248 mirp->mir_timestamp = timestamp; 249 250 return (mirp); 251 252 } 253 254 255 256 /* 257 * replica_append_wrapper() 258 * 259 * Constant time append wrapper; the append function will always walk the list, 260 * this will take a tail argument and use the append function on just the tail 261 * node, doing the appropriate old-tail-next-pointer bookkeeping. 262 */ 263 static md_im_replica_info_t ** 264 replica_append_wrapper( 265 md_im_replica_info_t **tailpp, 266 int flags, 267 daddr32_t offset, 268 daddr32_t length, 269 md_timeval32_t timestamp 270 ) 271 { 272 (void) replica_append(tailpp, flags, offset, length, timestamp); 273 274 if ((*tailpp)->mir_next == NULL) 275 return (tailpp); 276 277 return (&(*tailpp)->mir_next); 278 } 279 280 /* 281 * map_replica_disk() 282 * 283 * Searches the device id list for a specific 284 * disk based on the locator block device id array index. 285 * 286 * Returns a pointer to the did_list node if a match was 287 * found or NULL otherwise. 288 */ 289 static did_list_t * 290 map_replica_disk( 291 did_list_t *did_listp, 292 int did_index 293 ) 294 { 295 did_list_t *tailp = did_listp; 296 297 while (tailp != NULL) { 298 if (tailp->did_index == did_index) 299 return (tailp); 300 tailp = tailp->next; 301 } 302 303 /* not found, return failure */ 304 return (NULL); 305 } 306 307 /* 308 * replicated_list_lookup() 309 * 310 * looks up a replicated disk entry in the global replicated disk list 311 * based upon the length of that disk's device id. returns the new device id 312 * for the disk. 313 * If you store the returned devid you must create a local copy. 314 */ 315 void * 316 replicated_list_lookup( 317 uint_t devid_len, 318 void *old_devid 319 ) 320 { 321 replicated_disk_t *head = NULL; 322 323 assert(devid_len <= MAX_DEVID_LEN); 324 head = replicated_disk_list[devid_len]; 325 326 if (head == NULL) 327 return (NULL); 328 329 do { 330 if (devid_compare((ddi_devid_t)old_devid, 331 (ddi_devid_t)head->old_devid) == 0) 332 return (head->new_devid); 333 head = head->next; 334 } while (head != NULL); 335 336 return (NULL); 337 } 338 339 /* 340 * replicated_list_insert() 341 * 342 * inserts a replicated disk entry into the global replicated disk list 343 */ 344 static void 345 replicated_list_insert( 346 size_t old_devid_len, 347 void *old_devid, 348 void *new_devid 349 ) 350 { 351 replicated_disk_t *repl_disk, **first_entry; 352 void *repl_old_devid = NULL; 353 354 assert(old_devid_len <= MAX_DEVID_LEN); 355 356 repl_disk = Zalloc(sizeof (replicated_disk_t)); 357 repl_old_devid = Zalloc(old_devid_len); 358 (void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len); 359 360 repl_disk->old_devid = repl_old_devid; 361 repl_disk->new_devid = new_devid; 362 363 first_entry = &replicated_disk_list[old_devid_len]; 364 365 if (*first_entry == NULL) { 366 *first_entry = repl_disk; 367 return; 368 } 369 370 repl_disk->next = *first_entry; 371 replicated_disk_list[old_devid_len] = repl_disk; 372 } 373 374 /* 375 * get_replica_disks() 376 * 377 * Will step through the locator records in the supplied locator block, and add 378 * each one with an active replica to a supplied list of md_im_drive_info_t, and 379 * add the appropriate replicas to the md_im_replica_info_t contained therein. 380 */ 381 static void 382 get_replica_disks( 383 md_im_set_desc_t *misp, 384 did_list_t *did_listp, 385 mddb_mb_t *mb, 386 mddb_lb_t *lbp, 387 md_error_t *ep 388 ) 389 { 390 mddrivename_t *dnp; 391 int indx, on_list; 392 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 393 int flags; 394 did_list_t *replica_disk; 395 daddr32_t offset; 396 daddr32_t length; 397 md_timeval32_t timestamp; 398 md_im_replica_info_t **mirpp = NULL; 399 md_im_drive_info_t **midpp = &misp->mis_drives; 400 md_im_drive_info_t *midp; 401 402 for (indx = 0; indx < lbp->lb_loccnt; indx++) { 403 404 on_list = 0; 405 if ((lbp->lb_locators[indx].l_flags == 0) || 406 (lbp->lb_locators[indx].l_flags & MDDB_F_DELETED)) 407 continue; 408 409 /* 410 * search the device id list for a 411 * specific ctds based on the locator 412 * block device id array index. 413 */ 414 replica_disk = map_replica_disk(did_listp, indx); 415 416 assert(replica_disk != NULL); 417 418 419 /* 420 * metadrivename() can fail for a slice name 421 * if there is not an existing mddrivename_t. 422 * So we use metadiskname() to strip the slice 423 * number. 424 */ 425 dnp = metadrivename(&sp, metadiskname(replica_disk->devname), 426 ep); 427 428 for (midp = misp->mis_drives; midp != NULL; 429 midp = midp->mid_next) { 430 if (dnp == midp->mid_dnp) { 431 /* 432 * You could get a dnp match, but if 1 disk 433 * is unavailable and the other isn't, they 434 * will have the same dnp due 435 * to the name being the same, but in fact 436 * are different disks. 437 */ 438 if (midp->mid_available == 439 replica_disk->available) { 440 on_list = 1; 441 mirpp = &midp->mid_replicas; 442 break; 443 } 444 } 445 } 446 447 /* 448 * New on the list so add it 449 */ 450 if (!on_list) { 451 mddb_mb_t *mbp; 452 uint_t sliceno; 453 mdname_t *rsp; 454 int fd = -1; 455 456 mbp = Malloc(DEV_BSIZE); 457 458 /* 459 * If the disk isn't available, we don't 460 * want to try to read from it. 461 */ 462 if (replica_disk->available == MD_IM_DISK_AVAILABLE) { 463 /* determine the replica slice */ 464 if (meta_replicaslice(dnp, &sliceno, 465 ep) != 0) { 466 Free(mbp); 467 continue; 468 } 469 470 /* 471 * if the replica slice size is zero, 472 * don't bother opening 473 */ 474 if (dnp->vtoc.parts[sliceno].size == 0) { 475 Free(mbp); 476 continue; 477 } 478 479 if ((rsp = metaslicename(dnp, sliceno, 480 ep)) == NULL) { 481 Free(mbp); 482 continue; 483 } 484 485 if ((fd = open(rsp->rname, 486 O_RDONLY| O_NDELAY)) < 0) { 487 Free(mbp); 488 continue; 489 } 490 491 /* 492 * a drive may not have a master block 493 */ 494 if (read_master_block(ep, fd, mbp, 495 DEV_BSIZE) <= 0) { 496 mdclrerror(ep); 497 Free(mbp); 498 (void) close(fd); 499 continue; 500 } 501 502 (void) close(fd); 503 } 504 midpp = drive_append_wrapper(midpp, dnp, 505 replica_disk, 506 meta_getminor(replica_disk->dev), 507 mbp->mb_setcreatetime, NULL); 508 mirpp = &((*midpp)->mid_replicas); 509 Free(mbp); 510 } 511 512 /* 513 * For either of these assertions to fail, it implies 514 * a NULL return from metadrivename() above. Since 515 * the args came from a presumed valid locator block, 516 * that's Bad. 517 */ 518 assert(midpp != NULL); 519 assert(mirpp != NULL); 520 521 /* 522 * Extract the parameters describing this replica. 523 * 524 * The magic "1" in the length calculation accounts 525 * for the length of the master block, in addition to 526 * the block count it describes. (The master block 527 * will always take up one block on the disk, and 528 * there will always only be one master block per 529 * replica, even though much of the code is structured 530 * to handle noncontiguous replicas.) 531 */ 532 flags = lbp->lb_locators[indx].l_flags; 533 offset = lbp->lb_locators[indx].l_blkno; 534 length = mb->mb_blkcnt + 1; 535 timestamp = mb->mb_setcreatetime; 536 537 mirpp = replica_append_wrapper(mirpp, flags, 538 offset, length, timestamp); 539 540 /* 541 * If we're here it means - 542 * 543 * we've added the disk to the list of 544 * disks. 545 */ 546 547 /* 548 * We need to bump up the number of active 549 * replica count for each such replica that is 550 * active so that it can be used later for replica 551 * quorum check. 552 */ 553 if (flags & MDDB_F_ACTIVE) { 554 misp->mis_active_replicas++; 555 } 556 } 557 } 558 559 560 /* 561 * append_pnm_rec() 562 * 563 * Append pnm_rec_t entry to list of physical devices in the diskset. Entry 564 * contains a mapping of n_key in NM namespace(or min_key in DID_NM namespace) 565 * to name of the physical device. This list will be used to ensure that the 566 * correct names of the physical devices are printed in the metastat output--the 567 * NM namespace might have stale information about where the physical devices 568 * were previously located when the diskset was last active. 569 */ 570 static void 571 append_pnm_rec( 572 pnm_rec_t **pnm, 573 mdkey_t min_key, 574 char *n_name 575 ) 576 { 577 pnm_rec_t *tmp_pnm; 578 char *p; 579 int len; 580 581 if ((p = strrchr(n_name, '/')) != NULL) 582 p++; 583 584 /* 585 * Allocates pnm_rec_t record for the physical 586 * device. 587 */ 588 len = strlen(p) + 1; /* Length of name plus Null term */ 589 tmp_pnm = Malloc(sizeof (pnm_rec_t) + len); 590 (void) strncpy(tmp_pnm->n_name, p, len); 591 tmp_pnm->n_key = min_key; 592 593 /* 594 * Adds new element to head of pnm_rec_t list. 595 */ 596 if (*pnm == NULL) { 597 tmp_pnm->next = NULL; 598 *pnm = tmp_pnm; 599 } else { 600 tmp_pnm->next = *pnm; 601 *pnm = tmp_pnm; 602 } 603 } 604 605 /* 606 * free_pnm_rec_list() 607 * 608 * Freeing all pnm_rec_t entries on the list of physical devices in the 609 * diskset. 610 */ 611 void 612 free_pnm_rec_list(pnm_rec_t **pnm) 613 { 614 pnm_rec_t *tmp_pnm, *rm_pnm; 615 616 for (tmp_pnm = *pnm; tmp_pnm != NULL; ) { 617 rm_pnm = tmp_pnm; 618 tmp_pnm = tmp_pnm->next; 619 Free(rm_pnm); 620 } 621 622 *pnm = NULL; 623 } 624 625 626 /* 627 * get_disks_from_didnamespace() 628 * This function was origionally called: get_nonreplica_disks() 629 * 630 * Extracts the disks without replicas from the locator name space and adds them 631 * to the supplied list of md_im_drive_info_t. 632 * If the print verbose option was given then this function will also 633 * correct the nm namespace so that the n_name is the right ctd name 634 */ 635 static void 636 get_disks_from_didnamespace( 637 md_im_set_desc_t *misp, 638 pnm_rec_t **pnm, 639 mddb_rb_t *nm, 640 mddb_rb_t *shrnm, 641 mddb_rb_t *did_nm, 642 mddb_rb_t *did_shrnm, 643 uint_t imp_flags, 644 int replicated, 645 md_error_t *ep 646 ) 647 { 648 char *search_path = "/dev"; 649 devid_nmlist_t *nmlist; 650 md_im_drive_info_t *midp, **midpp = &misp->mis_drives; 651 mddrivename_t *dnp; 652 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 653 mddb_rb_t *rbp_did = did_nm; 654 mddb_rb_t *rbp_did_shr = did_shrnm; 655 mddb_rb_t *rbp_nm = nm; 656 mddb_rb_t *rbp_shr_nm = shrnm; 657 int on_list = 0; 658 struct devid_min_rec *did_rec; 659 struct devid_shr_rec *did_shr_rec; 660 struct nm_rec *namesp_rec; 661 struct nm_shr_rec *namesp_shr_rec; 662 struct did_shr_name *did; 663 struct did_min_name *min; 664 void *r_did; /* NULL if not a replicated diskset */ 665 void *valid_did; 666 int avail = 0; 667 struct nm_name *nmp; 668 struct nm_shared_name *snmp; 669 mdkey_t drv_key, key, dev_key; 670 minor_t mnum = 0; 671 did_list_t *nonrep_did_listp; 672 size_t used_size, offset; 673 674 /* 675 * We got a pointer to an mddb record, which we expect to contain a 676 * name record; extract the pointer thereto. 677 */ 678 /* LINTED */ 679 did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data)); 680 /* LINTED */ 681 did_shr_rec = (struct devid_shr_rec *) 682 ((caddr_t)(&rbp_did_shr->rb_data)); 683 /* LINTED */ 684 namesp_rec = (struct nm_rec *)((caddr_t)(&rbp_nm->rb_data)); 685 /* LINTED */ 686 namesp_shr_rec = (struct nm_shr_rec *)((caddr_t)(&rbp_shr_nm->rb_data)); 687 688 /* 689 * Skip the nm_rec_hdr and iterate on the array of struct minor_name 690 * at the end of the devid_min_rec 691 */ 692 for (min = &did_rec->minor_name[0]; min->min_devid_key != 0; 693 /* LINTED */ 694 min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) { 695 696 on_list = 0; 697 r_did = NULL; 698 nonrep_did_listp = Zalloc(sizeof (struct did_list)); 699 700 /* 701 * For a given DID_NM key, locate the corresponding device 702 * id from DID_NM_SHR 703 */ 704 for (did = &did_shr_rec->device_id[0]; did->did_key != 0; 705 /* LINTED */ 706 did = (struct did_shr_name *) 707 ((char *)did + DID_SHR_NAMSIZ(did))) { 708 /* 709 * We got a match, this is the device id we're 710 * looking for 711 */ 712 if (min->min_devid_key == did->did_key) 713 break; 714 } 715 716 if (did->did_key == 0) { 717 /* we didn't find a match */ 718 assert(did->did_key != 0); 719 md_exit(NULL, 1); 720 } 721 722 /* 723 * If replicated diskset 724 */ 725 if (replicated) { 726 size_t new_devid_len, old_devid_len; 727 char *temp; 728 /* 729 * In this case, did->did_devid will 730 * be invalid so lookup the real one 731 */ 732 temp = replicated_list_lookup(did->did_size, 733 did->did_devid); 734 if (temp == NULL) { 735 /* we have a partial replicated set, fake it */ 736 new_devid_len = did->did_size; 737 r_did = Zalloc(new_devid_len); 738 (void) memcpy(r_did, did->did_devid, 739 new_devid_len); 740 } else { 741 new_devid_len = devid_sizeof((ddi_devid_t)temp); 742 r_did = Zalloc(new_devid_len); 743 (void) memcpy(r_did, temp, new_devid_len); 744 } 745 valid_did = r_did; 746 nonrep_did_listp->rdid = Zalloc(new_devid_len); 747 (void) memcpy(nonrep_did_listp->rdid, r_did, 748 new_devid_len); 749 old_devid_len = 750 devid_sizeof((ddi_devid_t)did->did_devid); 751 nonrep_did_listp->did = Zalloc(old_devid_len); 752 (void) memcpy((void *)nonrep_did_listp->did, 753 (void *)did->did_devid, old_devid_len); 754 } else { 755 size_t new_devid_len; 756 757 valid_did = did->did_devid; 758 new_devid_len = 759 devid_sizeof((ddi_devid_t)did->did_devid); 760 nonrep_did_listp->did = Zalloc(new_devid_len); 761 (void) memcpy((void *)nonrep_did_listp->did, 762 (void *)did->did_devid, new_devid_len); 763 } 764 765 /* 766 * Get a ctds mapping for that device id. 767 * Since disk is being imported into this system, 768 * just use the first ctds in list. 769 */ 770 if (meta_deviceid_to_nmlist(search_path, 771 (ddi_devid_t)valid_did, 772 &min->min_name[0], &nmlist) == 0) { 773 /* 774 * We know the disk is available. Use the 775 * device information in nmlist. 776 */ 777 assert(nmlist[0].devname != NULL); 778 nonrep_did_listp->devname = Strdup(nmlist[0].devname); 779 nonrep_did_listp->available = MD_IM_DISK_AVAILABLE; 780 avail = 0; 781 mnum = meta_getminor(nmlist[0].dev); 782 devid_free_nmlist(nmlist); 783 } else { 784 /* 785 * The disk is not available. That means we need to 786 * use the (old) device information stored in the 787 * namespace. 788 */ 789 /* search in nm space for a match */ 790 offset = sizeof (struct nm_rec) - 791 sizeof (struct nm_name); 792 used_size = namesp_rec->r_rec_hdr.r_used_size - offset; 793 for (nmp = &namesp_rec->r_name[0]; nmp->n_key != 0; 794 /* LINTED */ 795 nmp = (struct nm_name *)((char *)nmp + 796 NAMSIZ(nmp))) { 797 if (nmp->n_key == min->min_key) 798 break; 799 used_size -= NAMSIZ(nmp); 800 if ((int)used_size <= 0) { 801 md_exit(NULL, 1); 802 } 803 } 804 805 if (nmp->n_key == 0) { 806 assert(nmp->n_key != 0); 807 md_exit(NULL, 1); 808 } 809 dev_key = nmp->n_dir_key; 810 snmp = &namesp_shr_rec->sr_name[0]; 811 key = snmp->sn_key; 812 /* 813 * Use the namespace n_dir_key to look in the 814 * shared namespace. When we find the matching 815 * key, that is the devname and minor number we 816 * want. 817 */ 818 offset = sizeof (struct nm_shr_rec) - 819 sizeof (struct nm_shared_name); 820 used_size = namesp_shr_rec->sr_rec_hdr.r_used_size - 821 offset; 822 while (key != 0) { 823 if (dev_key == key) { 824 /* 825 * This complicated looking series 826 * of code creates a devname of the 827 * form <sn_name>/<n_name> which 828 * will look like /dev/dsk/c1t4d0s0. 829 */ 830 nonrep_did_listp->devname = 831 Zalloc(strlen(nmp->n_name) + 832 strlen(snmp->sn_name) + 2); 833 (void) strlcpy( 834 nonrep_did_listp->devname, 835 snmp->sn_name, 836 strlen(snmp->sn_name)); 837 (void) strlcat( 838 nonrep_did_listp->devname, "/", 839 strlen(nmp->n_name) + 840 strlen(snmp->sn_name) + 2); 841 (void) strlcat( 842 nonrep_did_listp->devname, 843 nmp->n_name, 844 strlen(nmp->n_name) + 845 strlen(snmp->sn_name) + 2); 846 mnum = nmp->n_minor; 847 break; 848 } 849 /* LINTED */ 850 snmp = (struct nm_shared_name *)((char *)snmp + 851 SHR_NAMSIZ(snmp)); 852 key = snmp->sn_key; 853 used_size -= SHR_NAMSIZ(snmp); 854 if ((int)used_size <= 0) { 855 md_exit(NULL, 1); 856 } 857 } 858 if (key == 0) { 859 nonrep_did_listp->devname = NULL; 860 mnum = 0; 861 } 862 863 nonrep_did_listp->available = MD_IM_DISK_NOT_AVAILABLE; 864 nonrep_did_listp->minor_name = Strdup(min->min_name); 865 avail = 1; 866 drv_key = nmp->n_drv_key; 867 snmp = &namesp_shr_rec->sr_name[0]; 868 key = snmp->sn_key; 869 /* 870 * Use the namespace n_drv_key to look in the 871 * shared namespace. When we find the matching 872 * key, that is the driver name for the disk. 873 */ 874 offset = sizeof (struct nm_shr_rec) - 875 sizeof (struct nm_shared_name); 876 used_size = namesp_shr_rec->sr_rec_hdr.r_used_size - 877 offset; 878 while (key != 0) { 879 if (drv_key == key) { 880 nonrep_did_listp->driver_name = 881 Strdup(snmp->sn_name); 882 break; 883 } 884 /* LINTED */ 885 snmp = (struct nm_shared_name *)((char *)snmp + 886 SHR_NAMSIZ(snmp)); 887 key = snmp->sn_key; 888 used_size -= SHR_NAMSIZ(snmp); 889 if ((int)used_size <= 0) { 890 md_exit(NULL, 1); 891 } 892 } 893 if (key == 0) 894 nonrep_did_listp->driver_name = NULL; 895 } 896 dnp = metadrivename(&sp, 897 metadiskname(nonrep_did_listp->devname), ep); 898 /* 899 * Add drive to pnm_rec_t list of physical devices for 900 * metastat output. 901 */ 902 if (imp_flags & META_IMP_VERBOSE) { 903 append_pnm_rec(pnm, min->min_key, 904 nonrep_did_listp->devname); 905 } 906 907 assert(dnp != NULL); 908 /* Is it already on the list? */ 909 for (midp = misp->mis_drives; midp != NULL; 910 midp = midp->mid_next) { 911 if (midp->mid_dnp == dnp) { 912 if (midp->mid_available == 913 nonrep_did_listp->available) { 914 on_list = 1; 915 break; 916 } 917 } 918 } 919 920 if (!on_list) { 921 mddb_mb_t *mbp; 922 uint_t sliceno; 923 mdname_t *rsp; 924 int fd = -1; 925 926 mbp = Malloc(DEV_BSIZE); 927 928 if (!avail) { 929 /* determine the replica slice */ 930 if (meta_replicaslice(dnp, &sliceno, 931 ep) != 0) { 932 Free(mbp); 933 free_did_list(nonrep_did_listp); 934 continue; 935 } 936 937 /* 938 * if the replica slice size is zero, 939 * don't bother opening 940 */ 941 if (dnp->vtoc.parts[sliceno].size 942 == 0) { 943 Free(mbp); 944 free_did_list(nonrep_did_listp); 945 continue; 946 } 947 948 if ((rsp = metaslicename(dnp, sliceno, 949 ep)) == NULL) { 950 Free(mbp); 951 free_did_list(nonrep_did_listp); 952 continue; 953 } 954 955 if ((fd = open(rsp->rname, 956 O_RDONLY| O_NDELAY)) < 0) { 957 Free(mbp); 958 free_did_list(nonrep_did_listp); 959 continue; 960 } 961 962 /* 963 * a drive may not have a master block 964 */ 965 if (read_master_block(ep, fd, mbp, 966 DEV_BSIZE) <= 0) { 967 mdclrerror(ep); 968 Free(mbp); 969 free_did_list(nonrep_did_listp); 970 (void) close(fd); 971 continue; 972 } 973 974 (void) close(fd); 975 } 976 /* 977 * If it is replicated diskset, 978 * r_did will be non-NULL. 979 * Passing the devname as NULL because field 980 * is not currently used for a non-replica disk. 981 */ 982 midpp = drive_append_wrapper(midpp, 983 dnp, nonrep_did_listp, 984 mnum, mbp->mb_setcreatetime, NULL); 985 Free(mbp); 986 free_did_list(nonrep_did_listp); 987 } 988 free_did_list(nonrep_did_listp); 989 } 990 } 991 992 /* 993 * set_append() 994 * 995 * Append to tail of linked list of md_im_set_desc_t. 996 * 997 * Will allocate space for new node AND populate it by extracting disks with 998 * and without replicas from the locator blocks and locator namespace. 999 * 1000 * Returns pointer to new node. 1001 */ 1002 static md_im_set_desc_t * 1003 set_append( 1004 md_im_set_desc_t **mispp, 1005 did_list_t *did_listp, 1006 mddb_mb_t *mb, 1007 mddb_lb_t *lbp, 1008 mddb_rb_t *nm, 1009 mddb_rb_t *shrnm, 1010 pnm_rec_t **pnm, 1011 mddb_rb_t *did_nm, 1012 mddb_rb_t *did_shrnm, 1013 uint_t imp_flags, 1014 md_error_t *ep 1015 ) 1016 { 1017 1018 md_im_set_desc_t *misp; 1019 set_t setno = mb->mb_setno; 1020 int partial = imp_flags & MD_IM_PARTIAL_DISKSET; 1021 int replicated = imp_flags & MD_IM_SET_REPLICATED; 1022 1023 /* run to end of list */ 1024 for (; (*mispp != NULL); mispp = &((*mispp)->mis_next)) 1025 ; 1026 1027 /* allocate new list element */ 1028 misp = *mispp = Zalloc(sizeof (md_im_set_desc_t)); 1029 1030 if (replicated) 1031 misp->mis_flags = MD_IM_SET_REPLICATED; 1032 1033 misp->mis_oldsetno = setno; 1034 misp->mis_partial = partial; 1035 1036 /* Get the disks with and without replicas */ 1037 get_replica_disks(misp, did_listp, mb, lbp, ep); 1038 1039 if (nm != NULL && did_nm != NULL && did_shrnm != NULL) { 1040 get_disks_from_didnamespace(misp, pnm, nm, shrnm, did_nm, 1041 did_shrnm, imp_flags, replicated, ep); 1042 } 1043 1044 /* 1045 * An error in this struct could come from either of 1046 * the above routines; 1047 * in both cases, we want to pass it back on up. 1048 */ 1049 1050 return (misp); 1051 } 1052 1053 1054 /* 1055 * add_disk_names() 1056 * 1057 * Iterator to walk the minor node tree of the device snapshot, adding only the 1058 * first non-block instance of each non-cdrom minor node to a list of disks. 1059 */ 1060 static int 1061 add_disk_names(di_node_t node, di_minor_t minor, void *args) 1062 { 1063 char *search_path = "/dev"; 1064 ddi_devid_t devid = di_devid(node); 1065 devid_nmlist_t *nm; 1066 char *min = di_minor_name(minor); 1067 md_im_names_t *cnames = (md_im_names_t *)args; 1068 static di_node_t save_node = NULL; 1069 1070 /* 1071 * skip CD devices 1072 * If a device does not have a device id, we can't 1073 * do anything with it so just exclude it from our 1074 * list. 1075 * 1076 * This would also encompass CD devices and floppy 1077 * devices that don't have a device id. 1078 */ 1079 if (devid == NULL) { 1080 return (DI_WALK_CONTINUE); 1081 } 1082 1083 /* char disk devices (as opposed to block) */ 1084 if (di_minor_spectype(minor) == S_IFCHR) { 1085 1086 /* only first occurrence (slice 0) of each instance */ 1087 if (save_node == NULL || node != save_node) { 1088 save_node = node; 1089 if (meta_deviceid_to_nmlist(search_path, devid, 1090 min, &nm) == 0) { 1091 int index = cnames->min_count++; 1092 1093 assert(nm->devname != NULL); 1094 cnames->min_names = 1095 Realloc(cnames->min_names, 1096 cnames->min_count * 1097 sizeof (char *)); 1098 1099 assert(cnames->min_names != NULL); 1100 cnames->min_names[index] = 1101 metadiskname(nm->devname); 1102 devid_free_nmlist(nm); 1103 } 1104 } 1105 } 1106 return (DI_WALK_CONTINUE); 1107 } 1108 1109 1110 1111 /* 1112 * meta_list_disks() 1113 * 1114 * Snapshots the device tree and extracts disk devices from the snapshot. 1115 */ 1116 int 1117 meta_list_disks(md_error_t *ep, md_im_names_t *cnames) 1118 { 1119 di_node_t root_node; 1120 1121 assert(cnames != NULL); 1122 cnames->min_count = 0; 1123 cnames->min_names = NULL; 1124 1125 if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE)) 1126 == DI_NODE_NIL) { 1127 return (mdsyserror(ep, errno, NULL)); 1128 } 1129 1130 (void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames, 1131 add_disk_names); 1132 1133 di_fini(root_node); 1134 return (0); 1135 } 1136 1137 /* 1138 * meta_imp_drvused 1139 * 1140 * Checks if given drive is mounted, swapped, part of disk configuration 1141 * or in use by SVM. ep also has error code set up if drive is in use. 1142 * 1143 * Returns 1 if drive is in use. 1144 * Returns 0 if drive is not in use. 1145 */ 1146 int 1147 meta_imp_drvused( 1148 mdsetname_t *sp, 1149 mddrivename_t *dnp, 1150 md_error_t *ep 1151 ) 1152 { 1153 md_error_t status = mdnullerror; 1154 md_error_t *db_ep = &status; 1155 1156 /* 1157 * We pass in db_ep to meta_setup_db_locations 1158 * and never ever use the error contained therein 1159 * because all we're interested in is a check to 1160 * see whether any local metadbs are present. 1161 */ 1162 if ((meta_check_drivemounted(sp, dnp, ep) != 0) || 1163 (meta_check_driveswapped(sp, dnp, ep) != 0) || 1164 (((meta_setup_db_locations(db_ep) == 0) && 1165 ((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) || 1166 (meta_check_driveinset(sp, dnp, ep) != 0))))) { 1167 return (1); 1168 } else { 1169 return (0); 1170 } 1171 } 1172 1173 /* 1174 * meta_prune_cnames() 1175 * 1176 * Removes in-use disks from the list prior to further processing. 1177 * 1178 * Return value depends on err_on_prune flag: if set, and one or more disks 1179 * are pruned, the return list will be the pruned disks. If not set, or if no 1180 * disks are pruned, the return list will be the unpruned disks. 1181 */ 1182 mddrivenamelist_t * 1183 meta_prune_cnames( 1184 md_error_t *ep, 1185 md_im_names_t *cnames, 1186 int err_on_prune 1187 ) 1188 { 1189 int d; 1190 int fcount = 0; 1191 mddrivenamelist_t *dnlp = NULL; 1192 mddrivenamelist_t **dnlpp = &dnlp; 1193 mddrivenamelist_t *fdnlp = NULL; 1194 mddrivenamelist_t **fdnlpp = &fdnlp; 1195 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 1196 1197 for (d = 0; d < cnames->min_count; ++d) { 1198 mddrivename_t *dnp; 1199 1200 dnp = metadrivename(&sp, cnames->min_names[d], ep); 1201 if (dnp == NULL) { 1202 /* 1203 * Assuming we're interested in knowing about 1204 * whatever error occurred, but not in stopping. 1205 */ 1206 mde_perror(ep, cnames->min_names[d]); 1207 mdclrerror(ep); 1208 1209 continue; 1210 } 1211 1212 /* 1213 * Check if the drive is inuse. 1214 */ 1215 if (meta_imp_drvused(sp, dnp, ep)) { 1216 fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp); 1217 fcount++; 1218 mdclrerror(ep); 1219 } else { 1220 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp); 1221 } 1222 } 1223 1224 if (fcount) { 1225 if (err_on_prune) { 1226 (void) mddserror(ep, MDE_DS_DRIVEINUSE, 0, 1227 NULL, fdnlp->drivenamep->cname, NULL); 1228 metafreedrivenamelist(dnlp); 1229 return (fdnlp); 1230 } 1231 metafreedrivenamelist(fdnlp); 1232 } 1233 1234 return (dnlp); 1235 } 1236 1237 /* 1238 * read_master_block() 1239 * 1240 * Returns: 1241 * < 0 for failure 1242 * 0 for no valid master block 1243 * 1 for valid master block 1244 * 1245 * The supplied buffer will be filled in for EITHER 0 or 1. 1246 */ 1247 int 1248 read_master_block( 1249 md_error_t *ep, 1250 int fd, 1251 void *bp, 1252 int bsize 1253 ) 1254 { 1255 mddb_mb_t *mbp = bp; 1256 int rval = 1; 1257 1258 assert(bp != NULL); 1259 1260 if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0) 1261 return (mdsyserror(ep, errno, NULL)); 1262 1263 if (read(fd, bp, bsize) != bsize) 1264 return (mdsyserror(ep, errno, NULL)); 1265 1266 /* 1267 * The master block magic number can either be MDDB_MAGIC_MB in 1268 * the case of a real master block, or, it can be MDDB_MAGIC_DU 1269 * in the case of a dummy master block 1270 */ 1271 if ((mbp->mb_magic != MDDB_MAGIC_MB) && 1272 (mbp->mb_magic != MDDB_MAGIC_DU)) { 1273 rval = 0; 1274 (void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL); 1275 } 1276 1277 if (mbp->mb_revision != MDDB_REV_MB) { 1278 rval = 0; 1279 } 1280 1281 return (rval); 1282 } 1283 1284 /* 1285 * read_locator_block() 1286 * 1287 * Returns: 1288 * < 0 for failure 1289 * 0 for no valid locator block 1290 * 1 for valid locator block 1291 */ 1292 int 1293 read_locator_block( 1294 md_error_t *ep, 1295 int fd, 1296 mddb_mb_t *mbp, 1297 void *bp, 1298 int bsize 1299 ) 1300 { 1301 mddb_lb_t *lbp = bp; 1302 1303 assert(bp != NULL); 1304 1305 if (lseek(fd, (off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0) 1306 return (mdsyserror(ep, errno, NULL)); 1307 1308 if (read(fd, bp, bsize) != bsize) 1309 return (mdsyserror(ep, errno, NULL)); 1310 1311 return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0); 1312 } 1313 1314 int 1315 phys_read( 1316 md_error_t *ep, 1317 int fd, 1318 mddb_mb_t *mbp, 1319 daddr_t blk, 1320 void *bp, 1321 int bcount 1322 ) 1323 { 1324 daddr_t pblk; 1325 1326 if ((pblk = getphysblk(blk, mbp)) < 0) 1327 return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32, 1328 MD_LOCAL_SET, blk, NULL)); 1329 1330 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) 1331 return (mdsyserror(ep, errno, NULL)); 1332 1333 if (read(fd, bp, bcount) != bcount) 1334 return (mdsyserror(ep, errno, NULL)); 1335 1336 return (bcount); 1337 } 1338 1339 /* 1340 * read_locator_block_did() 1341 * 1342 * Returns: 1343 * < 0 for failure 1344 * 0 for no valid locator name struct 1345 * 1 for valid locator name struct 1346 */ 1347 int 1348 read_locator_block_did( 1349 md_error_t *ep, 1350 int fd, 1351 mddb_mb_t *mbp, 1352 mddb_lb_t *lbp, 1353 void *bp, 1354 int bsize 1355 ) 1356 { 1357 int lb_didfirstblk = lbp->lb_didfirstblk; 1358 mddb_did_blk_t *lbdidp = bp; 1359 int rval; 1360 1361 assert(bp != NULL); 1362 1363 if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0) 1364 return (rval); 1365 1366 return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0); 1367 } 1368 1369 /* 1370 * read_locator_names() 1371 * 1372 * Returns: 1373 * < 0 for failure 1374 * 0 for no valid locator name struct 1375 * 1 for valid locator name struct 1376 */ 1377 int 1378 read_locator_names( 1379 md_error_t *ep, 1380 int fd, 1381 mddb_mb_t *mbp, 1382 mddb_lb_t *lbp, 1383 void *bp, 1384 int bsize 1385 ) 1386 { 1387 int lnfirstblk = lbp->lb_lnfirstblk; 1388 mddb_ln_t *lnp = bp; 1389 int rval; 1390 1391 assert(bp != NULL); 1392 1393 if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0) 1394 return (rval); 1395 1396 return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0); 1397 } 1398 1399 1400 int 1401 read_database_block( 1402 md_error_t *ep, 1403 int fd, 1404 mddb_mb_t *mbp, 1405 int dbblk, 1406 void *bp, 1407 int bsize 1408 ) 1409 { 1410 mddb_db_t *dbp = bp; 1411 int rval; 1412 1413 assert(bp != NULL); 1414 1415 if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0) 1416 return (rval); 1417 1418 return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0); 1419 } 1420 1421 int 1422 read_loc_didblks( 1423 md_error_t *ep, 1424 int fd, 1425 mddb_mb_t *mbp, 1426 int didblk, 1427 void *bp, 1428 int bsize 1429 ) 1430 { 1431 mddb_did_blk_t *didbp = bp; 1432 int rval; 1433 1434 assert(bp != NULL); 1435 1436 if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0) 1437 return (rval); 1438 1439 return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0); 1440 } 1441 1442 1443 int 1444 read_loc_didinfo( 1445 md_error_t *ep, 1446 int fd, 1447 mddb_mb_t *mbp, 1448 int infoblk, 1449 void *bp, 1450 int bsize 1451 ) 1452 { 1453 int rval = 1; 1454 mddb_did_info_t *infop = bp; 1455 1456 assert(bp != NULL); 1457 1458 if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0) 1459 return (rval); 1460 1461 return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0); 1462 } 1463 1464 /* 1465 * meta_nm_rec() 1466 * 1467 * Return the DE corresponding to the requested namespace record type. 1468 * Modifies dbp to have a firstentry if one isn't there. 1469 */ 1470 static mddb_de_t * 1471 meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype) 1472 { 1473 mddb_de_t *dep; 1474 int desize; 1475 1476 if (dbp->db_firstentry != NULL) { 1477 /* LINTED */ 1478 dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry) 1479 + sizeof (dbp->db_firstentry)); 1480 dbp->db_firstentry = dep; 1481 while (dep && dep->de_next) { 1482 desize = sizeof (*dep) - sizeof (dep->de_blks) + 1483 sizeof (daddr_t) * dep->de_blkcount; 1484 /* LINTED */ 1485 dep->de_next = (mddb_de_t *) 1486 ((caddr_t)dep + desize); 1487 dep = dep->de_next; 1488 } 1489 } 1490 1491 for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) { 1492 if (dep->de_type1 == rectype) 1493 break; 1494 } 1495 return (dep); 1496 } 1497 1498 /* 1499 * read_nm_rec() 1500 * 1501 * Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the 1502 * configuration data in the buffer 'nm' 1503 * 1504 * Returns: 1505 * < 0 for failure 1506 * 0 for no valid NM/DID_NM/DID_NM_SHR record 1507 * 1 for valid NM/DID_NM/DID_NM_SHR record 1508 * 1509 */ 1510 static int 1511 read_nm_rec( 1512 md_error_t *ep, 1513 int fd, 1514 mddb_mb_t *mbp, 1515 mddb_lb_t *lbp, 1516 char **nm, 1517 mddb_type_t rectype, 1518 char *diskname 1519 ) 1520 { 1521 int cnt, dbblk, rval = 0; 1522 char db[DEV_BSIZE]; 1523 mddb_de_t *dep; 1524 /*LINTED*/ 1525 mddb_db_t *dbp = (mddb_db_t *)&db; 1526 char *tmpnm = NULL; 1527 daddr_t pblk; 1528 1529 for (dbblk = lbp->lb_dbfirstblk; 1530 dbblk != 0; 1531 dbblk = dbp->db_nextblk) { 1532 1533 if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp, 1534 sizeof (db))) <= 0) 1535 return (rval); 1536 1537 /* 1538 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is 1539 * only one record per mddb. There is a rare case when we 1540 * can't expand the record. If this is the case then we 1541 * will have multiple NM/DID_NM/DID_NM_SHR records linked 1542 * with r_next_recid. 1543 * 1544 * For now assume the normal case and handle the extended 1545 * namespace in Phase 2. 1546 */ 1547 if ((dep = meta_nm_rec(dbp, rectype)) != NULL) 1548 break; 1549 } 1550 1551 /* If meta_nm_rec() never succeeded, bail out */ 1552 if (dep == NULL) 1553 return (0); 1554 1555 /* Read in the appropriate record and return configurations */ 1556 tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount)); 1557 *nm = tmpnm; 1558 1559 for (cnt = 0; cnt < dep->de_blkcount; cnt++) { 1560 if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) { 1561 rval = mdmddberror(ep, MDE_DB_BLKRANGE, 1562 NODEV32, MD_LOCAL_SET, 1563 dep->de_blks[cnt], diskname); 1564 return (rval); 1565 } 1566 1567 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) { 1568 rval = mdsyserror(ep, errno, diskname); 1569 return (rval); 1570 } 1571 1572 if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) { 1573 rval = mdsyserror(ep, errno, diskname); 1574 return (rval); 1575 } 1576 1577 tmpnm += DEV_BSIZE; 1578 } 1579 return (1); 1580 } 1581 1582 /* 1583 * is_replicated 1584 * 1585 * Determines whether a disk has been replicated or not. It checks to see 1586 * if the device id stored in the master block is the same as the device id 1587 * registered for that disk on the current system. If the two device ids are 1588 * different, then we know that the disk has been replicated. 1589 * 1590 * If need_devid is set and the disk is replicated, fill in the new_devid. 1591 * Also, if need_devid is set, this routine allocates memory for the device 1592 * ids; the caller of this routine is responsible for free'ing up the memory. 1593 * 1594 * Returns: 1595 * MD_IM_SET_REPLICATED if it's a replicated disk 1596 * 0 if it's not a replicated disk 1597 */ 1598 static int 1599 is_replicated( 1600 int fd, 1601 mddb_mb_t *mbp, 1602 int need_devid, 1603 void **new_devid 1604 ) 1605 { 1606 ddi_devid_t current_devid; 1607 int retval = 0; 1608 size_t new_devid_len; 1609 1610 if (mbp->mb_devid_magic != MDDB_MAGIC_DE) 1611 return (retval); 1612 1613 if (devid_get(fd, ¤t_devid) != 0) 1614 return (retval); 1615 1616 if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0) 1617 retval = MD_IM_SET_REPLICATED; 1618 1619 if (retval && need_devid) { 1620 new_devid_len = devid_sizeof(current_devid); 1621 *new_devid = Zalloc(new_devid_len); 1622 (void) memcpy(*new_devid, (void *)current_devid, new_devid_len); 1623 } 1624 1625 devid_free(current_devid); 1626 return (retval); 1627 } 1628 1629 /* 1630 * free_replicated_disks_list() 1631 * 1632 * this frees up all the memory allocated by build_replicated_disks_list 1633 */ 1634 static void 1635 free_replicated_disks_list() 1636 { 1637 replicated_disk_t **repl_disk, *temp; 1638 int index; 1639 1640 for (index = 0; index <= MAX_DEVID_LEN; index++) { 1641 repl_disk = &replicated_disk_list[index]; 1642 1643 while (*repl_disk != NULL) { 1644 temp = *repl_disk; 1645 *repl_disk = (*repl_disk)->next; 1646 1647 Free(temp->old_devid); 1648 Free(temp->new_devid); 1649 Free(temp); 1650 } 1651 } 1652 } 1653 1654 /* 1655 * build_replicated_disks_list() 1656 * 1657 * Builds a list of disks that have been replicated using either a 1658 * remote replication or a point-in-time replication software. The 1659 * list is stored as a two dimensional sparse array. 1660 * 1661 * Returns 1662 * 1 on success 1663 * 0 on failure 1664 */ 1665 int 1666 build_replicated_disks_list( 1667 md_error_t *ep, 1668 mddrivenamelist_t *dnlp 1669 ) 1670 { 1671 uint_t sliceno; 1672 int fd = -1; 1673 mddrivenamelist_t *dp; 1674 mdname_t *rsp; 1675 mddb_mb_t *mbp; 1676 1677 mbp = Malloc(DEV_BSIZE); 1678 1679 for (dp = dnlp; dp != NULL; dp = dp->next) { 1680 mddrivename_t *dnp; 1681 void *new_devid; 1682 1683 dnp = dp->drivenamep; 1684 /* determine the replica slice */ 1685 if (meta_replicaslice(dnp, &sliceno, ep) != 0) 1686 continue; 1687 1688 /* 1689 * if the replica slice size is zero, don't bother opening 1690 */ 1691 if (dnp->vtoc.parts[sliceno].size == 0) 1692 continue; 1693 1694 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) 1695 continue; 1696 1697 if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0) 1698 return (mdsyserror(ep, errno, rsp->rname)); 1699 1700 /* a drive may not have a master block so we just continue */ 1701 if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) { 1702 (void) close(fd); 1703 mdclrerror(ep); 1704 continue; 1705 } 1706 1707 if (is_replicated(fd, mbp, 1, &new_devid)) { 1708 replicated_list_insert(mbp->mb_devid_len, 1709 mbp->mb_devid, new_devid); 1710 } 1711 (void) close(fd); 1712 } 1713 *replicated_disk_list_built = 1; 1714 1715 Free(mbp); 1716 return (1); 1717 } 1718 1719 /* 1720 * free_did_list() 1721 * 1722 * Frees the did_list allocated as part of build_did_list 1723 */ 1724 static void 1725 free_did_list( 1726 did_list_t *did_listp 1727 ) 1728 { 1729 did_list_t *temp, *head; 1730 1731 head = did_listp; 1732 1733 while (head != NULL) { 1734 temp = head; 1735 head = head->next; 1736 if (temp->rdid) 1737 Free(temp->rdid); 1738 if (temp->did) 1739 Free(temp->did); 1740 if (temp->devname) 1741 Free(temp->devname); 1742 if (temp->minor_name) 1743 Free(temp->minor_name); 1744 if (temp->driver_name) 1745 Free(temp->driver_name); 1746 Free(temp); 1747 } 1748 } 1749 1750 /* 1751 * meta_free_im_replica_info 1752 * 1753 * Frees the md_im_replica_info list 1754 */ 1755 static void 1756 meta_free_im_replica_info( 1757 md_im_replica_info_t *mirp 1758 ) 1759 { 1760 md_im_replica_info_t *r, *temp; 1761 1762 r = mirp; 1763 1764 while (r != NULL) { 1765 temp = r; 1766 r = r->mir_next; 1767 1768 Free(temp); 1769 } 1770 } 1771 1772 /* 1773 * meta_free_im_drive_info 1774 * 1775 * Frees the md_im_drive_info list 1776 */ 1777 static void 1778 meta_free_im_drive_info( 1779 md_im_drive_info_t *midp 1780 ) 1781 { 1782 md_im_drive_info_t *d, *temp; 1783 1784 d = midp; 1785 1786 while (d != NULL) { 1787 temp = d; 1788 d = d->mid_next; 1789 1790 if (temp->mid_available & MD_IM_DISK_NOT_AVAILABLE) 1791 /* 1792 * dnp is not on the drivenamelist and is a temp 1793 * dnp for metaimport if the disk is unavailable. 1794 * We need to specifically free it because of this. 1795 * If the disk is available, standard drivelist freeing 1796 * will kick in so we don't need to do it. 1797 */ 1798 metafreedrivename(temp->mid_dnp); 1799 if (temp->mid_devid) 1800 Free(temp->mid_devid); 1801 if (temp->mid_o_devid) 1802 Free(temp->mid_o_devid); 1803 if (temp->mid_driver_name) 1804 Free(temp->mid_driver_name); 1805 if (temp->mid_devname) 1806 Free(temp->mid_devname); 1807 if (temp->mid_replicas) { 1808 meta_free_im_replica_info(temp->mid_replicas); 1809 temp->mid_replicas = NULL; 1810 } 1811 if (temp->overlap) { 1812 meta_free_im_drive_info(temp->overlap); 1813 temp->overlap = NULL; 1814 } 1815 Free(temp); 1816 } 1817 } 1818 1819 /* 1820 * meta_free_im_set_desc 1821 * 1822 * Frees the md_im_set_desc_t list 1823 */ 1824 void 1825 meta_free_im_set_desc( 1826 md_im_set_desc_t *misp 1827 ) 1828 { 1829 md_im_set_desc_t *s, *temp; 1830 1831 s = misp; 1832 1833 while (s != NULL) { 1834 temp = s; 1835 s = s->mis_next; 1836 if (temp->mis_drives) { 1837 meta_free_im_drive_info(temp->mis_drives); 1838 temp->mis_drives = NULL; 1839 } 1840 Free(temp); 1841 } 1842 } 1843 1844 /* 1845 * build_did_list() 1846 * 1847 * Build a list of device ids corresponding to disks in the locator block. 1848 * Memory is allocated here for the nodes in the did_list. The callers of 1849 * this routine must also call free_did_list to free up the memory after 1850 * they're done. 1851 * 1852 * Returns: 1853 * < 0 for failure 1854 * 0 for no valid locator block device id array 1855 * 1 for valid locator block device id array 1856 * ENOTSUP partial diskset, not all disks in a diskset on the 1857 * system where import is being executed 1858 */ 1859 static int 1860 build_did_list( 1861 md_error_t *ep, 1862 int fd, 1863 mddb_mb_t *mb, 1864 mddb_lb_t *lbp, 1865 mddb_did_blk_t *lbdidp, 1866 mddb_ln_t *lnp, 1867 did_list_t **did_listp, 1868 int replicated 1869 ) 1870 { 1871 char *search_path = "/dev"; 1872 char *minor_name; 1873 int rval, cnt; 1874 devid_nmlist_t *nm; 1875 uint_t did_info_length = 0; 1876 uint_t did_info_firstblk = 0; 1877 did_list_t *new, *head = NULL; 1878 char *bp = NULL, *temp; 1879 mddb_did_info_t *did_info = NULL; 1880 void *did = NULL; 1881 size_t new_devid_len; 1882 int partial = 0; 1883 int partial_replicated = 0; 1884 1885 for (cnt = 0; cnt < MDDB_NLB; cnt++) { 1886 partial_replicated = 0; 1887 did_info = &lbdidp->blk_info[cnt]; 1888 1889 if (!(did_info->info_flags & MDDB_DID_EXISTS)) 1890 continue; 1891 1892 new = Zalloc(sizeof (did_list_t)); 1893 new->did = Zalloc(did_info->info_length); 1894 1895 /* 1896 * If we can re-use the buffer that has already been 1897 * read in then just use it. Otherwise free 1898 * the previous one and alloc a new one 1899 */ 1900 if (dbtob(did_info->info_blkcnt) != did_info_length && 1901 did_info->info_firstblk != did_info_firstblk) { 1902 1903 did_info_length = dbtob(did_info->info_blkcnt); 1904 did_info_firstblk = did_info->info_firstblk; 1905 1906 if (bp) 1907 Free(bp); 1908 bp = temp = Zalloc(did_info_length); 1909 1910 if ((rval = phys_read(ep, fd, mb, did_info_firstblk, 1911 (void *)bp, did_info_length)) < 0) 1912 return (rval); 1913 } else { 1914 temp = bp; 1915 } 1916 1917 temp += did_info->info_offset; 1918 (void) memcpy(new->did, temp, did_info->info_length); 1919 new->did_index = cnt; 1920 minor_name = did_info->info_minor_name; 1921 1922 /* 1923 * If we are not able to find the ctd mapping corresponding 1924 * to a given device id, it probably means the device id in 1925 * question is not registered with the system. 1926 * 1927 * Highly likely that the only time this happens, we've hit 1928 * a case where not all the disks that are a part of the 1929 * diskset were moved before importing the diskset. 1930 * 1931 * If set is a replicated diskset, then the device id we get 1932 * from 'lb' will be the 'other' did and we need to lookup 1933 * the real one before we call this routine. 1934 */ 1935 if (replicated) { 1936 temp = replicated_list_lookup(did_info->info_length, 1937 new->did); 1938 if (temp == NULL) { 1939 /* we have a partial replicated set, fake it */ 1940 new_devid_len = devid_sizeof((ddi_devid_t)new->did); 1941 new->rdid = Zalloc(new_devid_len); 1942 (void) memcpy(new->rdid, new->did, new_devid_len); 1943 did = new->rdid; 1944 partial_replicated = 1; 1945 } else { 1946 new_devid_len = devid_sizeof((ddi_devid_t)temp); 1947 new->rdid = Zalloc(new_devid_len); 1948 (void) memcpy(new->rdid, temp, new_devid_len); 1949 did = new->rdid; 1950 } 1951 } else { 1952 did = new->did; 1953 } 1954 1955 if (devid_valid((ddi_devid_t)(did)) == 0) { 1956 return (-1); 1957 } 1958 1959 if (partial_replicated || meta_deviceid_to_nmlist(search_path, 1960 (ddi_devid_t)did, minor_name, &nm) != 0) { 1961 int len = 0; 1962 1963 /* 1964 * Partial diskset case. We'll need to get the 1965 * device information from the metadb instead 1966 * of the output (nm) of meta_deviceid_to_nmlist. 1967 */ 1968 len = strlen(lnp->ln_prefixes[0].pre_data) + 1969 strlen(lnp->ln_suffixes[0][cnt].suf_data) + 2; 1970 new->devname = Zalloc(len); 1971 (void) strlcpy(new->devname, 1972 lnp->ln_prefixes[0].pre_data, 1973 strlen(lnp->ln_prefixes[0].pre_data) + 1); 1974 (void) strlcat(new->devname, "/", len); 1975 (void) strlcat(new->devname, 1976 lnp->ln_suffixes[0][cnt].suf_data, len); 1977 new->minor_name = Strdup(minor_name); 1978 new->next = head; 1979 new->available = MD_IM_DISK_NOT_AVAILABLE; 1980 new->driver_name = Strdup(lbp->lb_drvnm[0].dn_data); 1981 new->dev = lbp->lb_locators[cnt].l_dev; 1982 head = new; 1983 partial = ENOTSUP; 1984 continue; 1985 } 1986 1987 /* 1988 * Disk is there. Grab device information from nm structure. 1989 */ 1990 assert(nm->devname != NULL); 1991 new->devname = Strdup(nm->devname); 1992 new->dev = nm->dev; 1993 new->minor_name = Strdup(minor_name); 1994 new->available = MD_IM_DISK_AVAILABLE; 1995 1996 devid_free_nmlist(nm); 1997 1998 new->next = head; 1999 head = new; 2000 } 2001 2002 /* Free the last bp */ 2003 if (bp) 2004 Free(bp); 2005 *did_listp = head; 2006 if (partial) 2007 return (partial); 2008 return (1); 2009 } 2010 /* 2011 * check_nm_disks 2012 * Checks the disks listed in the shared did namespace to see if they 2013 * are accessable on the system. If not, return ENOTSUP error to 2014 * indicate we have a partial diskset. 2015 * Returns: 2016 * < 0 for failure 2017 * 0 success 2018 * ENOTSUP partial diskset, not all disks in a diskset on the 2019 * system where import is being executed 2020 */ 2021 static int 2022 check_nm_disks( 2023 struct devid_min_rec *did_nmp, 2024 struct devid_shr_rec *did_shrnmp 2025 ) 2026 { 2027 char *search_path = "/dev"; 2028 char *minor_name = NULL; 2029 uint_t used_size, min_used_size; 2030 ddi_devid_t did; 2031 devid_nmlist_t *nm; 2032 void *did_min_namep; 2033 void *did_shr_namep; 2034 size_t did_nsize, did_shr_nsize; 2035 2036 used_size = did_shrnmp->did_rec_hdr.r_used_size - 2037 sizeof (struct nm_rec_hdr); 2038 min_used_size = did_nmp->min_rec_hdr.r_used_size - 2039 sizeof (struct nm_rec_hdr); 2040 did_shr_namep = (void *)(&did_shrnmp->device_id[0]); 2041 while (used_size > (int)sizeof (struct did_shr_name)) { 2042 did_min_namep = (void *)(&did_nmp->minor_name[0]); 2043 /* grab device id and minor name from the shared spaces */ 2044 did = (ddi_devid_t)(((struct did_shr_name *) 2045 did_shr_namep)->did_devid); 2046 if (devid_valid(did) == 0) { 2047 return (-1); 2048 } 2049 2050 /* 2051 * We need to check that the DID_NM and DID_SHR_NM are in 2052 * sync. It is possible that we took a panic between writing 2053 * the two areas to disk. This would be cleaned up on the 2054 * next snarf but we don't know for sure that snarf has even 2055 * happened since we're reading from disk. 2056 */ 2057 while (((struct did_shr_name *)did_shr_namep)->did_key != 2058 ((struct did_min_name *)did_min_namep)->min_devid_key) { 2059 did_nsize = DID_NAMSIZ((struct did_min_name *) 2060 did_min_namep); 2061 did_min_namep = ((void *)((char *)did_min_namep + 2062 did_nsize)); 2063 min_used_size -= did_nsize; 2064 if (min_used_size < (int)sizeof (struct did_min_name)) 2065 continue; 2066 } 2067 minor_name = ((struct did_min_name *)did_min_namep)->min_name; 2068 2069 /* 2070 * Try to find disk in the system. If we can't find the 2071 * disk, we have a partial diskset. 2072 */ 2073 if ((meta_deviceid_to_nmlist(search_path, 2074 did, minor_name, &nm)) != 0) { 2075 /* Partial diskset detected */ 2076 return (ENOTSUP); 2077 } 2078 devid_free_nmlist(nm); 2079 used_size -= DID_SHR_NAMSIZ((struct did_shr_name *) 2080 did_shr_namep); 2081 /* increment to next item in the shared spaces */ 2082 did_shr_nsize = DID_SHR_NAMSIZ((struct did_shr_name *) 2083 did_shr_namep); 2084 did_shr_namep = ((void *)((char *)did_shr_namep + 2085 did_shr_nsize)); 2086 } 2087 return (0); 2088 } 2089 2090 2091 /* 2092 * report_metadb_info() 2093 * 2094 * Generates metadb output for the diskset. 2095 * 2096 */ 2097 static void 2098 report_metadb_info( 2099 md_im_set_desc_t *misp, 2100 char *indent 2101 ) 2102 { 2103 md_im_drive_info_t *d; 2104 md_im_replica_info_t *r; 2105 char *unk_str = ""; 2106 int i; 2107 2108 (void) printf("%s\t%5.5s\t\t%9.9s\t%11.11s\n", indent, gettext("flags"), 2109 gettext("first blk"), gettext("block count")); 2110 2111 unk_str = gettext("unknown"); 2112 2113 /* 2114 * Looping through all drives in the diskset to print 2115 * out information about the drive and if the verbose 2116 * option is set print out replica data. 2117 */ 2118 for (d = misp->mis_drives; d != NULL; d = d->mid_next) { 2119 2120 if (d->mid_replicas != NULL) { 2121 for (r = d->mid_replicas; r != NULL; 2122 r = r->mir_next) { 2123 (void) printf("%s", indent); 2124 for (i = 0; i < MDDB_FLAGS_LEN; i++) { 2125 if (r->mir_flags & (1 << i)) { 2126 (void) putchar( 2127 MDDB_FLAGS_STRING[i]); 2128 } else { 2129 (void) putchar(' '); 2130 } 2131 } 2132 if ((r->mir_offset == -1) && (r->mir_length 2133 == -1)) { 2134 (void) printf("%7.7s\t\t%7.7s\t", 2135 unk_str, unk_str); 2136 } else if (r->mir_length == -1) { 2137 (void) printf("%i\t\t%7.7s\t", 2138 r->mir_offset, unk_str); 2139 } else { 2140 (void) printf("%i\t\t%i\t", 2141 r->mir_offset, r->mir_length); 2142 } 2143 (void) printf("\t%s\n", 2144 d->mid_devname); 2145 } 2146 } 2147 } 2148 (void) printf("\n"); 2149 } 2150 2151 /* 2152 * meta_replica_quorum will determine if the disks in the set to be 2153 * imported have enough valid replicas to have quorum. 2154 * 2155 * RETURN: 2156 * -1 Set doesn't have quorum 2157 * 0 Set does have quorum 2158 */ 2159 int 2160 meta_replica_quorum( 2161 md_im_set_desc_t *misp 2162 ) 2163 { 2164 md_im_drive_info_t *midp; 2165 md_im_replica_info_t *midr; 2166 int replica_count = 0; 2167 2168 for (midp = misp->mis_drives; midp != NULL; 2169 midp = midp->mid_next) { 2170 2171 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) 2172 continue; 2173 2174 /* 2175 * The drive is okay. Now count its replicas 2176 */ 2177 for (midr = midp->mid_replicas; midr != NULL; 2178 midr = midr->mir_next) { 2179 replica_count++; 2180 } 2181 } 2182 2183 if (misp->mis_active_replicas & 1) { 2184 /* odd number of replicas */ 2185 if (replica_count < (misp->mis_active_replicas + 1)/2) 2186 return (-1); 2187 } else { 2188 /* even number of replicas */ 2189 if (replica_count <= ((misp->mis_active_replicas + 1)/2)) 2190 return (-1); 2191 } 2192 2193 return (0); 2194 } 2195 2196 2197 /* 2198 * Choose the best drive to use for the metaimport command. 2199 */ 2200 md_im_drive_info_t * 2201 pick_good_disk(md_im_set_desc_t *misp) 2202 { 2203 md_timeval32_t *setcrtime; /* set creation time */ 2204 md_im_drive_info_t *good_disk = NULL; 2205 md_im_drive_info_t *midp = NULL; 2206 md_im_replica_info_t *mirp; 2207 2208 setcrtime = &(misp->mis_drives->mid_replicas->mir_timestamp); 2209 for (midp = misp->mis_drives; (midp != NULL) && (good_disk == NULL); 2210 midp = midp->mid_next) { 2211 /* drive must be available */ 2212 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) { 2213 continue; 2214 } 2215 for (mirp = midp->mid_replicas; mirp != NULL; 2216 mirp = mirp->mir_next) { 2217 /* replica must be active to be a good one */ 2218 if (mirp->mir_flags & MDDB_F_ACTIVE) { 2219 if ((setcrtime->tv_sec == 2220 midp-> mid_setcreatetimestamp.tv_sec) && 2221 (setcrtime->tv_usec == 2222 midp->mid_setcreatetimestamp.tv_usec)) { 2223 good_disk = midp; 2224 break; 2225 } 2226 } 2227 } 2228 } 2229 return (good_disk); 2230 } 2231 2232 /* 2233 * report_set_info() 2234 * 2235 * Returns: 2236 * < 0 for failure 2237 * 0 for success 2238 * 2239 */ 2240 static int 2241 report_set_info( 2242 md_im_set_desc_t *misp, 2243 mddb_mb_t *mb, 2244 mddb_lb_t *lbp, 2245 mddb_rb_t *nm, 2246 pnm_rec_t **pnm, 2247 mdname_t *rsp, 2248 int fd, 2249 uint_t imp_flags, 2250 int set_count, 2251 int overlap, 2252 md_im_drive_info_t *overlap_disks, 2253 md_error_t *ep 2254 ) 2255 { 2256 int rval = 0; 2257 md_im_drive_info_t *d; 2258 md_im_drive_info_t *good_disk = NULL; 2259 int i; 2260 int in = META_INDENT; 2261 char indent[MAXPATHLEN]; 2262 md_timeval32_t lastaccess; /* stores last modified timestamp */ 2263 int has_overlap = 0; 2264 int no_quorum = 0; 2265 int partial = 0; 2266 2267 /* Calculates the correct indentation. */ 2268 indent[0] = 0; 2269 for (i = 0; i < in; i++) 2270 (void) strlcat(indent, " ", sizeof (indent)); 2271 2272 /* 2273 * This will print before the information for the first diskset 2274 * if the verbose option was set. 2275 */ 2276 if (set_count == 1) { 2277 if (imp_flags & META_IMP_REPORT) { 2278 (void) printf("\n%s:\n\n", 2279 gettext("Disksets eligible for import")); 2280 } 2281 } 2282 2283 partial = misp->mis_partial; 2284 good_disk = pick_good_disk(misp); 2285 if (good_disk == NULL) { 2286 return (rval); 2287 } 2288 2289 /* 2290 * Make the distinction between a regular diskset and 2291 * a replicated diskset. Also make the distinction 2292 * between a partial vs. full diskset. 2293 */ 2294 if (partial == MD_IM_PARTIAL_DISKSET) { 2295 if (misp->mis_flags & MD_IM_SET_REPLICATED) { 2296 if (imp_flags & META_IMP_REPORT) { 2297 (void) printf("%i) %s:\n", set_count, gettext( 2298 "Found partial replicated diskset " 2299 "containing disks")); 2300 } else { 2301 (void) printf("\n%s:\n", gettext( 2302 "Importing partial replicated diskset " 2303 "containing disks")); 2304 } 2305 } else { 2306 if (imp_flags & META_IMP_REPORT) { 2307 (void) printf("%i) %s:\n", set_count, gettext( 2308 "Found partial regular diskset containing " 2309 "disks")); 2310 } else { 2311 (void) printf("\n%s:\n", gettext( 2312 "Importing partial regular diskset " 2313 "containing disks")); 2314 } 2315 } 2316 } else { 2317 if (misp->mis_flags & MD_IM_SET_REPLICATED) { 2318 if (imp_flags & META_IMP_REPORT) { 2319 (void) printf("%i) %s:\n", set_count, gettext( 2320 "Found replicated diskset containing " 2321 "disks")); 2322 } else { 2323 (void) printf("\n%s:\n", gettext( 2324 "Importing replicated diskset containing " 2325 "disks")); 2326 } 2327 } else { 2328 if (imp_flags & META_IMP_REPORT) { 2329 (void) printf("%i) %s:\n", set_count, gettext( 2330 "Found regular diskset containing disks")); 2331 } else { 2332 (void) printf("\n%s:\n", gettext( 2333 "Importing regular diskset containing " 2334 "disks")); 2335 } 2336 } 2337 } 2338 2339 /* 2340 * Check each drive in the set. If it's unavailable or 2341 * an overlap tell the user. 2342 */ 2343 for (d = misp->mis_drives; d != NULL; d = d->mid_next) { 2344 (void) fprintf(stdout, " %s", d->mid_dnp->cname); 2345 if (d->mid_available == MD_IM_DISK_NOT_AVAILABLE) { 2346 (void) fprintf(stdout, " (UNAVAIL)"); 2347 } 2348 if (overlap) { 2349 md_im_drive_info_t **chain; 2350 /* 2351 * There is the potential for an overlap, see if 2352 * this disk is one of the overlapped disks. 2353 */ 2354 for (chain = &overlap_disks; *chain != NULL; 2355 chain = &(*chain)->overlap) { 2356 if (strcmp(d->mid_dnp->cname, 2357 (*chain)->mid_dnp->cname) == 0) { 2358 (void) fprintf(stdout, " (CONFLICT)"); 2359 has_overlap = 1; 2360 break; 2361 } 2362 } 2363 } 2364 (void) fprintf(stdout, "\n"); 2365 } 2366 2367 /* 2368 * This note explains the (UNAVAIL) that appears next to the 2369 * disks in the diskset that are not available. 2370 */ 2371 if (partial) { 2372 (void) printf("%s%s\n%s%s\n\n", indent, 2373 gettext("(UNAVAIL) WARNING: This disk is unavailable on" 2374 " this system."), indent, gettext("Import may corrupt " 2375 "data in the diskset.")); 2376 } 2377 2378 /* 2379 * This note explains the (CONFLICT) that appears next to the 2380 * disks whose lb_inittime timestamp does not 2381 * match the rest of the diskset. 2382 */ 2383 if (has_overlap) { 2384 (void) printf("%s%s\n%s%s\n\n", indent, 2385 gettext("(CONFLICT) WARNING: This disk has been reused in " 2386 "another diskset or system configuration."), indent, 2387 gettext("Import may corrupt data in the diskset.")); 2388 } 2389 2390 /* 2391 * If the verbose flag was given on the command line, 2392 * we will print out the metastat -c information , the 2393 * creation time, and last modified time for the diskset. 2394 */ 2395 if (imp_flags & META_IMP_VERBOSE) { 2396 (void) printf("%s%s\n", indent, 2397 gettext("Metadatabase information:")); 2398 report_metadb_info(misp, indent); 2399 2400 /* 2401 * Printing creation time and last modified time. 2402 * Last modified: uses the global variable "lastaccess", 2403 * which is set to the last updated timestamp from all of 2404 * the database blocks(db_timestamp) or record blocks 2405 * (rb_timestamp). 2406 * Creation time is the locator block init time 2407 * (lb_inittime). 2408 */ 2409 lastaccess = good_disk->mid_replicas->mir_timestamp; 2410 2411 (void) printf("%s%s\n", indent, 2412 gettext("Metadevice information:")); 2413 rval = report_metastat_info(mb, lbp, nm, pnm, rsp, fd, 2414 &lastaccess, ep); 2415 if (rval < 0) { 2416 return (rval); 2417 } 2418 2419 (void) printf("%s%s:\t%s\n", indent, 2420 gettext("Creation time"), 2421 meta_print_time(&good_disk->mid_replicas->mir_timestamp)); 2422 (void) printf("%s%s:\t%s\n", indent, 2423 gettext("Last modified time"), 2424 meta_print_time(&lastaccess)); 2425 } else { 2426 /* 2427 * Even if the verbose option is not set, we will print the 2428 * creation time for the diskset. 2429 */ 2430 (void) printf("%s%s:\t%s\n", indent, gettext("Creation time"), 2431 meta_print_time(&good_disk->mid_replicas->mir_timestamp)); 2432 } 2433 2434 2435 /* 2436 * If the diskset is not actually being imported, then we 2437 * print out extra information about how to import it. 2438 * If the verbose flag was not set, then we will also 2439 * print out information about how to obtain verbose output. 2440 */ 2441 if (imp_flags & META_IMP_REPORT) { 2442 /* 2443 * TRANSLATION_NOTE 2444 * 2445 * The translation of the phrase "For more information 2446 * about this set" will be followed by a ":" and a 2447 * suggested command (untranslatable) that the user 2448 * may use to request additional information. 2449 */ 2450 if (!(imp_flags & META_IMP_VERBOSE)) { 2451 (void) printf("%s%s:\n%s %s -r -v %s\n", indent, 2452 gettext("For more information about this diskset"), 2453 indent, myname, good_disk->mid_dnp->cname); 2454 } 2455 2456 if (meta_replica_quorum(misp) != 0) 2457 no_quorum = 1; 2458 2459 /* 2460 * TRANSLATION_NOTE 2461 * 2462 * The translation of the phrase "To import this set" 2463 * will be followed by a ":" and a suggested command 2464 * (untranslatable) that the user may use to import 2465 * the specified diskset. 2466 */ 2467 if (partial || has_overlap || no_quorum) { 2468 (void) printf("%s%s:\n%s %s -f -s <newsetname> %s\n", 2469 indent, gettext("To import this diskset"), indent, 2470 myname, good_disk->mid_dnp->cname); 2471 } else { 2472 (void) printf("%s%s:\n%s %s -s <newsetname> %s\n", 2473 indent, gettext("To import this diskset"), indent, 2474 myname, good_disk->mid_dnp->cname); 2475 } 2476 } 2477 (void) printf("\n\n"); 2478 2479 return (rval); 2480 } 2481 2482 2483 /* 2484 * meta_get_and_report_set_info 2485 * 2486 * Scans a given drive for set specific information. If the given drive 2487 * has a shared metadb, scans the shared metadb for information pertaining 2488 * to the set. 2489 * If imp_flags has META_IMP_PASS1 set don't report. 2490 * 2491 * Returns: 2492 * <0 for failure 2493 * 0 success but no replicas were found 2494 * 1 success and a replica was found 2495 */ 2496 int 2497 meta_get_and_report_set_info( 2498 mddrivenamelist_t *dp, 2499 md_im_set_desc_t **mispp, 2500 int local_mb_ok, 2501 uint_t imp_flags, 2502 int *set_count, 2503 int overlap, 2504 md_im_drive_info_t *overlap_disks, 2505 md_error_t *ep 2506 ) 2507 { 2508 uint_t s; 2509 mdname_t *rsp; 2510 int fd; 2511 char mb[DEV_BSIZE]; 2512 /*LINTED*/ 2513 mddb_mb_t *mbp = (mddb_mb_t *)mb; 2514 char lb[dbtob(MDDB_LBCNT)]; 2515 /*LINTED*/ 2516 mddb_lb_t *lbp = (mddb_lb_t *)lb; 2517 mddb_did_blk_t *lbdidp = NULL; 2518 mddb_ln_t *lnp = NULL; 2519 int lnsize, lbdid_size; 2520 int rval = 0; 2521 char db[DEV_BSIZE]; 2522 /*LINTED*/ 2523 mddb_db_t *dbp = (mddb_db_t *)db; 2524 did_list_t *did_listp = NULL; 2525 mddrivenamelist_t *dnlp; 2526 mddrivename_t *dnp; 2527 md_im_names_t cnames = { 0, NULL}; 2528 char *nm = NULL, *shrnm = NULL; 2529 char *did_nm = NULL, *did_shrnm = NULL; 2530 struct nm_rec *nmp; 2531 struct nm_shr_rec *snmp; 2532 struct devid_shr_rec *did_shrnmp; 2533 struct devid_min_rec *did_nmp; 2534 int extended_namespace = 0; 2535 int replicated = 0; 2536 int partial = 0; 2537 pnm_rec_t *pnm = NULL; /* list of physical devs in set */ 2538 md_im_set_desc_t *misp; 2539 2540 dnp = dp->drivenamep; 2541 2542 /* 2543 * Determine and open the replica slice 2544 */ 2545 if (meta_replicaslice(dnp, &s, ep) != 0) { 2546 return (-1); 2547 } 2548 2549 /* 2550 * Test for the size of replica slice in question. If 2551 * the size is zero, we know that this is not a disk that was 2552 * part of a set and it should be silently ignored for import. 2553 */ 2554 if (dnp->vtoc.parts[s].size == 0) 2555 return (0); 2556 2557 if ((rsp = metaslicename(dnp, s, ep)) == NULL) { 2558 return (-1); 2559 } 2560 2561 if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0) 2562 return (mdsyserror(ep, errno, rsp->cname)); 2563 2564 /* 2565 * After the open() succeeds, we should return via the "out" 2566 * label to clean up after ourselves. (Up 'til now, we can 2567 * just return directly, because there are no resources to 2568 * give back.) 2569 */ 2570 2571 if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0) 2572 goto out; 2573 2574 replicated = is_replicated(fd, mbp, 0, NULL); 2575 2576 if (!local_mb_ok && mbp->mb_setno == 0) { 2577 rval = 0; 2578 goto out; 2579 } 2580 2581 if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0) 2582 goto out; 2583 2584 /* 2585 * Once the locator block has been read, we need to 2586 * check if the locator block commit count is zero. 2587 * If it is zero, we know that the replica we're dealing 2588 * with is on a disk that was deleted from the disk set; 2589 * and, it potentially has stale data. We need to quit 2590 * in that case 2591 */ 2592 if (lbp->lb_commitcnt == 0) { 2593 rval = 0; 2594 goto out; 2595 } 2596 2597 /* 2598 * Make sure that the disk being imported has device id 2599 * namespace present for disksets. If a disk doesn't have 2600 * device id namespace, we skip reading the replica on that disk 2601 */ 2602 if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) { 2603 rval = 0; 2604 goto out; 2605 } 2606 2607 /* 2608 * Grab the locator block device id array. Allocate memory for the 2609 * array first. 2610 */ 2611 lbdid_size = dbtob(lbp->lb_didblkcnt); 2612 lbdidp = Zalloc(lbdid_size); 2613 2614 if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp, 2615 lbdid_size)) <= 0) 2616 goto out; 2617 2618 /* 2619 * For a disk that has not been replicated, extract the device ids 2620 * stored in the locator block device id array and store them in 2621 * a list. 2622 * 2623 * If the disk has been replicated using replication software such 2624 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in 2625 * the locator block are invalid and we need to build a list of 2626 * replicated disks. 2627 */ 2628 if (imp_flags & META_IMP_PASS1) { 2629 /* 2630 * We need to do this for both passes but 2631 * replicated_disk_list_built is global so we need some way 2632 * to determine which pass we're on. Set it to the appropriate 2633 * pass's flag. 2634 */ 2635 replicated_disk_list_built = &replicated_disk_list_built_pass1; 2636 } else { 2637 replicated_disk_list_built = &replicated_disk_list_built_pass2; 2638 } 2639 if (replicated && !(*replicated_disk_list_built)) { 2640 /* 2641 * if there's a replicated diskset involved, we need to 2642 * scan the system one more time and build a list of all 2643 * candidate disks that might be part of that replicated set 2644 */ 2645 if (meta_list_disks(ep, &cnames) != 0) { 2646 rval = 0; 2647 goto out; 2648 } 2649 dnlp = meta_prune_cnames(ep, &cnames, 0); 2650 rval = build_replicated_disks_list(ep, dnlp); 2651 if (rval == 0) 2652 goto out; 2653 } 2654 2655 /* 2656 * Until here, we've gotten away with fixed sizes for the 2657 * master block and locator block. The locator names, 2658 * however, are sized (and therefore allocated) dynamically 2659 * according to information in the locator block. 2660 */ 2661 lnsize = dbtob(lbp->lb_lnblkcnt); 2662 lnp = Zalloc(lnsize); 2663 2664 if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0) 2665 goto out; 2666 2667 rval = build_did_list(ep, fd, mbp, lbp, lbdidp, lnp, &did_listp, 2668 replicated); 2669 2670 /* 2671 * An rval of ENOTSUP means we have a partial diskset. We'll want 2672 * to set the partial variable so we can pass this information 2673 * set_append_wrapper later for placing on the misp list. 2674 */ 2675 if (rval == ENOTSUP) 2676 partial = MD_IM_PARTIAL_DISKSET; 2677 2678 if (rval < 0) 2679 goto out; 2680 2681 /* 2682 * Read in the NM record 2683 * If no NM record was found, it still is a valid configuration 2684 * but it also means that we won't find any corresponding DID_NM 2685 * or DID_SHR_NM. 2686 */ 2687 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname)) 2688 < 0) 2689 goto out; 2690 else if (rval == 0) 2691 goto append; 2692 2693 /* 2694 * At this point, we have read in all of the blocks that form 2695 * the nm_rec. We should at least detect the corner case 2696 * mentioned above, in which r_next_recid links to another 2697 * nm_rec. Extended namespace handling is left for Phase 2. 2698 * 2699 * What this should really be is a loop, each iteration of 2700 * which reads in a nm_rec and calls the set_append(). 2701 */ 2702 /*LINTED*/ 2703 nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t)); 2704 if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2705 extended_namespace = 1; 2706 rval = 0; 2707 goto out; 2708 } 2709 2710 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &shrnm, MDDB_SHR_NM, 2711 rsp->cname)) < 0) 2712 goto out; 2713 else if (rval == 0) 2714 goto append; 2715 2716 /*LINTED*/ 2717 snmp = (struct nm_shr_rec *)(shrnm + sizeof (mddb_rb_t)); 2718 if (snmp->sr_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2719 extended_namespace = 1; 2720 rval = 0; 2721 goto out; 2722 } 2723 2724 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm, 2725 MDDB_DID_NM, rsp->cname)) < 0) 2726 goto out; 2727 else if (rval == 0) 2728 goto append; 2729 2730 /*LINTED*/ 2731 did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t) - 2732 sizeof (int)); 2733 if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2734 extended_namespace = 1; 2735 rval = 0; 2736 goto out; 2737 } 2738 2739 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm, 2740 MDDB_DID_SHR_NM, rsp->cname)) < 0) 2741 goto out; 2742 else if (rval == 0) 2743 goto append; 2744 2745 /*LINTED*/ 2746 did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t) - 2747 sizeof (int)); 2748 if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2749 extended_namespace = 1; 2750 rval = 0; 2751 goto out; 2752 } 2753 2754 /* 2755 * We need to check if all of the disks listed in the namespace 2756 * are actually available. If they aren't we'll return with 2757 * an ENOTSUP error which indicates a partial diskset. 2758 */ 2759 rval = check_nm_disks(did_nmp, did_shrnmp); 2760 2761 /* 2762 * An rval of ENOTSUP means we have a partial diskset. We'll want 2763 * to set the partial variable so we can pass this information 2764 * to set_append_wrapper later for placing on the misp list. 2765 */ 2766 if (rval == ENOTSUP) 2767 partial = MD_IM_PARTIAL_DISKSET; 2768 2769 if (rval < 0) 2770 goto out; 2771 2772 append: 2773 /* Finally, we've got what we need to process this replica. */ 2774 misp = set_append(mispp, did_listp, mbp, lbp, 2775 /*LINTED*/ 2776 (mddb_rb_t *)nm, (mddb_rb_t *)shrnm, &pnm, (mddb_rb_t *)did_nm, 2777 /*LINTED*/ 2778 (mddb_rb_t *)did_shrnm, (imp_flags | partial | replicated), ep); 2779 2780 if (!(imp_flags & META_IMP_PASS1)) { 2781 *set_count += 1; 2782 rval = report_set_info(misp, mbp, lbp, 2783 /*LINTED*/ 2784 (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count, 2785 overlap, overlap_disks, ep); 2786 if (rval < 0) 2787 goto out; 2788 } 2789 2790 /* Return the fact that we found at least one set */ 2791 rval = 1; 2792 2793 out: 2794 if (fd >= 0) 2795 (void) close(fd); 2796 if (did_listp != NULL) 2797 free_did_list(did_listp); 2798 if (lnp != NULL) 2799 Free(lnp); 2800 if (nm != NULL) 2801 Free(nm); 2802 if (did_nm != NULL) 2803 Free(did_nm); 2804 if (did_shrnm != NULL) 2805 Free(did_shrnm); 2806 if (pnm != NULL) 2807 free_pnm_rec_list(&pnm); 2808 2809 /* 2810 * If we are at the end of the list, we must free up 2811 * the replicated list too 2812 */ 2813 if (dp->next == NULL) 2814 free_replicated_disks_list(); 2815 2816 if (extended_namespace) 2817 return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD, 2818 mynode(), NULL, NULL)); 2819 2820 return (rval); 2821 } 2822 2823 /* 2824 * Return the minor name associated with a given disk slice 2825 */ 2826 static char * 2827 meta_getminor_name( 2828 char *devname, 2829 md_error_t *ep 2830 ) 2831 { 2832 int fd = -1; 2833 char *minor_name = NULL; 2834 char *ret_minor_name = NULL; 2835 2836 if (devname == NULL) 2837 return (NULL); 2838 2839 if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) { 2840 (void) mdsyserror(ep, errno, devname); 2841 return (NULL); 2842 } 2843 2844 if (devid_get_minor_name(fd, &minor_name) == 0) { 2845 ret_minor_name = Strdup(minor_name); 2846 devid_str_free(minor_name); 2847 } 2848 2849 (void) close(fd); 2850 return (ret_minor_name); 2851 } 2852 2853 /* 2854 * meta_update_mb_did 2855 * 2856 * Update or create the master block with the new set number. 2857 * If a non-null devid pointer is given, the devid in the 2858 * master block will also be changed. 2859 * 2860 * This routine is called during the import of a diskset 2861 * (meta_imp_update_mb) and during the take of a diskset that has 2862 * some unresolved replicated drives (meta_unrslv_replicated_mb). 2863 * 2864 * Returns : nothing (void) 2865 */ 2866 static void 2867 meta_update_mb_did( 2868 mdsetname_t *sp, 2869 mddrivename_t *dnp, /* raw name of drive with mb */ 2870 void *new_devid, /* devid to be stored in mb */ 2871 int new_devid_len, 2872 void *old_devid, /* old devid stored in mb */ 2873 int replica_present, /* does replica follow mb? */ 2874 int offset, 2875 md_error_t *ep 2876 ) 2877 { 2878 int fd; 2879 struct mddb_mb *mbp; 2880 uint_t sliceno; 2881 mdname_t *rsp; 2882 2883 /* determine the replica slice */ 2884 if (meta_replicaslice(dnp, &sliceno, ep) != 0) { 2885 return; 2886 } 2887 2888 /* 2889 * if the replica slice size is zero, 2890 * don't bother opening 2891 */ 2892 if (dnp->vtoc.parts[sliceno].size == 0) { 2893 return; 2894 } 2895 2896 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) { 2897 return; 2898 } 2899 2900 if ((fd = open(rsp->rname, O_RDWR | O_NDELAY)) < 0) { 2901 return; 2902 } 2903 2904 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) 2905 return; 2906 2907 mbp = Zalloc(DEV_BSIZE); 2908 if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE) { 2909 Free(mbp); 2910 return; 2911 } 2912 2913 /* If no replica on disk, check for dummy mb */ 2914 if (replica_present == NULL) { 2915 /* 2916 * Check to see if there is a dummy there. If not 2917 * create one. This would happen if the set was 2918 * created before the master block dummy code was 2919 * implemented. 2920 */ 2921 if ((mbp->mb_magic != MDDB_MAGIC_DU) || 2922 (mbp->mb_revision != MDDB_REV_MB)) { 2923 meta_mkdummymaster(sp, fd, offset); 2924 Free(mbp); 2925 return; 2926 } 2927 } 2928 2929 mbp->mb_setno = sp->setno; 2930 if (meta_gettimeofday(&mbp->mb_timestamp) == -1) { 2931 Free(mbp); 2932 return; 2933 } 2934 2935 /* 2936 * If a old_devid is non-NULL then we're are dealing with a 2937 * replicated diskset and the devid needs to be updated. 2938 */ 2939 if (old_devid) { 2940 if (mbp->mb_devid_magic == MDDB_MAGIC_DE) { 2941 if (mbp->mb_devid_len) 2942 (void) memset(mbp->mb_devid, 0, 2943 mbp->mb_devid_len); 2944 (void) memcpy(mbp->mb_devid, 2945 (char *)new_devid, new_devid_len); 2946 mbp->mb_devid_len = new_devid_len; 2947 } 2948 } 2949 2950 crcgen((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum, 2951 (uint_t)DEV_BSIZE, (crc_skip_t *)NULL); 2952 2953 /* 2954 * Now write out the changes to disk. 2955 * If an error occurs, just continue on. 2956 * Next take of set will register this drive as 2957 * an unresolved replicated drive and will attempt 2958 * to fix the master block again. 2959 */ 2960 if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) { 2961 Free(mbp); 2962 return; 2963 } 2964 if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE) { 2965 Free(mbp); 2966 return; 2967 } 2968 2969 Free(mbp); 2970 (void) close(fd); 2971 } 2972 2973 2974 /* 2975 * meta_imp_update_mb 2976 * 2977 * Update the master block information during an import. 2978 * Takes an import set descriptor. 2979 * 2980 * Returns : nothing (void) 2981 */ 2982 void 2983 meta_imp_update_mb(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep) 2984 { 2985 md_im_drive_info_t *midp; 2986 mddrivename_t *dnp; 2987 int offset = 16; /* default mb offset is 16 */ 2988 2989 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) { 2990 /* 2991 * If disk isn't available we can't update, so go to next 2992 */ 2993 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) { 2994 continue; 2995 } 2996 2997 dnp = midp->mid_dnp; 2998 2999 if (midp->mid_replicas) { 3000 md_im_replica_info_t *mirp; 3001 3002 /* 3003 * If we have replicas on this disk we need to make 3004 * sure that we update the master block on every 3005 * replica on the disk. 3006 */ 3007 for (mirp = midp->mid_replicas; mirp != NULL; 3008 mirp = mirp->mir_next) { 3009 offset = mirp->mir_offset; 3010 meta_update_mb_did(sp, dnp, midp->mid_devid, 3011 midp->mid_devid_sz, midp->mid_o_devid, 3012 1, offset, ep); 3013 } 3014 } else { 3015 /* No replicas, just update the one dummy mb */ 3016 meta_update_mb_did(sp, dnp, midp->mid_devid, 3017 midp->mid_devid_sz, midp->mid_o_devid, 3018 0, offset, ep); 3019 } 3020 if (!mdisok(ep)) 3021 return; 3022 } 3023 } 3024 3025 /* 3026 * meta_unrslv_replicated_common 3027 * 3028 * Given a drive_desc and a drivenamelist pointer, 3029 * return the devidp associated with the drive_desc, 3030 * the replicated (new) devidp associated with the drive_desc 3031 * and the specific mddrivename in the drivenamelist that 3032 * matches the replicated (new) devidp. 3033 * 3034 * Typically the drivenamelist pointer would be setup by 3035 * the meta_prune_cnames function. 3036 * 3037 * Calling function must free devidp using devid_free. 3038 * 3039 * Returns 0 - success, found new_devidp and dnp_new. 3040 * Returns 1 - failure, didn't find new devid info 3041 */ 3042 static int 3043 meta_unrslv_replicated_common( 3044 int myside, 3045 md_drive_desc *dd, /* drive list for diskset */ 3046 mddrivenamelist_t *dnlp, /* list of drives on current system */ 3047 ddi_devid_t *devidp, /* old devid */ 3048 ddi_devid_t *new_devidp, /* replicated (new) devid */ 3049 mddrivename_t **dnp_new, /* replicated drive name */ 3050 md_error_t *ep 3051 ) 3052 { 3053 mddrivename_t *dnp; /* drive name of old drive */ 3054 mdsidenames_t *sn = NULL; 3055 uint_t rep_slice; 3056 mdname_t *np; 3057 char *minor_name = NULL; 3058 char *devid_str = NULL; 3059 size_t len; 3060 int devid_sz; 3061 mddrivenamelist_t *dp; 3062 ddi_devid_t old_devid; /* devid of old drive */ 3063 ddi_devid_t new_devid; /* devid of new replicated drive */ 3064 ddi_devid_t dnp_new_devid; /* devid derived from drive */ 3065 /* name of replicated drive */ 3066 3067 dnp = dd->dd_dnp; 3068 3069 /* Get old devid from drive record */ 3070 (void) devid_str_decode(dd->dd_dnp->devid, 3071 &old_devid, NULL); 3072 3073 /* Look up replicated (new) devid */ 3074 new_devid = replicated_list_lookup( 3075 devid_sizeof(old_devid), old_devid); 3076 3077 devid_free(old_devid); 3078 3079 if (new_devid == NULL) 3080 return (1); 3081 3082 /* 3083 * Using new_devid, find a drivename entry with a matching devid. 3084 * Use the passed in dnlp since it has the new (replicated) disknames 3085 * in it. 3086 */ 3087 for (dp = dnlp; dp != NULL; dp = dp->next) { 3088 (void) devid_str_decode(dp->drivenamep->devid, 3089 &dnp_new_devid, NULL); 3090 3091 if (dnp_new_devid == NULL) 3092 continue; 3093 3094 if (devid_compare(new_devid, dnp_new_devid) == 0) { 3095 devid_free(dnp_new_devid); 3096 break; 3097 } 3098 devid_free(dnp_new_devid); 3099 } 3100 3101 /* If can't find new name for drive - nothing to update */ 3102 if (dp == NULL) 3103 return (1); 3104 3105 /* 3106 * Setup returned value to be the drivename structure associated 3107 * with new (replicated) drive. 3108 */ 3109 *dnp_new = dp->drivenamep; 3110 3111 /* 3112 * Need to return the new devid including the minor name. 3113 * Find the minor_name here using the sidename or by 3114 * looking in the namespace. 3115 */ 3116 for (sn = dnp->side_names; sn != NULL; sn = sn->next) { 3117 if (sn->sideno == myside) 3118 break; 3119 } 3120 3121 /* 3122 * The disk has no side name information 3123 */ 3124 if (sn == NULL) { 3125 if ((meta_replicaslice(*dnp_new, &rep_slice, ep) != 0) || 3126 ((np = metaslicename(*dnp_new, rep_slice, ep)) 3127 == NULL)) { 3128 mdclrerror(ep); 3129 return (1); 3130 } 3131 3132 if (np->dev == NODEV64) 3133 return (1); 3134 3135 /* 3136 * minor_name will be NULL if dnp->devid == NULL 3137 * - see metagetvtoc() 3138 */ 3139 if (np->minor_name == NULL) 3140 return (1); 3141 else 3142 minor_name = Strdup(np->minor_name); 3143 3144 } else { 3145 minor_name = meta_getdidminorbykey( 3146 MD_LOCAL_SET, sn->sideno + SKEW, 3147 dnp->side_names_key, ep); 3148 if (!mdisok(ep)) 3149 return (1); 3150 } 3151 /* 3152 * Now, use the old devid with minor name to lookup 3153 * the replicated (new) devid that will also contain 3154 * a minor name. 3155 */ 3156 len = strlen(dnp->devid) + strlen(minor_name) + 2; 3157 devid_str = (char *)Malloc(len); 3158 (void) snprintf(devid_str, len, "%s/%s", dnp->devid, 3159 minor_name); 3160 (void) devid_str_decode(devid_str, devidp, NULL); 3161 Free(devid_str); 3162 devid_sz = devid_sizeof((ddi_devid_t)*devidp); 3163 *new_devidp = replicated_list_lookup(devid_sz, *devidp); 3164 return (0); 3165 } 3166 3167 /* 3168 * meta_unrslv_replicated_mb 3169 * 3170 * Update the master block information during a take. 3171 * Takes an md_drive_desc descriptor. 3172 * 3173 * Returns : nothing (void) 3174 */ 3175 void 3176 meta_unrslv_replicated_mb( 3177 mdsetname_t *sp, 3178 md_drive_desc *dd, /* drive list for diskset */ 3179 mddrivenamelist_t *dnlp, /* list of drives on current system */ 3180 md_error_t *ep 3181 ) 3182 { 3183 md_drive_desc *d = NULL, *d_save; 3184 mddrivename_t *dnp; /* dnp of old drive */ 3185 mddrivename_t *dnp_new; /* dnp of new (replicated) drive */ 3186 mddrivename_t *dnp_save; /* saved copy needed to restore */ 3187 ddi_devid_t devidp, new_devidp; 3188 int myside; 3189 3190 if ((myside = getmyside(sp, ep)) == MD_SIDEWILD) 3191 return; 3192 3193 for (d = dd; d != NULL; d = d->dd_next) { 3194 dnp = d->dd_dnp; 3195 if (dnp == NULL) 3196 continue; 3197 3198 /* If don't need to update master block - skip it. */ 3199 if (!(d->dd_flags & MD_DR_FIX_MB_DID)) 3200 continue; 3201 3202 /* 3203 * Get old and replicated (new) devids associated with this 3204 * drive. Also, get the new (replicated) drivename structure. 3205 */ 3206 if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp, 3207 &new_devidp, &dnp_new, ep) != 0) { 3208 mdclrerror(ep); 3209 continue; 3210 } 3211 3212 if (new_devidp) { 3213 int offset = 16; /* default mb offset is 16 */ 3214 int dbcnt; 3215 3216 if (d->dd_dbcnt) { 3217 /* 3218 * Update each master block on the disk 3219 */ 3220 for (dbcnt = d->dd_dbcnt; dbcnt != 0; dbcnt--) { 3221 meta_update_mb_did(sp, dnp_new, 3222 new_devidp, 3223 devid_sizeof(new_devidp), devidp, 3224 1, offset, ep); 3225 offset += d->dd_dbsize; 3226 } 3227 } else { 3228 /* update the one dummy mb */ 3229 meta_update_mb_did(sp, dnp_new, new_devidp, 3230 devid_sizeof(new_devidp), devidp, 3231 0, offset, ep); 3232 } 3233 if (!mdisok(ep)) { 3234 devid_free(devidp); 3235 return; 3236 } 3237 3238 /* Set drive record flags to ok */ 3239 /* Just update this one drive record. */ 3240 d_save = d->dd_next; 3241 dnp_save = d->dd_dnp; 3242 d->dd_next = NULL; 3243 d->dd_dnp = dnp_new; 3244 /* Ignore failure since no bad effect. */ 3245 (void) clnt_upd_dr_flags(mynode(), sp, d, 3246 MD_DR_OK, ep); 3247 d->dd_next = d_save; 3248 d->dd_dnp = dnp_save; 3249 } 3250 devid_free(devidp); 3251 } 3252 } 3253 3254 /* 3255 * meta_update_nm_rr_did 3256 * 3257 * Change a devid stored in the diskset namespace and in the local set 3258 * namespace with the new devid. 3259 * 3260 * This routine is called during the import of a diskset 3261 * (meta_imp_update_nn) and during the take of a diskset that has 3262 * some unresolved replicated drives (meta_unrslv_replicated_nm). 3263 * 3264 * Returns : nothing (void) 3265 */ 3266 static void 3267 meta_update_nm_rr_did( 3268 mdsetname_t *sp, 3269 void *old_devid, /* old devid being replaced */ 3270 int old_devid_sz, 3271 void *new_devid, /* devid to be stored in nm */ 3272 int new_devid_sz, 3273 int import_flag, /* called during import? */ 3274 md_error_t *ep 3275 ) 3276 { 3277 struct mddb_config c; 3278 3279 (void) memset(&c, 0, sizeof (c)); 3280 c.c_setno = sp->setno; 3281 3282 /* During import to NOT update the local namespace. */ 3283 if (import_flag) 3284 c.c_flags = MDDB_C_IMPORT; 3285 3286 c.c_locator.l_devid = (uintptr_t)Malloc(new_devid_sz); 3287 (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 3288 new_devid, new_devid_sz); 3289 c.c_locator.l_devid_sz = new_devid_sz; 3290 c.c_locator.l_devid_flags = 3291 MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 3292 c.c_locator.l_old_devid = (uint64_t)(uintptr_t)Malloc(old_devid_sz); 3293 (void) memcpy((void *)(uintptr_t)c.c_locator.l_old_devid, 3294 old_devid, old_devid_sz); 3295 c.c_locator.l_old_devid_sz = old_devid_sz; 3296 if (metaioctl(MD_IOCUPDATE_NM_RR_DID, &c, &c.c_mde, NULL) != 0) { 3297 (void) mdstealerror(ep, &c.c_mde); 3298 } 3299 Free((void *)(uintptr_t)c.c_locator.l_devid); 3300 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 3301 } 3302 3303 /* 3304 * meta_imp_update_nm 3305 * 3306 * Change a devid stored in the diskset namespace with the new devid. 3307 * This routine is called during the import of a remotely replicated diskset. 3308 * 3309 * Returns : nothing (void) 3310 */ 3311 void 3312 meta_imp_update_nm(mdsetname_t *sp, md_im_set_desc_t *misp, md_error_t *ep) 3313 { 3314 md_im_drive_info_t *midp; 3315 3316 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) { 3317 /* 3318 * If disk isn't available we can't update, so go to next 3319 */ 3320 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) { 3321 continue; 3322 } 3323 3324 meta_update_nm_rr_did(sp, midp->mid_o_devid, 3325 midp->mid_o_devid_sz, midp->mid_devid, 3326 midp->mid_devid_sz, 1, ep); 3327 if (!mdisok(ep)) 3328 return; 3329 } 3330 } 3331 3332 /* 3333 * meta_unrslv_replicated_nm 3334 * 3335 * Change a devid stored in the diskset namespace and in the local set 3336 * namespace with the new devid. 3337 * 3338 * This routine is called during the take of a diskset that has 3339 * some unresolved replicated drives. 3340 * 3341 * Returns : nothing (void) 3342 */ 3343 void 3344 meta_unrslv_replicated_nm( 3345 mdsetname_t *sp, 3346 md_drive_desc *dd, /* drive list for diskset */ 3347 mddrivenamelist_t *dnlp, /* list of drives on current system */ 3348 md_error_t *ep 3349 ) 3350 { 3351 md_drive_desc *d = NULL; 3352 mddrivename_t *dnp; /* drive name of old drive */ 3353 mddrivename_t *dnp_new; /* drive name of new (repl) drive */ 3354 ddi_devid_t devidp, new_devidp; 3355 ddi_devid_t old_devid; 3356 char *devid_old_save; 3357 mdsetname_t *local_sp = NULL; 3358 int myside; 3359 3360 if ((myside = getmyside(sp, ep)) == MD_SIDEWILD) 3361 return; 3362 3363 for (d = dd; d != NULL; d = d->dd_next) { 3364 dnp = d->dd_dnp; 3365 if (dnp == NULL) 3366 continue; 3367 3368 /* If don't need to update namespace - skip it. */ 3369 if (!(d->dd_flags & MD_DR_FIX_LB_NM_DID)) 3370 continue; 3371 3372 /* Get old devid from drive record */ 3373 (void) devid_str_decode(d->dd_dnp->devid, 3374 &old_devid, NULL); 3375 3376 /* 3377 * Get old and replicated (new) devids associated with this 3378 * drive. Also, get the new (replicated) drivename structure. 3379 */ 3380 if (meta_unrslv_replicated_common(myside, d, dnlp, &devidp, 3381 &new_devidp, &dnp_new, ep) != 0) { 3382 mdclrerror(ep); 3383 continue; 3384 } 3385 3386 if (new_devidp) { 3387 meta_update_nm_rr_did(sp, devidp, 3388 devid_sizeof(devidp), new_devidp, 3389 devid_sizeof(new_devidp), 0, ep); 3390 if (!mdisok(ep)) { 3391 devid_free(devidp); 3392 return; 3393 } 3394 } 3395 devid_free(devidp); 3396 3397 /* 3398 * Using the new devid, fix up the name. 3399 * If meta_upd_ctdnames fails, the next take will re-resolve 3400 * the name from the new devid. 3401 */ 3402 local_sp = metasetname(MD_LOCAL_NAME, ep); 3403 devid_old_save = dnp->devid; 3404 dnp->devid = dnp_new->devid; 3405 (void) meta_upd_ctdnames(&local_sp, 0, (myside + SKEW), 3406 dnp, NULL, ep); 3407 mdclrerror(ep); 3408 dnp->devid = devid_old_save; 3409 } 3410 } 3411 3412 static set_t 3413 meta_imp_setno( 3414 md_error_t *ep 3415 ) 3416 { 3417 set_t max_sets, setno; 3418 int bool; 3419 3420 if ((max_sets = get_max_sets(ep)) == 0) { 3421 return (MD_SET_BAD); 3422 } 3423 3424 /* 3425 * This code needs to be expanded when we run in SunCluster 3426 * environment SunCluster obtains setno internally 3427 */ 3428 for (setno = 1; setno < max_sets; setno++) { 3429 if (clnt_setnumbusy(mynode(), setno, 3430 &bool, ep) == -1) { 3431 setno = MD_SET_BAD; 3432 break; 3433 } 3434 /* 3435 * found one available 3436 */ 3437 if (bool == FALSE) 3438 break; 3439 } 3440 3441 if (setno == max_sets) { 3442 setno = MD_SET_BAD; 3443 } 3444 3445 return (setno); 3446 } 3447 3448 int 3449 meta_imp_set( 3450 md_im_set_desc_t *misp, 3451 char *setname, 3452 int force, 3453 bool_t dry_run, 3454 md_error_t *ep 3455 ) 3456 { 3457 md_timeval32_t tp; 3458 md_im_drive_info_t *midp; 3459 uint_t rep_slice; 3460 mddrivename_t *dnp; 3461 struct mddb_config c; 3462 mdname_t *np; 3463 md_im_replica_info_t *mirp; 3464 set_t setno; 3465 mdcinfo_t *cinfo; 3466 mdsetname_t *sp; 3467 mddrivenamelist_t *dnlp = NULL; 3468 mddrivenamelist_t **dnlpp = &dnlp; 3469 char *minor_name = NULL; 3470 int stale_flag = 0; 3471 md_set_desc *sd; 3472 int partial_replicated_flag = 0; 3473 md_error_t xep = mdnullerror; 3474 md_setkey_t *cl_sk; 3475 3476 (void) memset(&c, 0, sizeof (c)); 3477 (void) strlcpy(c.c_setname, setname, sizeof (c.c_setname)); 3478 c.c_sideno = 0; 3479 c.c_flags = MDDB_C_IMPORT; 3480 3481 /* 3482 * Check to see if the setname that the set is being imported into, 3483 * already exists. 3484 */ 3485 if (getsetbyname(c.c_setname, ep) != NULL) { 3486 return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD, 3487 mynode(), NULL, c.c_setname)); 3488 } 3489 3490 /* 3491 * Find the next available set number 3492 */ 3493 if ((setno = meta_imp_setno(ep)) == MD_SET_BAD) { 3494 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD, 3495 mynode(), NULL, c.c_setname)); 3496 } 3497 3498 c.c_setno = setno; 3499 if (meta_gettimeofday(&tp) == -1) { 3500 return (mdsyserror(ep, errno, NULL)); 3501 } 3502 c.c_timestamp = tp; 3503 3504 /* Check to see if replica quorum requirement is fulfilled */ 3505 if (meta_replica_quorum(misp) == -1) { 3506 if (!force) { 3507 return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD, 3508 mynode(), NULL, c.c_setname)); 3509 } else { 3510 stale_flag = MD_IMP_STALE_SET; 3511 /* 3512 * If we have a stale diskset, the kernel will 3513 * delete the replicas on the unavailable disks. 3514 * To be consistent, we'll zero out the mirp on those 3515 * disks here. 3516 */ 3517 for (midp = misp->mis_drives; midp != NULL; 3518 midp = midp->mid_next) { 3519 if (midp->mid_available == 3520 MD_IM_DISK_NOT_AVAILABLE) { 3521 midp->mid_replicas = NULL; 3522 } 3523 } 3524 } 3525 } 3526 3527 for (midp = misp->mis_drives; midp != NULL; 3528 midp = midp->mid_next) { 3529 3530 if ((misp->mis_flags & MD_IM_SET_REPLICATED) && 3531 (partial_replicated_flag == 0) && 3532 (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE)) 3533 partial_replicated_flag = MD_SR_UNRSLV_REPLICATED; 3534 3535 /* 3536 * We pass the list of the drives in the 3537 * set with replicas on them down to the kernel. 3538 */ 3539 dnp = midp->mid_dnp; 3540 mirp = midp->mid_replicas; 3541 if (!mirp) { 3542 /* 3543 * No replicas on this disk, go to next disk. 3544 */ 3545 continue; 3546 } 3547 3548 if (midp->mid_available == MD_IM_DISK_NOT_AVAILABLE) { 3549 /* 3550 * The disk isn't there. We'll need to get the 3551 * disk information from the midp list instead 3552 * of going and looking for it. This means it 3553 * will be information relative to the old 3554 * system. 3555 */ 3556 minor_name = Strdup(midp->mid_minor_name); 3557 (void) strncpy(c.c_locator.l_driver, 3558 midp->mid_driver_name, 3559 sizeof (c.c_locator.l_driver)); 3560 (void) strcpy(c.c_locator.l_devname, midp->mid_devname); 3561 c.c_locator.l_mnum = midp->mid_mnum; 3562 3563 } else { 3564 if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) || 3565 ((np = metaslicename(dnp, rep_slice, ep)) 3566 == NULL)) { 3567 mdclrerror(ep); 3568 continue; 3569 } 3570 (void) strcpy(c.c_locator.l_devname, np->bname); 3571 c.c_locator.l_dev = meta_cmpldev(np->dev); 3572 c.c_locator.l_mnum = meta_getminor(np->dev); 3573 minor_name = meta_getminor_name(np->bname, ep); 3574 if ((cinfo = metagetcinfo(np, ep)) == NULL) { 3575 mdclrerror(ep); 3576 continue; 3577 } 3578 3579 if (cinfo->dname) { 3580 (void) strncpy(c.c_locator.l_driver, 3581 cinfo->dname, 3582 sizeof (c.c_locator.l_driver)); 3583 } 3584 } 3585 3586 c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz); 3587 (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 3588 midp->mid_devid, midp->mid_devid_sz); 3589 c.c_locator.l_devid_sz = midp->mid_devid_sz; 3590 c.c_locator.l_devid_flags = 3591 MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 3592 if (midp->mid_o_devid) { 3593 c.c_locator.l_old_devid = 3594 (uint64_t)(uintptr_t)Malloc(midp->mid_o_devid_sz); 3595 (void) memcpy((void *)(uintptr_t) 3596 c.c_locator.l_old_devid, 3597 midp->mid_o_devid, midp->mid_o_devid_sz); 3598 c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz; 3599 } 3600 if (minor_name) { 3601 (void) strncpy(c.c_locator.l_minor_name, minor_name, 3602 sizeof (c.c_locator.l_minor_name)); 3603 } 3604 3605 do { 3606 c.c_locator.l_flags = 0; 3607 c.c_locator.l_blkno = mirp->mir_offset; 3608 if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 3609 Free((void *)(uintptr_t)c.c_locator.l_devid); 3610 if (c.c_locator.l_old_devid) 3611 Free((void *)(uintptr_t) 3612 c.c_locator.l_old_devid); 3613 return (mdstealerror(ep, &c.c_mde)); 3614 } 3615 mirp = mirp->mir_next; 3616 } while (mirp != NULL); 3617 } 3618 3619 /* 3620 * If the dry run option was specified, flag success 3621 * and exit out 3622 */ 3623 if (dry_run == 1) { 3624 md_eprintf("%s\n", dgettext(TEXT_DOMAIN, 3625 "import should be successful")); 3626 Free((void *)(uintptr_t)c.c_locator.l_devid); 3627 if (c.c_locator.l_old_devid) 3628 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 3629 return (0); 3630 } 3631 3632 /* 3633 * Now the kernel should have all the information 3634 * regarding the import diskset replica. 3635 * Tell the kernel to load them up and import the set 3636 */ 3637 (void) memset(&c, 0, sizeof (c)); 3638 c.c_flags = stale_flag; 3639 c.c_setno = setno; 3640 if (metaioctl(MD_IOCIMP_LOAD, &c, &c.c_mde, NULL) != 0) { 3641 Free((void *)(uintptr_t)c.c_locator.l_devid); 3642 if (c.c_locator.l_old_devid) 3643 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 3644 return (mdstealerror(ep, &c.c_mde)); 3645 } 3646 3647 (void) meta_smf_enable(META_SMF_DISKSET, NULL); 3648 3649 /* 3650 * Create a set name for the set. 3651 */ 3652 sp = Zalloc(sizeof (*sp)); 3653 sp->setname = Strdup(setname); 3654 sp->lockfd = MD_NO_LOCK; 3655 sp->setno = setno; 3656 sd = Zalloc(sizeof (*sd)); 3657 (void) strcpy(sd->sd_nodes[0], mynode()); 3658 sd->sd_ctime = tp; 3659 sd->sd_genid = 0; 3660 3661 if (misp->mis_flags & MD_IM_SET_REPLICATED) { 3662 /* Update the diskset namespace */ 3663 meta_imp_update_nm(sp, misp, ep); 3664 3665 /* Release the diskset - even if update_nm failed */ 3666 (void) memset(&c, 0, sizeof (c)); 3667 c.c_setno = setno; 3668 /* Don't need device id information from this ioctl */ 3669 c.c_locator.l_devid = (uint64_t)0; 3670 c.c_locator.l_devid_flags = 0; 3671 if (metaioctl(MD_RELEASE_SET, &c, &c.c_mde, NULL) != 0) { 3672 if (mdisok(ep)) 3673 (void) mdstealerror(ep, &c.c_mde); 3674 Free(sd); 3675 Free(sp); 3676 return (-1); 3677 } 3678 3679 /* If update_nm failed, then fail the import. */ 3680 if (!mdisok(ep)) { 3681 Free(sd); 3682 Free(sp); 3683 return (-1); 3684 } 3685 } 3686 3687 /* 3688 * We'll need to update information in the master block due 3689 * to the set number changing and if the case of a replicated 3690 * diskset, the device id changing. May also need to create a 3691 * dummy master block if it's not there. 3692 */ 3693 meta_imp_update_mb(sp, misp, ep); 3694 if (!mdisok(ep)) { 3695 Free(sd); 3696 Free(sp); 3697 return (-1); 3698 } 3699 3700 /* 3701 * Create set record for diskset, but record is left in 3702 * MD_SR_ADD state until after drives are added to set. 3703 */ 3704 if (clnt_lock_set(mynode(), sp, ep)) { 3705 Free(sd); 3706 Free(sp); 3707 return (-1); 3708 } 3709 3710 if (clnt_createset(mynode(), sp, sd->sd_nodes, 3711 sd->sd_ctime, sd->sd_genid, ep)) { 3712 cl_sk = cl_get_setkey(sp->setno, sp->setname); 3713 (void) clnt_unlock_set(mynode(), cl_sk, &xep); 3714 Free(sd); 3715 Free(sp); 3716 return (-1); 3717 } 3718 3719 Free(sd); 3720 3721 /* 3722 * Create drive records for the disks in the set. 3723 */ 3724 for (midp = misp->mis_drives; midp != NULL; midp = midp->mid_next) { 3725 dnp = midp->mid_dnp; 3726 if (midp->mid_available & MD_IM_DISK_NOT_AVAILABLE) { 3727 /* 3728 * If the disk isn't available, the dnp->devid is 3729 * no good. It is either blank for the case where 3730 * there is no disk with that devname, or it 3731 * contains the devid for the real disk in the system 3732 * with that name. The problem is, if the disk is 3733 * unavailable, then the devid should be the devid 3734 * of the missing disk. So we're faking a dnp for 3735 * the import. This is needed for creating drive 3736 * records. 3737 */ 3738 dnp = Zalloc(sizeof (mddrivename_t)); 3739 dnp->side_names_key = midp->mid_dnp->side_names_key; 3740 dnp->type = midp->mid_dnp->type; 3741 dnp->cname = Strdup(midp->mid_dnp->cname); 3742 dnp->rname = Strdup(midp->mid_dnp->rname); 3743 dnp->devid = devid_str_encode(midp->mid_devid, 3744 NULL); 3745 midp->mid_dnp = dnp; 3746 } 3747 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp); 3748 } 3749 3750 if (meta_imp_set_adddrives(sp, dnlp, misp, ep)) { 3751 Free(sp); 3752 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD, 3753 mynode(), NULL, c.c_setname)); 3754 } 3755 3756 /* If drives were added without error, set set_record to OK */ 3757 if (clnt_upd_sr_flags(mynode(), sp, 3758 (partial_replicated_flag | MD_SR_OK | MD_SR_MB_DEVID), ep)) { 3759 Free(sp); 3760 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD, 3761 mynode(), NULL, c.c_setname)); 3762 } 3763 3764 Free(sp); 3765 3766 cl_sk = cl_get_setkey(sp->setno, sp->setname); 3767 if (clnt_unlock_set(mynode(), cl_sk, ep)) { 3768 return (-1); 3769 } 3770 cl_set_setkey(NULL); 3771 3772 Free((void *)(uintptr_t)c.c_locator.l_devid); 3773 if (c.c_locator.l_old_devid) 3774 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 3775 return (0); 3776 } 3777