/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <assert.h>
#include <ctype.h>
#include <libdevinfo.h>
#include <mdiox.h>
#include <meta.h>
#include "meta_repartition.h"
#include "meta_set_prv.h"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_crc.h>
#include <sys/lvm/md_convert.h>

/*
 * One node per device id found in a locator block.  map_replica_disk()
 * searches a chain of these by did_index.
 */
typedef struct did_list {
	void		*rdid;	/* real did if replicated set */
	void		*did;	/* did stored in lb */
	char		*devname;	/* device name handed to metadiskname() */
	dev_t		dev;
	uint_t		did_index;	/* locator block device id array index */
	char		*minor_name;	/* minor name recorded with the drive */
	struct did_list	*next;	/* next node, NULL at the tail */
} did_list_t;

/*
 * One entry of the global replicated disk list: maps the device id a disk
 * used to have (old_devid) onto the one it has now (new_devid).
 * replicated_list_insert() stores a private copy of old_devid but keeps the
 * caller's new_devid pointer; free_replicated_disks_list() frees both.
 */
typedef struct replicated_disk {
	void			*old_devid;
	void			*new_devid;
	struct replicated_disk	*next;	/* next entry in the same bucket */
} replicated_disk_t;

/*
 * The current implementation limits the max device id length to 256 bytes.
62 * Should the max device id length be increased, this definition would have to 63 * be bumped up accordingly 64 */ 65 #define MAX_DEVID_LEN 256 66 67 /* 68 * We store a global list of all the replicated disks in the system. In 69 * order to prevent us from performing a linear search on this list, we 70 * store the disks in a two dimensional sparse array. The disks are bucketed 71 * based on the length of their device ids. 72 */ 73 static replicated_disk_t *replicated_disk_list[MAX_DEVID_LEN + 1] = {NULL}; 74 75 /* 76 * The list of replicated disks is built just once and this flag is set 77 * once it's done 78 */ 79 static int replicated_disk_list_built = 0; 80 81 /* 82 * Map logical blk to physical 83 * 84 * This is based on the routine of the same name in the md kernel module (see 85 * file md_mddb.c), with the following caveats: 86 * 87 * - The kernel routine works on in core master blocks, or mddb_mb_ic_t; this 88 * routine works instead on the mddb_mb_t read directly from the disk 89 */ 90 daddr_t 91 getphysblk( 92 mddb_block_t blk, 93 mddb_mb_t *mbp 94 ) 95 { 96 /* 97 * Sanity check: is the block within range? If so, we then assume 98 * that the block range map in the master block is valid and 99 * consistent with the block count. Unfortunately, there is no 100 * reliable way to validate this assumption. 101 */ 102 if (blk >= mbp->mb_blkcnt || blk >= mbp->mb_blkmap.m_consecutive) 103 return ((daddr_t)-1); 104 105 return (mbp->mb_blkmap.m_firstblk + blk); 106 } 107 108 109 110 /* 111 * drive_append() 112 * 113 * Append to tail of linked list of md_im_drive_info_t. 114 * 115 * Will allocate space for new node and copy args into new space. 116 * 117 * Returns pointer to new node. 
118 */ 119 static md_im_drive_info_t * 120 drive_append( 121 md_im_drive_info_t **midpp, 122 mddrivename_t *dnp, 123 void *devid, 124 void *rdevid, 125 void *devname, 126 int devid_sz, 127 char *minor_name, 128 md_timeval32_t timestamp, 129 md_im_replica_info_t *mirp 130 ) 131 { 132 md_im_drive_info_t *midp; 133 int o_devid_sz; 134 135 for (; (*midpp != NULL); midpp = &((*midpp)->mid_next)) 136 ; 137 138 midp = *midpp = Zalloc(sizeof (md_im_drive_info_t)); 139 140 midp->mid_dnp = dnp; 141 142 /* 143 * If rdevid is not NULL then we know we are dealing with 144 * replicated diskset case. 'devid_sz' will always be the 145 * size of a valid devid which can be 'devid' or 'rdevid' 146 */ 147 midp->mid_devid = (void *)Malloc(devid_sz); 148 149 if (rdevid) { 150 (void) memcpy(midp->mid_devid, rdevid, devid_sz); 151 /* 152 * Also need to store the 'other' devid 153 */ 154 o_devid_sz = devid_sizeof((ddi_devid_t)devid); 155 midp->mid_o_devid = (void *)Malloc(o_devid_sz); 156 (void) memcpy(midp->mid_o_devid, devid, o_devid_sz); 157 midp->mid_o_devid_sz = o_devid_sz; 158 } else { 159 /* 160 * In the case of regular diskset, midp->mid_o_devid 161 * will be a NULL pointer 162 */ 163 (void) memcpy(midp->mid_devid, devid, devid_sz); 164 } 165 166 if (devname) 167 midp->mid_devname = Strdup(devname); 168 169 midp->mid_devid_sz = devid_sz; 170 midp->mid_setcreatetimestamp = timestamp; 171 (void) strlcpy(midp->mid_minor_name, minor_name, MDDB_MINOR_NAME_MAX); 172 midp->mid_replicas = mirp; 173 174 return (midp); 175 } 176 177 178 179 /* 180 * drive_append_wrapper() 181 * 182 * Constant time append wrapper; the append function will always walk the list, 183 * this will take a tail argument and use the append function on just the tail 184 * node, doing the appropriate old-tail-next-pointer bookkeeping. 
185 */ 186 static md_im_drive_info_t ** 187 drive_append_wrapper( 188 md_im_drive_info_t **tailpp, 189 mddrivename_t *dnp, 190 void *devid, 191 void *rdevid, 192 void *devname, 193 int devid_sz, 194 char *minor_name, 195 md_timeval32_t timestamp, 196 md_im_replica_info_t *mirp 197 ) 198 { 199 (void) drive_append(tailpp, dnp, devid, rdevid, devname, devid_sz, 200 minor_name, timestamp, mirp); 201 202 if ((*tailpp)->mid_next == NULL) 203 return (tailpp); 204 205 return (&((*tailpp)->mid_next)); 206 } 207 208 209 210 /* 211 * replica_append() 212 * 213 * Append to tail of linked list of md_im_replica_info_t. 214 * 215 * Will allocate space for new node and copy args into new space. 216 * 217 * Returns pointer to new node. 218 */ 219 static md_im_replica_info_t * 220 replica_append( 221 md_im_replica_info_t **mirpp, 222 int flags, 223 daddr32_t offset, 224 daddr32_t length, 225 md_timeval32_t timestamp 226 ) 227 { 228 md_im_replica_info_t *mirp; 229 230 for (; (*mirpp != NULL); mirpp = &((*mirpp)->mir_next)) 231 ; 232 233 mirp = *mirpp = Zalloc(sizeof (md_im_replica_info_t)); 234 235 mirp->mir_flags = flags; 236 mirp->mir_offset = offset; 237 mirp->mir_length = length; 238 mirp->mir_timestamp = timestamp; 239 240 return (mirp); 241 242 } 243 244 245 246 /* 247 * replica_append_wrapper() 248 * 249 * Constant time append wrapper; the append function will always walk the list, 250 * this will take a tail argument and use the append function on just the tail 251 * node, doing the appropriate old-tail-next-pointer bookkeeping. 
252 */ 253 static md_im_replica_info_t ** 254 replica_append_wrapper( 255 md_im_replica_info_t **tailpp, 256 int flags, 257 daddr32_t offset, 258 daddr32_t length, 259 md_timeval32_t timestamp 260 ) 261 { 262 (void) replica_append(tailpp, flags, offset, length, timestamp); 263 264 if ((*tailpp)->mir_next == NULL) 265 return (tailpp); 266 267 return (&(*tailpp)->mir_next); 268 } 269 270 /* 271 * map_replica_disk() 272 * 273 * Searches the device id list for a specific 274 * disk based on the locator block device id array index. 275 * 276 * Returns a pointer to the did_list node if a match was 277 * found or NULL otherwise. 278 */ 279 static did_list_t * 280 map_replica_disk( 281 did_list_t *did_listp, 282 int did_index 283 ) 284 { 285 did_list_t *tailp = did_listp; 286 287 while (tailp != NULL) { 288 if (tailp->did_index == did_index) 289 return (tailp); 290 tailp = tailp->next; 291 } 292 293 /* not found, return failure */ 294 return (NULL); 295 } 296 297 /* 298 * replicated_list_lookup() 299 * 300 * looks up a replicated disk entry in the global replicated disk list 301 * based upon the length of that disk's device id. returns the new device id 302 * for the disk. 303 * If you store the returned devid you must create a local copy. 
304 */ 305 static void * 306 replicated_list_lookup( 307 uint_t devid_len, 308 void *old_devid 309 ) 310 { 311 replicated_disk_t *head = NULL; 312 313 assert(devid_len <= MAX_DEVID_LEN); 314 head = replicated_disk_list[devid_len]; 315 316 if (head == NULL) 317 return (NULL); 318 319 do { 320 if (devid_compare((ddi_devid_t)old_devid, 321 (ddi_devid_t)head->old_devid) == 0) 322 return (head->new_devid); 323 head = head->next; 324 } while (head != NULL); 325 326 return (NULL); 327 } 328 329 /* 330 * replicated_list_insert() 331 * 332 * inserts a replicated disk entry into the global replicated disk list 333 */ 334 static void 335 replicated_list_insert( 336 size_t old_devid_len, 337 void *old_devid, 338 void *new_devid 339 ) 340 { 341 replicated_disk_t *repl_disk, **first_entry; 342 void *repl_old_devid = NULL; 343 344 assert(old_devid_len <= MAX_DEVID_LEN); 345 346 repl_disk = Zalloc(sizeof (replicated_disk_t)); 347 repl_old_devid = Zalloc(old_devid_len); 348 (void) memcpy(repl_old_devid, (void *)old_devid, old_devid_len); 349 350 repl_disk->old_devid = repl_old_devid; 351 repl_disk->new_devid = new_devid; 352 353 first_entry = &replicated_disk_list[old_devid_len]; 354 355 if (*first_entry == NULL) { 356 *first_entry = repl_disk; 357 return; 358 } 359 360 repl_disk->next = *first_entry; 361 replicated_disk_list[old_devid_len] = repl_disk; 362 } 363 364 /* 365 * get_replica_disks() 366 * 367 * Will step through the locator records in the supplied locator block, and add 368 * each one with an active replica to a supplied list of md_im_drive_info_t, and 369 * add the appropriate replicas to the md_im_replica_info_t contained therein. 
370 */ 371 static void 372 get_replica_disks( 373 md_im_set_desc_t *misp, 374 did_list_t *did_listp, 375 mddb_mb_t *mb, 376 mddb_lb_t *lbp, 377 md_error_t *ep, 378 int replicated 379 ) 380 { 381 mddrivename_t *dnp; 382 int indx, on_list; 383 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 384 int flags; 385 int devid_sz; 386 char *minor_name; 387 did_list_t *replica_disk; 388 daddr32_t offset; 389 daddr32_t length; 390 md_timeval32_t timestamp; 391 md_im_replica_info_t **mirpp = NULL; 392 md_im_drive_info_t **midpp = &misp->mis_drives; 393 md_im_drive_info_t *midp; 394 void *did; 395 396 for (indx = 0; indx < lbp->lb_loccnt; indx++) { 397 398 on_list = 0; 399 if (lbp->lb_locators[indx].l_flags & MDDB_F_ACTIVE) { 400 401 /* 402 * search the device id list for a 403 * specific ctds based on the locator 404 * block device id array index. 405 */ 406 replica_disk = map_replica_disk(did_listp, indx); 407 408 assert(replica_disk != NULL); 409 410 411 /* 412 * metadrivename() can fail for a slice name 413 * if there is not an existing mddrivename_t. 414 * So we use metadiskname() to strip the slice 415 * number. 
416 */ 417 dnp = metadrivename(&sp, 418 metadiskname(replica_disk->devname), ep); 419 420 for (midp = misp->mis_drives; midp != NULL; 421 midp = midp->mid_next) { 422 if (dnp == midp->mid_dnp) { 423 on_list = 1; 424 mirpp = &midp->mid_replicas; 425 break; 426 } 427 } 428 429 /* 430 * Get the correct devid_sz 431 */ 432 if (replicated) 433 did = replica_disk->rdid; 434 else 435 did = replica_disk->did; 436 437 devid_sz = devid_sizeof((ddi_devid_t)did); 438 minor_name = replica_disk->minor_name; 439 440 /* 441 * New on the list so add it 442 */ 443 if (!on_list) { 444 mddb_mb_t *mbp; 445 uint_t sliceno; 446 mdname_t *rsp; 447 int fd = -1; 448 449 mbp = Malloc(DEV_BSIZE); 450 451 /* determine the replica slice */ 452 if (meta_replicaslice(dnp, &sliceno, 453 ep) != 0) { 454 Free(mbp); 455 continue; 456 } 457 458 /* 459 * if the replica slice size is zero, 460 * don't bother opening 461 */ 462 if (dnp->vtoc.parts[sliceno].size == 0) { 463 Free(mbp); 464 continue; 465 } 466 467 if ((rsp = metaslicename(dnp, sliceno, 468 ep)) == NULL) { 469 Free(mbp); 470 continue; 471 } 472 473 if ((fd = open(rsp->rname, 474 O_RDONLY| O_NDELAY)) < 0) { 475 Free(mbp); 476 continue; 477 } 478 479 /* 480 * a drive may not have a master block 481 */ 482 if (read_master_block(ep, fd, mbp, 483 DEV_BSIZE) <= 0) { 484 mdclrerror(ep); 485 Free(mbp); 486 (void) close(fd); 487 continue; 488 } 489 490 (void) close(fd); 491 midpp = drive_append_wrapper(midpp, dnp, 492 replica_disk->did, replica_disk->rdid, 493 replica_disk->devname, 494 devid_sz, minor_name, mbp->mb_setcreatetime, 495 NULL); 496 mirpp = &((*midpp)->mid_replicas); 497 Free(mbp); 498 } 499 500 /* 501 * For either of these assertions to fail, it implies 502 * a NULL return from metadrivename() above. Since 503 * the args came from a presumed valid locator block, 504 * that's Bad. 505 */ 506 assert(midpp != NULL); 507 assert(mirpp != NULL); 508 509 /* 510 * Extract the parameters describing this replica. 
511 * 512 * The magic "1" in the length calculation accounts 513 * for the length of the master block, in addition to 514 * the block count it describes. (The master block 515 * will always take up one block on the disk, and 516 * there will always only be one master block per 517 * replica, even though much of the code is structured 518 * to handle noncontiguous replicas.) 519 */ 520 flags = lbp->lb_locators[indx].l_flags; 521 offset = lbp->lb_locators[indx].l_blkno; 522 length = mb->mb_blkcnt + 1; 523 timestamp = mb->mb_setcreatetime; 524 525 mirpp = replica_append_wrapper(mirpp, flags, 526 offset, length, timestamp); 527 528 /* 529 * If we're here it means - 530 * 531 * a) we had an active copy of the replica, and 532 * b) we've added the disk to the list of 533 * disks as well. 534 * 535 * We need to bump up the number of active 536 * replica count for each such replica so that it 537 * can be used later for replica quorum check. 538 */ 539 misp->mis_active_replicas++; 540 } 541 } 542 } 543 544 545 /* 546 * append_pnm_rec() 547 * 548 * Append pnm_rec_t entry to list of physical devices in the diskset. Entry 549 * contains a mapping of n_key in NM namespace(or min_key in DID_NM namespace) 550 * to name of the physical device. This list will be used to ensure that the 551 * correct names of the physical devices are printed in the metastat output--the 552 * NM namespace might have stale information about where the physical devices 553 * were previously located when the diskset was last active. 554 */ 555 static void 556 append_pnm_rec( 557 pnm_rec_t **pnm, 558 mdkey_t min_key, 559 char *n_name 560 ) 561 { 562 pnm_rec_t *tmp_pnm; 563 char *p; 564 int len; 565 566 if ((p = strrchr(n_name, '/')) != NULL) 567 p++; 568 569 /* 570 * Allocates pnm_rec_t record for the physical 571 * device. 
572 */ 573 len = strlen(p) + 1; /* Length of name plus Null term */ 574 tmp_pnm = Malloc(sizeof (pnm_rec_t) + len); 575 (void) strncpy(tmp_pnm->n_name, p, len); 576 tmp_pnm->n_key = min_key; 577 578 /* 579 * Adds new element to head of pnm_rec_t list. 580 */ 581 if (*pnm == NULL) { 582 tmp_pnm->next = NULL; 583 *pnm = tmp_pnm; 584 } else { 585 tmp_pnm->next = *pnm; 586 *pnm = tmp_pnm; 587 } 588 } 589 590 /* 591 * free_pnm_rec_list() 592 * 593 * Freeing all pnm_rec_t entries on the list of physical devices in the 594 * diskset. 595 */ 596 void 597 free_pnm_rec_list(pnm_rec_t **pnm) 598 { 599 pnm_rec_t *tmp_pnm, *rm_pnm; 600 601 for (tmp_pnm = *pnm; tmp_pnm != NULL; ) { 602 rm_pnm = tmp_pnm; 603 tmp_pnm = tmp_pnm->next; 604 Free(rm_pnm); 605 } 606 607 *pnm = NULL; 608 } 609 610 611 /* 612 * get_disks_from_didnamespace() 613 * This function was origionally called: get_nonreplica_disks() 614 * 615 * Extracts the disks without replicas from the locator name space and adds them 616 * to the supplied list of md_im_drive_info_t. 
617 * If the print verbose option was given then this function will also 618 * correct the nm namespace so that the n_name is the right ctd name 619 */ 620 static void 621 get_disks_from_didnamespace( 622 md_im_set_desc_t *misp, 623 pnm_rec_t **pnm, 624 mddb_rb_t *did_nm, 625 mddb_rb_t *did_shrnm, 626 uint_t imp_flags, 627 int replicated, 628 md_error_t *ep 629 ) 630 { 631 char *search_path = "/dev"; 632 devid_nmlist_t *nmlist; 633 md_im_drive_info_t *midp, **midpp = &misp->mis_drives; 634 mddrivename_t *dnp; 635 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 636 mddb_rb_t *rbp_did = did_nm; 637 mddb_rb_t *rbp_did_shr = did_shrnm; 638 int on_list = 0; 639 int devid_sz; 640 struct devid_min_rec *did_rec; 641 struct devid_shr_rec *did_shr_rec; 642 struct did_shr_name *did; 643 struct did_min_name *min; 644 void *r_did; /* NULL if not a replicated diskset */ 645 void *valid_did; 646 647 /* 648 * We got a pointer to an mddb record, which we expect to contain a 649 * name record; extract the pointer thereto. 
650 */ 651 /* LINTED */ 652 did_rec = (struct devid_min_rec *)((caddr_t)(&rbp_did->rb_data)); 653 /* LINTED */ 654 did_shr_rec = (struct devid_shr_rec *) 655 ((caddr_t)(&rbp_did_shr->rb_data)); 656 657 /* 658 * Skip the nm_rec_hdr and iterate on the array of struct minor_name 659 * at the end of the devid_min_rec 660 */ 661 for (min = &did_rec->minor_name[0]; min->min_devid_key != 0; 662 /* LINTED */ 663 min = (struct did_min_name *)((char *)min + DID_NAMSIZ(min))) { 664 665 on_list = 0; 666 r_did = NULL; 667 668 /* 669 * For a give DID_NM key, locate the corresponding device 670 * id from DID_NM_SHR 671 */ 672 for (did = &did_shr_rec->device_id[0]; did->did_key != 0; 673 /* LINTED */ 674 did = (struct did_shr_name *) 675 ((char *)did + DID_SHR_NAMSIZ(did))) { 676 /* 677 * We got a match, this is the device id we're 678 * looking for 679 */ 680 if (min->min_devid_key == did->did_key) 681 break; 682 } 683 684 if (did->did_key == 0) { 685 /* we didn't find a match */ 686 assert(did->did_key != 0); 687 md_exit(NULL, 1); 688 } 689 690 /* 691 * If replicated diskset 692 */ 693 if (replicated) { 694 size_t new_devid_len; 695 char *temp; 696 /* 697 * In this case, did->did_devid will 698 * be invalid so lookup the real one 699 */ 700 temp = replicated_list_lookup(did->did_size, 701 did->did_devid); 702 new_devid_len = devid_sizeof((ddi_devid_t)temp); 703 r_did = Zalloc(new_devid_len); 704 (void) memcpy(r_did, temp, new_devid_len); 705 valid_did = r_did; 706 } else { 707 valid_did = did->did_devid; 708 } 709 710 /* Get the ctds mapping for that device id */ 711 if (meta_deviceid_to_nmlist(search_path, 712 (ddi_devid_t)valid_did, 713 &min->min_name[0], &nmlist) == 0) { 714 715 assert(nmlist->devname != NULL); 716 dnp = metadrivename(&sp, 717 metadiskname(nmlist->devname), ep); 718 /* 719 * Add drive to pnm_rec_t list of physical devices for 720 * metastat output. 
721 */ 722 if (imp_flags & META_IMP_VERBOSE) { 723 append_pnm_rec(pnm, min->min_key, 724 nmlist->devname); 725 } 726 727 assert(dnp != NULL); 728 /* Is it already on the list? */ 729 for (midp = misp->mis_drives; midp != NULL; 730 midp = midp->mid_next) { 731 if (midp->mid_dnp == dnp) { 732 on_list = 1; 733 break; 734 } 735 } 736 737 devid_sz = devid_sizeof( 738 (ddi_devid_t)valid_did); 739 740 if (!on_list) { 741 mddb_mb_t *mbp; 742 uint_t sliceno; 743 mdname_t *rsp; 744 int fd = -1; 745 746 mbp = Malloc(DEV_BSIZE); 747 748 /* determine the replica slice */ 749 if (meta_replicaslice(dnp, &sliceno, 750 ep) != 0) { 751 Free(mbp); 752 continue; 753 } 754 755 /* 756 * if the replica slice size is zero, 757 * don't bother opening 758 */ 759 if (dnp->vtoc.parts[sliceno].size 760 == 0) { 761 Free(mbp); 762 continue; 763 } 764 765 if ((rsp = metaslicename(dnp, sliceno, 766 ep)) == NULL) { 767 Free(mbp); 768 continue; 769 } 770 771 if ((fd = open(rsp->rname, 772 O_RDONLY| O_NDELAY)) < 0) { 773 Free(mbp); 774 continue; 775 } 776 777 /* 778 * a drive may not have a master block 779 */ 780 if (read_master_block(ep, fd, mbp, 781 DEV_BSIZE) <= 0) { 782 mdclrerror(ep); 783 Free(mbp); 784 (void) close(fd); 785 continue; 786 } 787 788 (void) close(fd); 789 /* 790 * If it is replicated diskset, 791 * r_did will be non-NULL and 792 * devid_sz will be its size. 793 * Passing the devname as NULL because field 794 * is not currently used for a non-replica disk. 795 */ 796 midpp = drive_append_wrapper(midpp, 797 dnp, &did->did_devid, r_did, NULL, 798 devid_sz, &min->min_name[0], 799 mbp->mb_setcreatetime, NULL); 800 Free(mbp); 801 } 802 devid_free_nmlist(nmlist); 803 } 804 } 805 } 806 807 /* 808 * set_append() 809 * 810 * Append to tail of linked list of md_im_set_desc_t. 811 * 812 * Will allocate space for new node AND populate it by extracting disks with 813 * and without replicas from the locator blocks and locator namespace. 814 * 815 * Returns pointer to new node. 
816 */ 817 static md_im_set_desc_t * 818 set_append( 819 md_im_set_desc_t **mispp, 820 did_list_t *did_listp, 821 mddb_mb_t *mb, 822 mddb_lb_t *lbp, 823 mddb_rb_t *nm, 824 pnm_rec_t **pnm, 825 mddb_rb_t *did_nm, 826 mddb_rb_t *did_shrnm, 827 uint_t imp_flags, 828 int replicated, 829 md_error_t *ep 830 ) 831 { 832 833 md_im_set_desc_t *misp; 834 set_t setno = mb->mb_setno; 835 836 /* run to end of list */ 837 for (; (*mispp != NULL); mispp = &((*mispp)->mis_next)) 838 ; 839 840 /* allocate new list element */ 841 misp = *mispp = Zalloc(sizeof (md_im_set_desc_t)); 842 843 if (replicated) 844 misp->mis_flags = MD_IM_SET_REPLICATED; 845 846 misp->mis_oldsetno = setno; 847 848 /* Get the disks with and without replicas */ 849 get_replica_disks(misp, did_listp, mb, lbp, ep, replicated); 850 851 if (nm != NULL && did_nm != NULL && did_shrnm != NULL) { 852 get_disks_from_didnamespace(misp, pnm, did_nm, 853 did_shrnm, imp_flags, replicated, ep); 854 } 855 856 /* 857 * An error in this struct could come from either of 858 * the above routines; 859 * in both cases, we want to pass it back on up. 860 */ 861 862 return (misp); 863 } 864 865 866 /* 867 * add_disk_names() 868 * 869 * Iterator to walk the minor node tree of the device snapshot, adding only the 870 * first non-block instance of each non-cdrom minor node to a list of disks. 871 */ 872 static int 873 add_disk_names(di_node_t node, di_minor_t minor, void *args) 874 { 875 char *search_path = "/dev"; 876 ddi_devid_t devid = di_devid(node); 877 devid_nmlist_t *nm; 878 char *min = di_minor_name(minor); 879 md_im_names_t *cnames = (md_im_names_t *)args; 880 static di_node_t save_node = NULL; 881 882 /* 883 * skip CD devices 884 * If a device does not have a device id, we can't 885 * do anything with it so just exclude it from our 886 * list. 887 * 888 * This would also encompass CD devices and floppy 889 * devices that don't have a device id. 
890 */ 891 if (devid == NULL) { 892 return (DI_WALK_CONTINUE); 893 } 894 895 /* char disk devices (as opposed to block) */ 896 if (di_minor_spectype(minor) == S_IFCHR) { 897 898 /* only first occurrence (slice 0) of each instance */ 899 if (save_node == NULL || node != save_node) { 900 save_node = node; 901 if (meta_deviceid_to_nmlist(search_path, devid, 902 min, &nm) == 0) { 903 int index = cnames->min_count++; 904 905 assert(nm->devname != NULL); 906 cnames->min_names = 907 Realloc(cnames->min_names, 908 cnames->min_count * 909 sizeof (char *)); 910 911 assert(cnames->min_names != NULL); 912 cnames->min_names[index] = 913 metadiskname(nm->devname); 914 devid_free_nmlist(nm); 915 } 916 } 917 } 918 return (DI_WALK_CONTINUE); 919 } 920 921 922 923 /* 924 * meta_list_disks() 925 * 926 * Snapshots the device tree and extracts disk devices from the snapshot. 927 */ 928 int 929 meta_list_disks(md_error_t *ep, md_im_names_t *cnames) 930 { 931 di_node_t root_node; 932 933 assert(cnames != NULL); 934 cnames->min_count = 0; 935 cnames->min_names = NULL; 936 937 if ((root_node = di_init("/", DINFOCPYALL|DINFOFORCE)) 938 == DI_NODE_NIL) { 939 return (mdsyserror(ep, errno, NULL)); 940 } 941 942 (void) di_walk_minor(root_node, DDI_NT_BLOCK, 0, cnames, 943 add_disk_names); 944 945 di_fini(root_node); 946 return (0); 947 } 948 949 /* 950 * meta_imp_drvused 951 * 952 * Checks if given drive is mounted, swapped, part of disk configuration 953 * or in use by SVM. ep also has error code set up if drive is in use. 954 * 955 * Returns 1 if drive is in use. 956 * Returns 0 if drive is not in use. 957 */ 958 int 959 meta_imp_drvused( 960 mdsetname_t *sp, 961 mddrivename_t *dnp, 962 md_error_t *ep 963 ) 964 { 965 md_error_t status = mdnullerror; 966 md_error_t *db_ep = &status; 967 968 /* 969 * We pass in db_ep to meta_setup_db_locations 970 * and never ever use the error contained therein 971 * because all we're interested in is a check to 972 * see whether any local metadbs are present. 
973 */ 974 if ((meta_check_drivemounted(sp, dnp, ep) != 0) || 975 (meta_check_driveswapped(sp, dnp, ep) != 0) || 976 (((meta_setup_db_locations(db_ep) == 0) && 977 ((meta_check_drive_inuse(sp, dnp, 1, ep) != 0) || 978 (meta_check_driveinset(sp, dnp, ep) != 0))))) { 979 return (1); 980 } else { 981 return (0); 982 } 983 } 984 985 /* 986 * meta_prune_cnames() 987 * 988 * Removes in-use disks from the list prior to further processing. 989 * 990 * Return value depends on err_on_prune flag: if set, and one or more disks 991 * are pruned, the return list will be the pruned disks. If not set, or if no 992 * disks are pruned, the return list will be the unpruned disks. 993 */ 994 mddrivenamelist_t * 995 meta_prune_cnames( 996 md_error_t *ep, 997 md_im_names_t *cnames, 998 int err_on_prune 999 ) 1000 { 1001 int d; 1002 int fcount = 0; 1003 mddrivenamelist_t *dnlp = NULL; 1004 mddrivenamelist_t **dnlpp = &dnlp; 1005 mddrivenamelist_t *fdnlp = NULL; 1006 mddrivenamelist_t **fdnlpp = &fdnlp; 1007 mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); 1008 1009 for (d = 0; d < cnames->min_count; ++d) { 1010 mddrivename_t *dnp; 1011 1012 dnp = metadrivename(&sp, cnames->min_names[d], ep); 1013 if (dnp == NULL) { 1014 /* 1015 * Assuming we're interested in knowing about 1016 * whatever error occurred, but not in stopping. 1017 */ 1018 mde_perror(ep, cnames->min_names[d]); 1019 mdclrerror(ep); 1020 1021 continue; 1022 } 1023 1024 /* 1025 * Check if the drive is inuse. 
1026 */ 1027 if (meta_imp_drvused(sp, dnp, ep)) { 1028 fdnlpp = meta_drivenamelist_append_wrapper(fdnlpp, dnp); 1029 fcount++; 1030 mdclrerror(ep); 1031 } else { 1032 dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp); 1033 } 1034 } 1035 1036 if (fcount) { 1037 if (err_on_prune) { 1038 (void) mddserror(ep, MDE_DS_DRIVEINUSE, 0, 1039 NULL, fdnlp->drivenamep->cname, NULL); 1040 metafreedrivenamelist(dnlp); 1041 return (fdnlp); 1042 } 1043 metafreedrivenamelist(fdnlp); 1044 } 1045 1046 return (dnlp); 1047 } 1048 1049 /* 1050 * read_master_block() 1051 * 1052 * Returns: 1053 * < 0 for failure 1054 * 0 for no valid master block 1055 * 1 for valid master block 1056 * 1057 * The supplied buffer will be filled in for EITHER 0 or 1. 1058 */ 1059 int 1060 read_master_block( 1061 md_error_t *ep, 1062 int fd, 1063 void *bp, 1064 int bsize 1065 ) 1066 { 1067 mddb_mb_t *mbp = bp; 1068 int rval = 1; 1069 1070 assert(bp != NULL); 1071 1072 if (lseek(fd, (off_t)dbtob(16), SEEK_SET) < 0) 1073 return (mdsyserror(ep, errno, NULL)); 1074 1075 if (read(fd, bp, bsize) != bsize) 1076 return (mdsyserror(ep, errno, NULL)); 1077 1078 /* 1079 * The master block magic number can either be MDDB_MAGIC_MB in 1080 * the case of a real master block, or, it can be MDDB_MAGIC_DU 1081 * in the case of a dummy master block 1082 */ 1083 if ((mbp->mb_magic != MDDB_MAGIC_MB) && 1084 (mbp->mb_magic != MDDB_MAGIC_DU)) { 1085 rval = 0; 1086 (void) mdmddberror(ep, MDE_DB_MASTER, 0, 0, 0, NULL); 1087 } 1088 1089 if (mbp->mb_revision != MDDB_REV_MB) { 1090 rval = 0; 1091 } 1092 1093 return (rval); 1094 } 1095 1096 /* 1097 * read_locator_block() 1098 * 1099 * Returns: 1100 * < 0 for failure 1101 * 0 for no valid locator block 1102 * 1 for valid locator block 1103 */ 1104 int 1105 read_locator_block( 1106 md_error_t *ep, 1107 int fd, 1108 mddb_mb_t *mbp, 1109 void *bp, 1110 int bsize 1111 ) 1112 { 1113 mddb_lb_t *lbp = bp; 1114 1115 assert(bp != NULL); 1116 1117 if (lseek(fd, 
(off_t)dbtob(mbp->mb_blkmap.m_firstblk), SEEK_SET) < 0) 1118 return (mdsyserror(ep, errno, NULL)); 1119 1120 if (read(fd, bp, bsize) != bsize) 1121 return (mdsyserror(ep, errno, NULL)); 1122 1123 return ((lbp->lb_magic == MDDB_MAGIC_LB) ? 1 : 0); 1124 } 1125 1126 int 1127 phys_read( 1128 md_error_t *ep, 1129 int fd, 1130 mddb_mb_t *mbp, 1131 daddr_t blk, 1132 void *bp, 1133 int bcount 1134 ) 1135 { 1136 daddr_t pblk; 1137 1138 if ((pblk = getphysblk(blk, mbp)) < 0) 1139 return (mdmddberror(ep, MDE_DB_BLKRANGE, NODEV32, 1140 MD_LOCAL_SET, blk, NULL)); 1141 1142 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) 1143 return (mdsyserror(ep, errno, NULL)); 1144 1145 if (read(fd, bp, bcount) != bcount) 1146 return (mdsyserror(ep, errno, NULL)); 1147 1148 return (bcount); 1149 } 1150 1151 /* 1152 * read_locator_block_did() 1153 * 1154 * Returns: 1155 * < 0 for failure 1156 * 0 for no valid locator name struct 1157 * 1 for valid locator name struct 1158 */ 1159 int 1160 read_locator_block_did( 1161 md_error_t *ep, 1162 int fd, 1163 mddb_mb_t *mbp, 1164 mddb_lb_t *lbp, 1165 void *bp, 1166 int bsize 1167 ) 1168 { 1169 int lb_didfirstblk = lbp->lb_didfirstblk; 1170 mddb_did_blk_t *lbdidp = bp; 1171 int rval; 1172 1173 assert(bp != NULL); 1174 1175 if ((rval = phys_read(ep, fd, mbp, lb_didfirstblk, bp, bsize)) < 0) 1176 return (rval); 1177 1178 return ((lbdidp->blk_magic == MDDB_MAGIC_DI) ? 
1 : 0); 1179 } 1180 1181 /* 1182 * read_locator_names() 1183 * 1184 * Returns: 1185 * < 0 for failure 1186 * 0 for no valid locator name struct 1187 * 1 for valid locator name struct 1188 */ 1189 int 1190 read_locator_names( 1191 md_error_t *ep, 1192 int fd, 1193 mddb_mb_t *mbp, 1194 mddb_lb_t *lbp, 1195 void *bp, 1196 int bsize 1197 ) 1198 { 1199 int lnfirstblk = lbp->lb_lnfirstblk; 1200 mddb_ln_t *lnp = bp; 1201 int rval; 1202 1203 assert(bp != NULL); 1204 1205 if ((rval = phys_read(ep, fd, mbp, lnfirstblk, bp, bsize)) < 0) 1206 return (rval); 1207 1208 return ((lnp->ln_magic == MDDB_MAGIC_LN) ? 1 : 0); 1209 } 1210 1211 1212 int 1213 read_database_block( 1214 md_error_t *ep, 1215 int fd, 1216 mddb_mb_t *mbp, 1217 int dbblk, 1218 void *bp, 1219 int bsize 1220 ) 1221 { 1222 mddb_db_t *dbp = bp; 1223 int rval; 1224 1225 assert(bp != NULL); 1226 1227 if ((rval = phys_read(ep, fd, mbp, dbblk, bp, bsize)) < 0) 1228 return (rval); 1229 1230 return ((dbp->db_magic == MDDB_MAGIC_DB) ? 1 : 0); 1231 } 1232 1233 int 1234 read_loc_didblks( 1235 md_error_t *ep, 1236 int fd, 1237 mddb_mb_t *mbp, 1238 int didblk, 1239 void *bp, 1240 int bsize 1241 ) 1242 { 1243 mddb_did_blk_t *didbp = bp; 1244 int rval; 1245 1246 assert(bp != NULL); 1247 1248 if ((rval = phys_read(ep, fd, mbp, didblk, bp, bsize)) < 0) 1249 return (rval); 1250 1251 return ((didbp->blk_magic == MDDB_MAGIC_DI) ? 1 : 0); 1252 } 1253 1254 1255 int 1256 read_loc_didinfo( 1257 md_error_t *ep, 1258 int fd, 1259 mddb_mb_t *mbp, 1260 int infoblk, 1261 void *bp, 1262 int bsize 1263 ) 1264 { 1265 int rval = 1; 1266 mddb_did_info_t *infop = bp; 1267 1268 assert(bp != NULL); 1269 1270 if ((rval = phys_read(ep, fd, mbp, infoblk, bp, bsize)) < 0) 1271 return (rval); 1272 1273 return ((infop->info_flags & MDDB_DID_EXISTS) ? 1 : 0); 1274 } 1275 1276 /* 1277 * meta_nm_rec() 1278 * 1279 * Return the DE corresponding to the requested namespace record type. 1280 * Modifies dbp to have a firstentry if one isn't there. 
1281 */ 1282 static mddb_de_t * 1283 meta_nm_rec(mddb_db_t *dbp, mddb_type_t rectype) 1284 { 1285 mddb_de_t *dep; 1286 int desize; 1287 1288 if (dbp->db_firstentry != NULL) { 1289 /* LINTED */ 1290 dep = (mddb_de_t *)((caddr_t)(&dbp->db_firstentry) 1291 + sizeof (dbp->db_firstentry)); 1292 dbp->db_firstentry = dep; 1293 while (dep && dep->de_next) { 1294 desize = sizeof (*dep) - sizeof (dep->de_blks) + 1295 sizeof (daddr_t) * dep->de_blkcount; 1296 /* LINTED */ 1297 dep->de_next = (mddb_de_t *) 1298 ((caddr_t)dep + desize); 1299 dep = dep->de_next; 1300 } 1301 } 1302 1303 for (dep = dbp->db_firstentry; dep != NULL; dep = dep->de_next) { 1304 if (dep->de_type1 == rectype) 1305 break; 1306 } 1307 return (dep); 1308 } 1309 1310 /* 1311 * read_nm_rec() 1312 * 1313 * Reads the NM, NM_DID or NM_DID_SHR record in the mddb and stores the 1314 * configuration data in the buffer 'nm' 1315 * 1316 * Returns: 1317 * < 0 for failure 1318 * 0 for no valid NM/DID_NM/DID_NM_SHR record 1319 * 1 for valid NM/DID_NM/DID_NM_SHR record 1320 * 1321 */ 1322 static int 1323 read_nm_rec( 1324 md_error_t *ep, 1325 int fd, 1326 mddb_mb_t *mbp, 1327 mddb_lb_t *lbp, 1328 char **nm, 1329 mddb_type_t rectype, 1330 char *diskname 1331 ) 1332 { 1333 int cnt, dbblk, rval = 0; 1334 char db[DEV_BSIZE]; 1335 mddb_de_t *dep; 1336 /*LINTED*/ 1337 mddb_db_t *dbp = (mddb_db_t *)&db; 1338 char *tmpnm = NULL; 1339 daddr_t pblk; 1340 1341 for (dbblk = lbp->lb_dbfirstblk; 1342 dbblk != 0; 1343 dbblk = dbp->db_nextblk) { 1344 1345 if ((rval = read_database_block(ep, fd, mbp, dbblk, dbp, 1346 sizeof (db))) <= 0) 1347 return (rval); 1348 1349 /* 1350 * Locate NM/DID_NM/DID_NM_SHR record. Normally there is 1351 * only one record per mddb. There is a rare case when we 1352 * can't expand the record. If this is the case then we 1353 * will have multiple NM/DID_NM/DID_NM_SHR records linked 1354 * with r_next_recid. 1355 * 1356 * For now assume the normal case and handle the extended 1357 * namespace in Phase 2. 
1358 */ 1359 if ((dep = meta_nm_rec(dbp, rectype)) != NULL) 1360 break; 1361 } 1362 1363 /* If meta_nm_rec() never succeeded, bail out */ 1364 if (dep == NULL) 1365 return (0); 1366 1367 /* Read in the appropriate record and return configurations */ 1368 tmpnm = (char *)Zalloc(dbtob(dep->de_blkcount)); 1369 *nm = tmpnm; 1370 1371 for (cnt = 0; cnt < dep->de_blkcount; cnt++) { 1372 if ((pblk = getphysblk(dep->de_blks[cnt], mbp)) < 0) { 1373 rval = mdmddberror(ep, MDE_DB_BLKRANGE, 1374 NODEV32, MD_LOCAL_SET, 1375 dep->de_blks[cnt], diskname); 1376 return (rval); 1377 } 1378 1379 if (lseek(fd, (off_t)dbtob(pblk), SEEK_SET) < 0) { 1380 rval = mdsyserror(ep, errno, diskname); 1381 return (rval); 1382 } 1383 1384 if (read(fd, tmpnm, DEV_BSIZE) != DEV_BSIZE) { 1385 rval = mdsyserror(ep, errno, diskname); 1386 return (rval); 1387 } 1388 1389 tmpnm += DEV_BSIZE; 1390 } 1391 return (1); 1392 } 1393 1394 /* 1395 * is_replicated 1396 * 1397 * Determines whether a disk has been replicated or not. It checks to see 1398 * if the device id stored in the master block is the same as the device id 1399 * registered for that disk on the current system. If the two device ids are 1400 * different, then we know that the disk has been replicated. 1401 * 1402 * If need_devid is set and the disk is replicated, fill in the new_devid. 1403 * Also, if need_devid is set, this routine allocates memory for the device 1404 * ids; the caller of this routine is responsible for free'ing up the memory. 
1405 * 1406 * Returns: 1407 * 1 if it's a replicated disk 1408 * 0 if it's not a replicated disk 1409 */ 1410 static int 1411 is_replicated( 1412 int fd, 1413 mddb_mb_t *mbp, 1414 int need_devid, 1415 void **new_devid 1416 ) 1417 { 1418 ddi_devid_t current_devid; 1419 int retval = 0; 1420 size_t new_devid_len; 1421 1422 if (mbp->mb_devid_magic != MDDB_MAGIC_DE) 1423 return (retval); 1424 1425 if (devid_get(fd, ¤t_devid) != 0) 1426 return (retval); 1427 1428 if (devid_compare((ddi_devid_t)mbp->mb_devid, current_devid) != 0) 1429 retval = 1; 1430 1431 if (retval && need_devid) { 1432 new_devid_len = devid_sizeof(current_devid); 1433 *new_devid = Zalloc(new_devid_len); 1434 (void) memcpy(*new_devid, (void *)current_devid, new_devid_len); 1435 } 1436 1437 devid_free(current_devid); 1438 return (retval); 1439 } 1440 1441 /* 1442 * free_replicated_disks_list() 1443 * 1444 * this frees up all the memory allocated by build_replicated_disks_list 1445 */ 1446 static void 1447 free_replicated_disks_list() 1448 { 1449 replicated_disk_t **repl_disk, *temp; 1450 int index; 1451 1452 for (index = 0; index <= MAX_DEVID_LEN; index++) { 1453 repl_disk = &replicated_disk_list[index]; 1454 1455 while (*repl_disk != NULL) { 1456 temp = *repl_disk; 1457 *repl_disk = (*repl_disk)->next; 1458 1459 Free(temp->old_devid); 1460 Free(temp->new_devid); 1461 Free(temp); 1462 } 1463 } 1464 } 1465 1466 /* 1467 * build_replicated_disks_list() 1468 * 1469 * Builds a list of disks that have been replicated using either a 1470 * remote replication or a point-in-time replication software. The 1471 * list is stored as a two dimensional sparse array. 
1472 * 1473 * Returns 1474 * 1 on success 1475 * 0 on failure 1476 */ 1477 static int 1478 build_replicated_disks_list( 1479 md_error_t *ep, 1480 mddrivenamelist_t *dnlp 1481 ) 1482 { 1483 uint_t sliceno; 1484 int fd = -1; 1485 mddrivenamelist_t *dp; 1486 mdname_t *rsp; 1487 mddb_mb_t *mbp; 1488 1489 mbp = Malloc(DEV_BSIZE); 1490 1491 for (dp = dnlp; dp != NULL; dp = dp->next) { 1492 mddrivename_t *dnp; 1493 void *new_devid; 1494 1495 dnp = dp->drivenamep; 1496 /* determine the replica slice */ 1497 if (meta_replicaslice(dnp, &sliceno, ep) != 0) 1498 continue; 1499 1500 /* 1501 * if the replica slice size is zero, don't bother opening 1502 */ 1503 if (dnp->vtoc.parts[sliceno].size == 0) 1504 continue; 1505 1506 if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) 1507 continue; 1508 1509 if ((fd = open(rsp->rname, O_RDONLY| O_NDELAY)) < 0) 1510 return (mdsyserror(ep, errno, rsp->rname)); 1511 1512 /* a drive may not have a master block so we just continue */ 1513 if (read_master_block(ep, fd, mbp, DEV_BSIZE) <= 0) { 1514 (void) close(fd); 1515 mdclrerror(ep); 1516 continue; 1517 } 1518 1519 if (is_replicated(fd, mbp, 1, &new_devid)) { 1520 replicated_list_insert(mbp->mb_devid_len, 1521 mbp->mb_devid, new_devid); 1522 } 1523 (void) close(fd); 1524 } 1525 replicated_disk_list_built = 1; 1526 1527 Free(mbp); 1528 return (1); 1529 } 1530 1531 /* 1532 * free_did_list() 1533 * 1534 * Frees the did_list allocated as part of build_did_list 1535 */ 1536 static void 1537 free_did_list( 1538 did_list_t *did_listp 1539 ) 1540 { 1541 did_list_t *temp, *head; 1542 1543 head = did_listp; 1544 1545 while (head != NULL) { 1546 temp = head; 1547 head = head->next; 1548 if (temp->rdid) 1549 Free(temp->rdid); 1550 if (temp->did) 1551 Free(temp->did); 1552 if (temp->devname) 1553 Free(temp->devname); 1554 if (temp->minor_name) 1555 Free(temp->minor_name); 1556 Free(temp); 1557 } 1558 } 1559 1560 /* 1561 * build_did_list() 1562 * 1563 * Build a list of device ids corresponding to disks 
in the locator block. 1564 * Memory is allocated here for the nodes in the did_list. The callers of 1565 * this routine must also call free_did_list to free up the memory after 1566 * they're done. 1567 * 1568 * Returns: 1569 * < 0 for failure 1570 * 0 for no valid locator block device id array 1571 * 1 for valid locator block device id array 1572 * ENOTSUP partial diskset, not all disks in a diskset on the 1573 * system where import is being executed 1574 */ 1575 static int 1576 build_did_list( 1577 md_error_t *ep, 1578 int fd, 1579 mddb_mb_t *mb, 1580 mddb_did_blk_t *lbdidp, 1581 did_list_t **did_listp, 1582 int replicated 1583 ) 1584 { 1585 char *search_path = "/dev"; 1586 char *minor_name; 1587 int rval, cnt; 1588 devid_nmlist_t *nm; 1589 uint_t did_info_length = 0; 1590 uint_t did_info_firstblk = 0; 1591 did_list_t *new, *head = NULL; 1592 char *bp = NULL, *temp; 1593 mddb_did_info_t *did_info = NULL; 1594 void *did = NULL; 1595 size_t new_devid_len; 1596 1597 for (cnt = 0; cnt < MDDB_NLB; cnt++) { 1598 did_info = &lbdidp->blk_info[cnt]; 1599 1600 if (!(did_info->info_flags & MDDB_DID_EXISTS)) 1601 continue; 1602 1603 new = Zalloc(sizeof (did_list_t)); 1604 new->did = Zalloc(did_info->info_length); 1605 1606 /* 1607 * If we can re-use the buffer already has been 1608 * read in then just use it. 
Otherwise free 1609 * the previous one and alloc a new one 1610 */ 1611 if (dbtob(did_info->info_blkcnt) != did_info_length && 1612 did_info->info_firstblk != did_info_firstblk) { 1613 1614 did_info_length = dbtob(did_info->info_blkcnt); 1615 did_info_firstblk = did_info->info_firstblk; 1616 1617 if (bp) 1618 Free(bp); 1619 bp = temp = Zalloc(did_info_length); 1620 1621 if ((rval = phys_read(ep, fd, mb, did_info_firstblk, 1622 (void *)bp, did_info_length)) < 0) 1623 return (rval); 1624 } else { 1625 temp = bp; 1626 } 1627 1628 temp += did_info->info_offset; 1629 (void) memcpy(new->did, temp, did_info->info_length); 1630 new->did_index = cnt; 1631 minor_name = did_info->info_minor_name; 1632 1633 /* 1634 * If we are not able to find the ctd mapping corresponding 1635 * to a given device id, it probably means the device id in 1636 * question is not registered with the system. 1637 * 1638 * Highly likely that the only time this happens, we've hit 1639 * a case where not all the disks that are a part of the 1640 * diskset were moved before importing the diskset. 1641 * 1642 * If set is a replicated diskset, then the device id we get 1643 * from 'lb' will be the 'other' did and we need to lookup 1644 * the real one before we call this routine. 
1645 */ 1646 if (replicated) { 1647 temp = replicated_list_lookup(did_info->info_length, 1648 new->did); 1649 new_devid_len = devid_sizeof((ddi_devid_t)temp); 1650 new->rdid = Zalloc(new_devid_len); 1651 (void) memcpy(new->rdid, temp, new_devid_len); 1652 did = new->rdid; 1653 } else { 1654 did = new->did; 1655 } 1656 1657 if (devid_valid((ddi_devid_t)(did)) == 0) { 1658 return (-1); 1659 } 1660 1661 if ((rval = meta_deviceid_to_nmlist(search_path, 1662 (ddi_devid_t)did, minor_name, &nm)) != 0) { 1663 *did_listp = head; 1664 free_did_list(*did_listp); 1665 *did_listp = NULL; 1666 (void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD, 1667 mynode(), NULL, NULL); 1668 return (ENOTSUP); 1669 } 1670 1671 assert(nm->devname != NULL); 1672 new->devname = Strdup(nm->devname); 1673 new->dev = nm->dev; 1674 new->minor_name = Strdup(minor_name); 1675 1676 devid_free_nmlist(nm); 1677 1678 new->next = head; 1679 head = new; 1680 } 1681 1682 /* Free the last bp */ 1683 if (bp) 1684 Free(bp); 1685 *did_listp = head; 1686 return (1); 1687 } 1688 /* 1689 * check_nm_disks 1690 * Checks the disks listed in the shared did namespace to see if they 1691 * are accessable on the system. If not, return ENOTSUP error to 1692 * indicate we have a partial diskset. 
1693 * Returns: 1694 * < 0 for failure 1695 * 0 success 1696 * ENOTSUP partial diskset, not all disks in a diskset on the 1697 * system where import is being executed 1698 */ 1699 static int 1700 check_nm_disks( 1701 md_error_t *ep, 1702 struct devid_min_rec *did_nmp, 1703 struct devid_shr_rec *did_shrnmp 1704 ) 1705 { 1706 char *search_path = "/dev"; 1707 char *minor_name = NULL; 1708 uint_t used_size, min_used_size; 1709 ddi_devid_t did; 1710 devid_nmlist_t *nm; 1711 void *did_min_namep; 1712 void *did_shr_namep; 1713 size_t did_nsize, did_shr_nsize; 1714 1715 used_size = did_shrnmp->did_rec_hdr.r_used_size - 1716 sizeof (struct nm_rec_hdr); 1717 min_used_size = did_nmp->min_rec_hdr.r_used_size - 1718 sizeof (struct nm_rec_hdr); 1719 did_shr_namep = (void *)(&did_shrnmp->device_id[0]); 1720 while (used_size > (int)sizeof (struct did_shr_name)) { 1721 did_min_namep = (void *)(&did_nmp->minor_name[0]); 1722 /* grab device id and minor name from the shared spaces */ 1723 did = (ddi_devid_t)(((struct did_shr_name *) 1724 did_shr_namep)->did_devid); 1725 if (devid_valid(did) == 0) { 1726 return (-1); 1727 } 1728 1729 /* 1730 * We need to check that the DID_NM and DID_SHR_NM are in 1731 * sync. It is possible that we took a panic between writing 1732 * the two areas to disk. This would be cleaned up on the 1733 * next snarf but we don't know for sure that snarf has even 1734 * happened since we're reading from disk. 1735 */ 1736 while (((struct did_shr_name *)did_shr_namep)->did_key != 1737 ((struct did_min_name *)did_min_namep)->min_devid_key) { 1738 did_nsize = DID_NAMSIZ((struct did_min_name *) 1739 did_min_namep); 1740 did_min_namep = ((void *)((char *)did_min_namep + 1741 did_nsize)); 1742 min_used_size -= did_nsize; 1743 if (min_used_size < (int)sizeof (struct did_min_name)) 1744 continue; 1745 } 1746 minor_name = ((struct did_min_name *)did_min_namep)->min_name; 1747 1748 /* 1749 * Try to find disk in the system. 
If we can't find the 1750 * disk, we have a partial diskset. 1751 */ 1752 if ((meta_deviceid_to_nmlist(search_path, 1753 did, minor_name, &nm)) != 0) { 1754 (void) mddserror(ep, MDE_DS_PARTIALSET, MD_SET_BAD, 1755 mynode(), NULL, NULL); 1756 return (ENOTSUP); 1757 } 1758 devid_free_nmlist(nm); 1759 used_size -= DID_SHR_NAMSIZ((struct did_shr_name *) 1760 did_shr_namep); 1761 /* increment to next item in the shared spaces */ 1762 did_shr_nsize = DID_SHR_NAMSIZ((struct did_shr_name *) 1763 did_shr_namep); 1764 did_shr_namep = ((void *)((char *)did_shr_namep + 1765 did_shr_nsize)); 1766 } 1767 return (0); 1768 } 1769 1770 1771 /* 1772 * report_metadb_info() 1773 * 1774 * Generates metadb output for the diskset. 1775 * 1776 */ 1777 static void 1778 report_metadb_info( 1779 md_im_set_desc_t *misp, 1780 char *indent 1781 ) 1782 { 1783 md_im_drive_info_t *d; 1784 md_im_replica_info_t *r; 1785 char *unk_str = ""; 1786 int i; 1787 1788 (void) printf("%s\t%5.5s\t\t%9.9s\t%11.11s\n", indent, gettext("flags"), 1789 gettext("first blk"), gettext("block count")); 1790 1791 unk_str = gettext("unknown"); 1792 1793 /* 1794 * Looping through all drives in the diskset to print 1795 * out information about the drive and if the verbose 1796 * option is set print out replica data. 
1797 */ 1798 for (d = misp->mis_drives; d != NULL; d = d->mid_next) { 1799 1800 if (d->mid_replicas != NULL) { 1801 for (r = d->mid_replicas; r != NULL; 1802 r = r->mir_next) { 1803 (void) printf("%s", indent); 1804 for (i = 0; i < MDDB_FLAGS_LEN; i++) { 1805 if (r->mir_flags & (1 << i)) { 1806 (void) putchar( 1807 MDDB_FLAGS_STRING[i]); 1808 } else { 1809 (void) putchar(' '); 1810 } 1811 } 1812 if ((r->mir_offset == -1) && (r->mir_length 1813 == -1)) { 1814 (void) printf("%7.7s\t\t%7.7s\t", 1815 unk_str, unk_str); 1816 } else if (r->mir_length == -1) { 1817 (void) printf("%i\t\t%7.7s\t", 1818 r->mir_offset, unk_str); 1819 } else { 1820 (void) printf("%i\t\t%i\t", 1821 r->mir_offset, r->mir_length); 1822 } 1823 (void) printf("\t%s\n", 1824 d->mid_devname); 1825 } 1826 } 1827 } 1828 (void) printf("\n"); 1829 } 1830 1831 1832 /* 1833 * report_set_info() 1834 * 1835 * Returns: 1836 * < 0 for failure 1837 * 0 for success 1838 * 1839 */ 1840 static int 1841 report_set_info( 1842 md_im_set_desc_t *misp, 1843 mddb_mb_t *mb, 1844 mddb_lb_t *lbp, 1845 mddb_rb_t *nm, 1846 pnm_rec_t **pnm, 1847 mdname_t *rsp, 1848 int fd, 1849 uint_t imp_flags, 1850 int set_count, 1851 md_error_t *ep 1852 ) 1853 { 1854 int rval = 0; 1855 md_im_drive_info_t *d; 1856 md_im_replica_info_t *r; 1857 md_im_drive_info_t *good_disk = NULL; 1858 int i; 1859 int in = META_INDENT; 1860 char indent[MAXPATHLEN]; 1861 int dlen = 0; 1862 md_timeval32_t firstdisktime; 1863 md_timeval32_t lastaccess; /* stores last modified timestamp */ 1864 int set_contains_time_conflict = 0; 1865 int disk_time_conflict = 0; 1866 1867 1868 /* Calculates the correct indentation. */ 1869 indent[0] = 0; 1870 for (i = 0; i < in; i++) 1871 (void) strlcat(indent, " ", sizeof (indent)); 1872 1873 /* 1874 * This will print before the information for the first diskset 1875 * if the verbose option was set. 
1876 */ 1877 if (set_count == 1) { 1878 if (imp_flags & META_IMP_REPORT) { 1879 (void) printf("\n%s:\n\n", 1880 gettext("Disksets eligible for import")); 1881 } 1882 } 1883 1884 /* 1885 * Make the distinction between a regular diskset and 1886 * a replicated diskset. 1887 */ 1888 if (misp->mis_flags & MD_IM_SET_REPLICATED) { 1889 if (imp_flags & META_IMP_REPORT) { 1890 (void) printf("%i) %s:\n", set_count, gettext( 1891 "Found replicated diskset containing disks")); 1892 } else { 1893 (void) printf("\n%s:\n", gettext( 1894 "Importing replicated diskset containing disks")); 1895 } 1896 } else { 1897 if (imp_flags & META_IMP_REPORT) { 1898 (void) printf("%i) %s:\n", set_count, gettext( 1899 "Found regular diskset containing disks")); 1900 } else { 1901 (void) printf("\n%s:\n", gettext( 1902 "Importing regular diskset containing disks")); 1903 } 1904 } 1905 1906 1907 /* 1908 * Save the set creation time for the first disk in the 1909 * diskset. 1910 */ 1911 for (d = misp->mis_drives; d != NULL; d = d->mid_next) { 1912 dlen = max(dlen, strlen(d->mid_dnp->cname)); 1913 if (good_disk == NULL) { 1914 for (r = d->mid_replicas; r != NULL; r = r->mir_next) { 1915 if (r->mir_flags & MDDB_F_ACTIVE) { 1916 good_disk = d; 1917 firstdisktime = 1918 d->mid_setcreatetimestamp; 1919 break; 1920 } 1921 } 1922 } else { 1923 break; 1924 } 1925 } 1926 1927 1928 /* 1929 * Compares the set creation time from the first disk in the 1930 * diskset to the diskset creation time on all other 1931 * disks in the diskset. 1932 * If they are different then the disk probably belongs to a 1933 * different diskset so we will print out a warning. 1934 * 1935 * Looping through all drives in the diskset to print 1936 * out information about the drive. 1937 */ 1938 for (d = misp->mis_drives; d != NULL; disk_time_conflict = 0, 1939 d = d->mid_next) { 1940 /* 1941 * Verify that the disk's seconds and micro-seconds fields 1942 * match the fields for the good_disk. 
1943 */ 1944 if ((firstdisktime.tv_sec != 1945 d->mid_setcreatetimestamp.tv_sec) || 1946 (firstdisktime.tv_usec != 1947 d->mid_setcreatetimestamp.tv_usec)) { 1948 disk_time_conflict = 1; 1949 set_contains_time_conflict = 1; 1950 } 1951 1952 /* Printing disk names. */ 1953 if (disk_time_conflict == 1) { 1954 /* print '*' next to conflicting disk */ 1955 (void) printf("%s%-*.*s *\n", indent, 1956 dlen, dlen, d->mid_dnp->cname); 1957 } else { 1958 (void) printf("%s%-*.*s\n", indent, 1959 dlen, dlen, d->mid_dnp->cname); 1960 } 1961 } 1962 (void) printf("\n"); 1963 1964 /* 1965 * This note explains the "*" that appears next to the 1966 * disks with metadbs' whose lb_inittime timestamp does not 1967 * match the rest of the diskset. 1968 */ 1969 if (set_contains_time_conflict) { 1970 (void) printf("%s%s\n%s%s\n\n", indent, 1971 gettext("* WARNING: This disk has been reused in " 1972 "another diskset."), indent, gettext("Import may corrupt " 1973 "data in the diskset.")); 1974 } 1975 1976 1977 /* 1978 * If the verbose flag was given on the command line, 1979 * we will print out the metastat -c information , the 1980 * creation time, and last modified time for the diskset. 1981 */ 1982 if (imp_flags & META_IMP_VERBOSE) { 1983 (void) printf("%s%s\n", indent, 1984 gettext("Metadatabase information:")); 1985 report_metadb_info(misp, indent); 1986 1987 /* 1988 * Printing creation time and last modified time. 1989 * Last modified: uses the global variable "lastaccess", 1990 * which is set to the last updated timestamp from all of 1991 * the database blocks(db_timestamp) or record blocks 1992 * (rb_timestamp). 1993 * Creation time is the locator block init time 1994 * (lb_inittime). 
1995 */ 1996 lastaccess = good_disk->mid_replicas->mir_timestamp; 1997 1998 (void) printf("%s%s\n", indent, 1999 gettext("Metadevice information:")); 2000 rval = report_metastat_info(mb, lbp, nm, pnm, rsp, fd, 2001 &lastaccess, ep); 2002 if (rval < 0) { 2003 return (rval); 2004 } 2005 2006 (void) printf("%s%s:\t%s\n", indent, 2007 gettext("Creation time"), 2008 meta_print_time(&good_disk->mid_replicas->mir_timestamp)); 2009 (void) printf("%s%s:\t%s\n", indent, 2010 gettext("Last modified time"), 2011 meta_print_time(&lastaccess)); 2012 } else { 2013 /* 2014 * Even if the verbose option is not set, we will print the 2015 * creation time for the diskset. 2016 */ 2017 (void) printf("%s%s:\t%s\n", indent, gettext("Creation time"), 2018 meta_print_time(&good_disk->mid_replicas->mir_timestamp)); 2019 } 2020 2021 2022 /* 2023 * If the diskset is not actually being imported, then we 2024 * print out extra information about how to import it. 2025 * If the verbose flag was not set, then we will also 2026 * print out information about how to obtain verbose output. 2027 */ 2028 if (imp_flags & META_IMP_REPORT) { 2029 /* 2030 * TRANSLATION_NOTE 2031 * 2032 * The translation of the phrase "For more information 2033 * about this set" will be followed by a ":" and a 2034 * suggested command (untranslatable) that the user 2035 * may use to request additional information. 2036 */ 2037 if (!(imp_flags & META_IMP_VERBOSE)) { 2038 (void) printf("%s%s:\n%s %s -r -v %s\n", indent, 2039 gettext("For more information about this diskset"), 2040 indent, myname, good_disk->mid_dnp->cname); 2041 } 2042 /* 2043 * TRANSLATION_NOTE 2044 * 2045 * The translation of the phrase "To import this set" 2046 * will be followed by a ":" and a suggested command 2047 * (untranslatable) that the user may use to import 2048 * the specified diskset. 
2049 */ 2050 (void) printf("%s%s:\n%s %s -s <newsetname> %s\n", indent, 2051 gettext("To import this diskset"), indent, myname, 2052 good_disk->mid_dnp->cname); 2053 } 2054 (void) printf("\n\n"); 2055 2056 return (rval); 2057 } 2058 2059 2060 /* 2061 * meta_get_and_report_set_info 2062 * 2063 * Scans a given drive for set specific information. If the given drive 2064 * has a shared metadb, scans the shared metadb for information pertaining 2065 * to the set. 2066 * 2067 * Returns: 2068 * <0 for failure 2069 * 0 success but no replicas were found 2070 * 1 success and a replica was found 2071 * ENOTSUP for partial disksets detected 2072 */ 2073 int 2074 meta_get_and_report_set_info( 2075 mddrivenamelist_t *dp, 2076 md_im_set_desc_t **mispp, 2077 int local_mb_ok, 2078 uint_t imp_flags, 2079 int *set_count, 2080 md_error_t *ep 2081 ) 2082 { 2083 uint_t s; 2084 mdname_t *rsp; 2085 int fd; 2086 char mb[DEV_BSIZE]; 2087 /*LINTED*/ 2088 mddb_mb_t *mbp = (mddb_mb_t *)mb; 2089 char lb[dbtob(MDDB_LBCNT)]; 2090 /*LINTED*/ 2091 mddb_lb_t *lbp = (mddb_lb_t *)lb; 2092 mddb_did_blk_t *lbdidp = NULL; 2093 mddb_ln_t *lnp = NULL; 2094 int lnsize, lbdid_size; 2095 int rval = 0; 2096 char db[DEV_BSIZE]; 2097 /*LINTED*/ 2098 mddb_db_t *dbp = (mddb_db_t *)db; 2099 did_list_t *did_listp = NULL; 2100 mddrivenamelist_t *dnlp; 2101 mddrivename_t *dnp; 2102 md_im_names_t cnames = { 0, NULL}; 2103 char *nm = NULL; 2104 char *did_nm = NULL, *did_shrnm = NULL; 2105 struct nm_rec *nmp; 2106 struct devid_shr_rec *did_shrnmp; 2107 struct devid_min_rec *did_nmp; 2108 int extended_namespace = 0; 2109 int replicated = 0; 2110 pnm_rec_t *pnm = NULL; /* list of physical devs in set */ 2111 md_im_set_desc_t *misp; 2112 2113 dnp = dp->drivenamep; 2114 2115 /* 2116 * Determine and open the replica slice 2117 */ 2118 if (meta_replicaslice(dnp, &s, ep) != 0) { 2119 return (-1); 2120 } 2121 2122 /* 2123 * Test for the size of replica slice in question. 
If 2124 * the size is zero, we know that this is not a disk that was 2125 * part of a set and it should be silently ignored for import. 2126 */ 2127 if (dnp->vtoc.parts[s].size == 0) 2128 return (0); 2129 2130 if ((rsp = metaslicename(dnp, s, ep)) == NULL) { 2131 return (-1); 2132 } 2133 2134 if ((fd = open(rsp->rname, O_RDONLY|O_NDELAY)) < 0) 2135 return (mdsyserror(ep, errno, rsp->cname)); 2136 2137 /* 2138 * After the open() succeeds, we should return via the "out" 2139 * label to clean up after ourselves. (Up 'til now, we can 2140 * just return directly, because there are no resources to 2141 * give back.) 2142 */ 2143 2144 if ((rval = read_master_block(ep, fd, mbp, sizeof (mb))) <= 0) 2145 goto out; 2146 2147 replicated = is_replicated(fd, mbp, 0, NULL); 2148 2149 if (!local_mb_ok && mbp->mb_setno == 0) { 2150 rval = 0; 2151 goto out; 2152 } 2153 2154 if ((rval = read_locator_block(ep, fd, mbp, lbp, sizeof (lb))) <= 0) 2155 goto out; 2156 2157 /* 2158 * Once the locator block has been read, we need to 2159 * check if the locator block commit count is zero. 2160 * If it is zero, we know that the replica we're dealing 2161 * with is on a disk that was deleted from the disk set; 2162 * and, it potentially has stale data. We need to quit 2163 * in that case 2164 */ 2165 if (lbp->lb_commitcnt == 0) { 2166 rval = 0; 2167 goto out; 2168 } 2169 2170 /* 2171 * Make sure that the disk being imported has device id 2172 * namespace present for disksets. If a disk doesn't have 2173 * device id namespace, we skip reading the replica on that disk 2174 */ 2175 if (!(lbp->lb_flags & MDDB_DEVID_STYLE)) { 2176 rval = 0; 2177 goto out; 2178 } 2179 2180 /* 2181 * Grab the locator block device id array. Allocate memory for the 2182 * array first. 
2183 */ 2184 lbdid_size = dbtob(lbp->lb_didblkcnt); 2185 lbdidp = Zalloc(lbdid_size); 2186 2187 if ((rval = read_locator_block_did(ep, fd, mbp, lbp, lbdidp, 2188 lbdid_size)) <= 0) 2189 goto out; 2190 2191 /* 2192 * For a disk that has not been replicated, extract the device ids 2193 * stored in the locator block device id array and store them in 2194 * a list. 2195 * 2196 * If the disk has been replicated using replication software such 2197 * as HDS Truecopy/ShadowImage or EMC SRDF/BCV, the device ids in 2198 * the locator block are invalid and we need to build a list of 2199 * replicated disks. 2200 */ 2201 if (replicated && !replicated_disk_list_built) { 2202 /* 2203 * if there's a replicated diskset involved, we need to 2204 * scan the system one more time and build a list of all 2205 * candidate disks that might be part of that replicated set 2206 */ 2207 if (meta_list_disks(ep, &cnames) != 0) { 2208 rval = 0; 2209 goto out; 2210 } 2211 dnlp = meta_prune_cnames(ep, &cnames, 0); 2212 rval = build_replicated_disks_list(ep, dnlp); 2213 if (rval == 0) 2214 goto out; 2215 } 2216 2217 rval = build_did_list(ep, fd, mbp, lbdidp, &did_listp, replicated); 2218 2219 if ((rval <= 0) || (rval == ENOTSUP)) 2220 goto out; 2221 2222 /* 2223 * Until here, we've gotten away with fixed sizes for the 2224 * master block and locator block. The locator names, 2225 * however, are sized (and therefore allocated) dynamically 2226 * according to information in the locator block. 2227 */ 2228 lnsize = dbtob(lbp->lb_lnblkcnt); 2229 lnp = Zalloc(lnsize); 2230 2231 if ((rval = read_locator_names(ep, fd, mbp, lbp, lnp, lnsize)) <= 0) 2232 goto out; 2233 2234 /* 2235 * Read in the NM record 2236 * If no NM record was found, it still is a valid configuration 2237 * but it also means that we won't find any corresponding DID_NM 2238 * or DID_SHR_NM. 
2239 */ 2240 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &nm, MDDB_NM, rsp->cname)) 2241 < 0) 2242 goto out; 2243 else if (rval == 0) 2244 goto append; 2245 2246 /* 2247 * At this point, we have read in all of the blocks that form 2248 * the nm_rec. We should at least detect the corner case 2249 * mentioned above, in which r_next_recid links to another 2250 * nm_rec. Extended namespace handling is left for Phase 2. 2251 * 2252 * What this should really be is a loop, each iteration of 2253 * which reads in a nm_rec and calls the set_append(). 2254 */ 2255 /*LINTED*/ 2256 nmp = (struct nm_rec *)(nm + sizeof (mddb_rb_t)); 2257 if (nmp->r_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2258 extended_namespace = 1; 2259 rval = 0; 2260 goto out; 2261 } 2262 2263 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_nm, 2264 MDDB_DID_NM, rsp->cname)) < 0) 2265 goto out; 2266 else if (rval == 0) 2267 goto append; 2268 2269 /*LINTED*/ 2270 did_nmp = (struct devid_min_rec *)(did_nm + sizeof (mddb_rb_t) - 2271 sizeof (int)); 2272 if (did_nmp->min_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2273 extended_namespace = 1; 2274 rval = 0; 2275 goto out; 2276 } 2277 2278 if ((rval = read_nm_rec(ep, fd, mbp, lbp, &did_shrnm, 2279 MDDB_DID_SHR_NM, rsp->cname)) < 0) 2280 goto out; 2281 else if (rval == 0) 2282 goto append; 2283 2284 /*LINTED*/ 2285 did_shrnmp = (struct devid_shr_rec *)(did_shrnm + sizeof (mddb_rb_t) - 2286 sizeof (int)); 2287 if (did_shrnmp->did_rec_hdr.r_next_recid != (mddb_recid_t)0) { 2288 extended_namespace = 1; 2289 rval = 0; 2290 goto out; 2291 } 2292 2293 /* 2294 * We need to check if all of the disks listed in the namespace 2295 * are actually available. If they aren't we'll return with 2296 * an ENOTSUP error which indicates a partial diskset. 2297 */ 2298 rval = check_nm_disks(ep, did_nmp, did_shrnmp); 2299 if ((rval < 0) || (rval == ENOTSUP)) 2300 goto out; 2301 2302 append: 2303 /* Finally, we've got what we need to process this replica. 
*/ 2304 misp = set_append(mispp, did_listp, mbp, lbp, 2305 /*LINTED*/ 2306 (mddb_rb_t *)nm, &pnm, (mddb_rb_t *)did_nm, (mddb_rb_t *)did_shrnm, 2307 imp_flags, replicated, ep); 2308 2309 *set_count += 1; 2310 rval = report_set_info(misp, mbp, lbp, 2311 /*LINTED*/ 2312 (mddb_rb_t *)nm, &pnm, rsp, fd, imp_flags, *set_count, ep); 2313 if (rval < 0) 2314 goto out; 2315 2316 /* Return the fact that we found at least one set */ 2317 rval = 1; 2318 2319 out: 2320 if (fd >= 0) 2321 (void) close(fd); 2322 if (did_listp != NULL) 2323 free_did_list(did_listp); 2324 if (lnp != NULL) 2325 Free(lnp); 2326 if (nm != NULL) 2327 Free(nm); 2328 if (did_nm != NULL) 2329 Free(did_nm); 2330 if (did_shrnm != NULL) 2331 Free(did_shrnm); 2332 if (pnm != NULL) 2333 free_pnm_rec_list(&pnm); 2334 2335 /* 2336 * If we are at the end of the list, we must free up 2337 * the replicated list too 2338 */ 2339 if (dp->next == NULL) 2340 free_replicated_disks_list(); 2341 2342 if (extended_namespace) 2343 return (mddserror(ep, MDE_DS_EXTENDEDNM, MD_SET_BAD, 2344 mynode(), NULL, NULL)); 2345 2346 return (rval); 2347 } 2348 2349 /* 2350 * Return the minor name associated with a given disk slice 2351 */ 2352 static char * 2353 meta_getminor_name( 2354 char *devname, 2355 md_error_t *ep 2356 ) 2357 { 2358 int fd = -1; 2359 char *minor_name = NULL; 2360 char *ret_minor_name = NULL; 2361 2362 if (devname == NULL) 2363 return (NULL); 2364 2365 if ((fd = open(devname, O_RDONLY|O_NDELAY, 0)) < 0) { 2366 (void) mdsyserror(ep, errno, devname); 2367 return (NULL); 2368 } 2369 2370 if (devid_get_minor_name(fd, &minor_name) == 0) { 2371 ret_minor_name = Strdup(minor_name); 2372 devid_str_free(minor_name); 2373 } 2374 2375 (void) close(fd); 2376 return (ret_minor_name); 2377 } 2378 2379 static int 2380 meta_replica_quorum( 2381 md_im_set_desc_t *misp, 2382 md_error_t *ep 2383 ) 2384 { 2385 md_im_drive_info_t *midp; 2386 mddrivename_t *dnp; 2387 md_im_replica_info_t *midr; 2388 mdname_t *np; 2389 struct stat st_buf; 
2390 uint_t rep_slice; 2391 int replica_count = 0; 2392 2393 for (midp = misp->mis_drives; midp != NULL; 2394 midp = midp->mid_next) { 2395 2396 dnp = midp->mid_dnp; 2397 2398 if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) || 2399 ((np = metaslicename(dnp, rep_slice, ep)) 2400 == NULL)) { 2401 mdclrerror(ep); 2402 continue; 2403 } 2404 2405 if (stat(np->bname, &st_buf) != 0) 2406 continue; 2407 2408 /* 2409 * The drive is okay now count its replicas 2410 */ 2411 for (midr = midp->mid_replicas; midr != NULL; 2412 midr = midr->mir_next) { 2413 replica_count++; 2414 } 2415 } 2416 2417 if (replica_count < (misp->mis_active_replicas + 1)/2) 2418 return (-1); 2419 2420 return (0); 2421 } 2422 2423 static set_t 2424 meta_imp_setno( 2425 md_error_t *ep 2426 ) 2427 { 2428 set_t max_sets, setno; 2429 int bool; 2430 2431 if ((max_sets = get_max_sets(ep)) == 0) { 2432 return (MD_SET_BAD); 2433 } 2434 2435 /* 2436 * This code needs to be expanded when we run in SunCluster 2437 * environment SunCluster obtains setno internally 2438 */ 2439 for (setno = 1; setno < max_sets; setno++) { 2440 if (clnt_setnumbusy(mynode(), setno, 2441 &bool, ep) == -1) { 2442 setno = MD_SET_BAD; 2443 break; 2444 } 2445 /* 2446 * found one available 2447 */ 2448 if (bool == FALSE) 2449 break; 2450 } 2451 2452 if (setno == max_sets) { 2453 setno = MD_SET_BAD; 2454 } 2455 2456 return (setno); 2457 } 2458 2459 int 2460 meta_imp_set( 2461 md_im_set_desc_t *misp, 2462 char *setname, 2463 int force, 2464 bool_t dry_run, 2465 md_error_t *ep 2466 ) 2467 { 2468 md_timeval32_t tp; 2469 md_im_drive_info_t *midp; 2470 uint_t rep_slice; 2471 mddrivename_t *dnp; 2472 struct mddb_config c; 2473 mdname_t *np; 2474 md_im_replica_info_t *mirp; 2475 char setnum_link[MAXPATHLEN]; 2476 char setname_link[MAXPATHLEN]; 2477 char *minor_name = NULL; 2478 2479 (void) memset(&c, 0, sizeof (c)); 2480 (void) strlcpy(c.c_setname, setname, sizeof (c.c_setname)); 2481 c.c_sideno = 0; 2482 c.c_flags = MDDB_C_IMPORT; 2483 2484 /* 
2485 * Check to see if the setname that the set is being imported into, 2486 * already exists. 2487 */ 2488 if (getsetbyname(c.c_setname, ep) != NULL) { 2489 return (mddserror(ep, MDE_DS_SETNAMEBUSY, MD_SET_BAD, 2490 mynode(), NULL, c.c_setname)); 2491 } 2492 2493 /* 2494 * Find the next available set number 2495 */ 2496 if ((c.c_setno = meta_imp_setno(ep)) == MD_SET_BAD) { 2497 return (mddserror(ep, MDE_DS_SETNOTIMP, MD_SET_BAD, 2498 mynode(), NULL, c.c_setname)); 2499 } 2500 2501 if (meta_gettimeofday(&tp) == -1) { 2502 return (mdsyserror(ep, errno, NULL)); 2503 } 2504 c.c_timestamp = tp; 2505 2506 /* Check to see if replica quorum requirement is fulfilled */ 2507 if (!force && meta_replica_quorum(misp, ep) == -1) 2508 return (mddserror(ep, MDE_DS_INSUFQUORUM, MD_SET_BAD, 2509 mynode(), NULL, c.c_setname)); 2510 2511 for (midp = misp->mis_drives; midp != NULL; 2512 midp = midp->mid_next) { 2513 mdcinfo_t *cinfo; 2514 2515 /* 2516 * We pass down the list of the drives in the 2517 * set down to the kernel irrespective of 2518 * whether the drives have a replica or not. 2519 * 2520 * The kernel detects which of the drives don't 2521 * have a replica and accordingly does the 2522 * right thing. 
2523 */ 2524 dnp = midp->mid_dnp; 2525 if ((meta_replicaslice(dnp, &rep_slice, ep) != 0) || 2526 ((np = metaslicename(dnp, rep_slice, ep)) 2527 == NULL)) { 2528 mdclrerror(ep); 2529 continue; 2530 } 2531 2532 (void) strcpy(c.c_locator.l_devname, np->bname); 2533 c.c_locator.l_dev = meta_cmpldev(np->dev); 2534 c.c_locator.l_mnum = meta_getminor(np->dev); 2535 c.c_locator.l_devid = (uintptr_t)Malloc(midp->mid_devid_sz); 2536 (void) memcpy((void *)(uintptr_t)c.c_locator.l_devid, 2537 midp->mid_devid, midp->mid_devid_sz); 2538 c.c_locator.l_devid_sz = midp->mid_devid_sz; 2539 c.c_locator.l_devid_flags = 2540 MDDB_DEVID_VALID | MDDB_DEVID_SPACE | MDDB_DEVID_SZ; 2541 if (midp->mid_o_devid) { 2542 c.c_locator.l_old_devid = 2543 (uint64_t)(uintptr_t)Malloc(midp->mid_o_devid_sz); 2544 (void) memcpy((void *)(uintptr_t) 2545 c.c_locator.l_old_devid, 2546 midp->mid_o_devid, midp->mid_o_devid_sz); 2547 c.c_locator.l_old_devid_sz = midp->mid_o_devid_sz; 2548 } 2549 minor_name = meta_getminor_name(np->bname, ep); 2550 (void) strncpy(c.c_locator.l_minor_name, minor_name, 2551 sizeof (c.c_locator.l_minor_name)); 2552 2553 if ((cinfo = metagetcinfo(np, ep)) == NULL) { 2554 mdclrerror(ep); 2555 continue; 2556 } 2557 (void) strncpy(c.c_locator.l_driver, cinfo->dname, 2558 sizeof (c.c_locator.l_driver)); 2559 2560 mirp = midp->mid_replicas; 2561 2562 do { 2563 if (mirp) { 2564 c.c_locator.l_flags = 0; 2565 c.c_locator.l_blkno = mirp->mir_offset; 2566 mirp = mirp->mir_next; 2567 } else { 2568 /* 2569 * Default offset for dummy is 16 2570 */ 2571 c.c_locator.l_blkno = 16; 2572 } 2573 2574 if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) { 2575 Free((void *)(uintptr_t)c.c_locator.l_devid); 2576 if (c.c_locator.l_old_devid) 2577 Free((void *)(uintptr_t) 2578 c.c_locator.l_old_devid); 2579 return (mdstealerror(ep, &c.c_mde)); 2580 } 2581 } while (mirp != NULL); 2582 } 2583 2584 /* 2585 * If the dry run option was specified, flag success 2586 * and exit out 2587 */ 2588 if (dry_run == 
1) { 2589 md_eprintf("%s\n", dgettext(TEXT_DOMAIN, 2590 "import should be successful")); 2591 Free((void *)(uintptr_t)c.c_locator.l_devid); 2592 if (c.c_locator.l_old_devid) 2593 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 2594 return (0); 2595 } 2596 2597 /* 2598 * Now kernel should have all the information 2599 * regarding the import diskset replica. 2600 * Tell kernel to load them up and import the set 2601 */ 2602 if (metaioctl(MD_IOCIMP_LOAD, &c.c_setno, &c.c_mde, NULL) != 0) { 2603 Free((void *)(uintptr_t)c.c_locator.l_devid); 2604 if (c.c_locator.l_old_devid) 2605 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 2606 return (mdstealerror(ep, &c.c_mde)); 2607 } 2608 2609 (void) meta_smf_enable(META_SMF_DISKSET, NULL); 2610 2611 /* The set has now been imported, create the appropriate symlink */ 2612 (void) snprintf(setname_link, MAXPATHLEN, "/dev/md/%s", setname); 2613 (void) snprintf(setnum_link, MAXPATHLEN, "shared/%d", c.c_setno); 2614 2615 /* 2616 * Since we already verified that the setname was OK, make sure to 2617 * cleanup before proceeding. 2618 */ 2619 if (unlink(setname_link) == -1) { 2620 if (errno != ENOENT) 2621 (void) mdsyserror(ep, errno, setname_link); 2622 } 2623 2624 if (symlink(setnum_link, setname_link) == -1) 2625 (void) mdsyserror(ep, errno, setname_link); 2626 2627 /* resnarf the set that has just been imported */ 2628 if (clnt_resnarf_set(mynode(), c.c_setno, ep) != 0) 2629 md_eprintf("%s\n", dgettext(TEXT_DOMAIN, "Please stop and " 2630 "restart rpc.metad")); 2631 2632 Free((void *)(uintptr_t)c.c_locator.l_devid); 2633 if (c.c_locator.l_old_devid) 2634 Free((void *)(uintptr_t)c.c_locator.l_old_devid); 2635 return (0); 2636 } 2637