1 /* $NetBSD: rf_disks.c,v 1.72 2009/11/17 18:54:26 jld Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1995 Carnegie-Mellon University. 33 * All rights reserved. 34 * 35 * Author: Mark Holland 36 * 37 * Permission to use, copy, modify and distribute this software and 38 * its documentation is hereby granted, provided that both the copyright 39 * notice and this permission notice appear in all copies of the 40 * software, derivative works or modified versions, and any portions 41 * thereof, and that both notices appear in supporting documentation. 42 * 43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 46 * 47 * Carnegie Mellon requests users of this software to return to 48 * 49 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 50 * School of Computer Science 51 * Carnegie Mellon University 52 * Pittsburgh PA 15213-3890 53 * 54 * any improvements or extensions that they make and grant Carnegie the 55 * rights to redistribute these changes. 56 */ 57 58 /*************************************************************** 59 * rf_disks.c -- code to perform operations on the actual disks 60 ***************************************************************/ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.72 2009/11/17 18:54:26 jld Exp $"); 64 65 #include <dev/raidframe/raidframevar.h> 66 67 #include "rf_raid.h" 68 #include "rf_alloclist.h" 69 #include "rf_utils.h" 70 #include "rf_general.h" 71 #include "rf_options.h" 72 #include "rf_kintf.h" 73 #include "rf_netbsd.h" 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/proc.h> 78 #include <sys/ioctl.h> 79 #include <sys/fcntl.h> 80 #include <sys/vnode.h> 81 #include <sys/kauth.h> 82 83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 84 static void rf_print_label_status( RF_Raid_t *, int, char *, 85 RF_ComponentLabel_t *); 86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 87 RF_ComponentLabel_t *, int, int ); 88 89 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 90 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 91 92 /************************************************************************** 93 * 94 * initialize the disks comprising the array 95 * 96 * We want the spare disks to have regular row,col numbers so that we can 97 * easily substitue a spare for a failed disk. But, the driver code assumes 98 * throughout that the array contains numRow by numCol _non-spare_ disks, so 99 * it's not clear how to fit in the spares. This is an unfortunate holdover 100 * from raidSim. The quick and dirty fix is to make row zero bigger than the 101 * rest, and put all the spares in it. This probably needs to get changed 102 * eventually. 103 * 104 **************************************************************************/ 105 106 int 107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 108 RF_Config_t *cfgPtr) 109 { 110 RF_RaidDisk_t *disks; 111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 112 RF_RowCol_t c; 113 int bs, ret; 114 unsigned i, count, foundone = 0, numFailuresThisRow; 115 int force; 116 117 force = cfgPtr->force; 118 119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 120 if (ret) 121 goto fail; 122 123 disks = raidPtr->Disks; 124 125 numFailuresThisRow = 0; 126 for (c = 0; c < raidPtr->numCol; c++) { 127 ret = rf_ConfigureDisk(raidPtr, 128 &cfgPtr->devnames[0][c][0], 129 &disks[c], c); 130 131 if (ret) 132 goto fail; 133 134 if (disks[c].status == rf_ds_optimal) { 135 raidfetch_component_label(raidPtr, c); 136 } 137 138 if (disks[c].status != rf_ds_optimal) { 139 numFailuresThisRow++; 140 } else { 141 if (disks[c].numBlocks < min_numblks) 142 min_numblks = disks[c].numBlocks; 143 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 144 c, disks[c].devname, 145 disks[c].numBlocks, 146 disks[c].blockSize, 147 (long int) disks[c].numBlocks * 148 disks[c].blockSize / 1024 / 1024); 149 } 150 } 151 /* XXX fix for n-fault tolerant */ 152 /* XXX this should probably check to see how many failures 153 we can handle for this configuration! */ 154 if (numFailuresThisRow > 0) 155 raidPtr->status = rf_rs_degraded; 156 157 /* all disks must be the same size & have the same block size, bs must 158 * be a power of 2 */ 159 bs = 0; 160 foundone = 0; 161 for (c = 0; c < raidPtr->numCol; c++) { 162 if (disks[c].status == rf_ds_optimal) { 163 bs = disks[c].blockSize; 164 foundone = 1; 165 break; 166 } 167 } 168 if (!foundone) { 169 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 170 ret = EINVAL; 171 goto fail; 172 } 173 for (count = 0, i = 1; i; i <<= 1) 174 if (bs & i) 175 count++; 176 if (count != 1) { 177 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 178 ret = EINVAL; 179 goto fail; 180 } 181 182 if (rf_CheckLabels( raidPtr, cfgPtr )) { 183 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 184 if (force != 0) { 185 printf("raid%d: Fatal errors being ignored.\n", 186 raidPtr->raidid); 187 } else { 188 ret = EINVAL; 189 goto fail; 190 } 191 } 192 193 for (c = 0; c < raidPtr->numCol; c++) { 194 if (disks[c].status == rf_ds_optimal) { 195 if (disks[c].blockSize != bs) { 196 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c); 197 ret = EINVAL; 198 goto fail; 199 } 200 if (disks[c].numBlocks != min_numblks) { 201 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n", 202 c, (int) min_numblks); 203 disks[c].numBlocks = min_numblks; 204 } 205 } 206 } 207 208 raidPtr->sectorsPerDisk = min_numblks; 209 raidPtr->logBytesPerSector = ffs(bs) - 1; 210 raidPtr->bytesPerSector = bs; 211 raidPtr->sectorMask = bs - 1; 212 return (0); 213 214 fail: 215 216 rf_UnconfigureVnodes( raidPtr ); 217 218 return (ret); 219 } 220 221 222 /**************************************************************************** 223 * set up the data structures describing the spare disks in the array 224 * recall from the above comment that the spare disk descriptors are stored 225 * in row zero, which is specially expanded to hold them. 226 ****************************************************************************/ 227 int 228 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 229 RF_Config_t *cfgPtr) 230 { 231 int i, ret; 232 unsigned int bs; 233 RF_RaidDisk_t *disks; 234 int num_spares_done; 235 236 num_spares_done = 0; 237 238 /* The space for the spares should have already been allocated by 239 * ConfigureDisks() */ 240 241 disks = &raidPtr->Disks[raidPtr->numCol]; 242 for (i = 0; i < raidPtr->numSpare; i++) { 243 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 244 &disks[i], raidPtr->numCol + i); 245 if (ret) 246 goto fail; 247 if (disks[i].status != rf_ds_optimal) { 248 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 249 &cfgPtr->spare_names[i][0]); 250 } else { 251 disks[i].status = rf_ds_spare; /* change status to 252 * spare */ 253 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i, 254 disks[i].devname, 255 disks[i].numBlocks, disks[i].blockSize, 256 (long int) disks[i].numBlocks * 257 disks[i].blockSize / 1024 / 1024); 258 } 259 num_spares_done++; 260 } 261 262 /* check sizes and block sizes on spare disks */ 263 bs = 1 << raidPtr->logBytesPerSector; 264 for (i = 0; i < raidPtr->numSpare; i++) { 265 if (disks[i].blockSize != bs) { 266 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 267 ret = EINVAL; 268 goto fail; 269 } 270 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 271 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 272 disks[i].devname, disks[i].blockSize, 273 raidPtr->sectorsPerDisk); 274 ret = EINVAL; 275 goto fail; 276 } else 277 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 278 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 279 disks[i].devname, 280 raidPtr->sectorsPerDisk, 281 disks[i].numBlocks); 282 283 disks[i].numBlocks = raidPtr->sectorsPerDisk; 284 } 285 } 286 287 return (0); 288 289 fail: 290 291 /* Release the hold on the main components. We've failed to allocate 292 * a spare, and since we're failing, we need to free things.. 293 294 XXX failing to allocate a spare is *not* that big of a deal... 295 We *can* survive without it, if need be, esp. if we get hot 296 adding working. 297 298 If we don't fail out here, then we need a way to remove this spare... 299 that should be easier to do here than if we are "live"... 300 301 */ 302 303 rf_UnconfigureVnodes( raidPtr ); 304 305 return (ret); 306 } 307 308 static int 309 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 310 { 311 int ret; 312 313 /* We allocate RF_MAXSPARE on the first row so that we 314 have room to do hot-swapping of spares */ 315 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) * 316 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 317 raidPtr->cleanupList); 318 if (raidPtr->Disks == NULL) { 319 ret = ENOMEM; 320 goto fail; 321 } 322 323 /* get space for device specific stuff.. */ 324 RF_MallocAndAdd(raidPtr->raid_cinfo, 325 (raidPtr->numCol + RF_MAXSPARE) * 326 sizeof(struct raidcinfo), (struct raidcinfo *), 327 raidPtr->cleanupList); 328 329 if (raidPtr->raid_cinfo == NULL) { 330 ret = ENOMEM; 331 goto fail; 332 } 333 334 return(0); 335 fail: 336 rf_UnconfigureVnodes( raidPtr ); 337 338 return(ret); 339 } 340 341 342 /* configure a single disk during auto-configuration at boot */ 343 int 344 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, 345 RF_AutoConfig_t *auto_config) 346 { 347 RF_RaidDisk_t *disks; 348 RF_RaidDisk_t *diskPtr; 349 RF_RowCol_t c; 350 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 351 int bs, ret; 352 int numFailuresThisRow; 353 RF_AutoConfig_t *ac; 354 int parity_good; 355 int mod_counter; 356 int mod_counter_found; 357 358 #if DEBUG 359 printf("Starting autoconfiguration of RAID set...\n"); 360 #endif 361 362 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 363 if (ret) 364 goto fail; 365 366 disks = raidPtr->Disks; 367 368 /* assume the parity will be fine.. */ 369 parity_good = RF_RAID_CLEAN; 370 371 /* Check for mod_counters that are too low */ 372 mod_counter_found = 0; 373 mod_counter = 0; 374 ac = auto_config; 375 while(ac!=NULL) { 376 if (mod_counter_found==0) { 377 mod_counter = ac->clabel->mod_counter; 378 mod_counter_found = 1; 379 } else { 380 if (ac->clabel->mod_counter > mod_counter) { 381 mod_counter = ac->clabel->mod_counter; 382 } 383 } 384 ac->flag = 0; /* clear the general purpose flag */ 385 ac = ac->next; 386 } 387 388 bs = 0; 389 390 numFailuresThisRow = 0; 391 for (c = 0; c < raidPtr->numCol; c++) { 392 diskPtr = &disks[c]; 393 394 /* find this row/col in the autoconfig */ 395 #if DEBUG 396 printf("Looking for %d in autoconfig\n",c); 397 #endif 398 ac = auto_config; 399 while(ac!=NULL) { 400 if (ac->clabel==NULL) { 401 /* big-time bad news. */ 402 goto fail; 403 } 404 if ((ac->clabel->column == c) && 405 (ac->clabel->mod_counter == mod_counter)) { 406 /* it's this one... */ 407 /* flag it as 'used', so we don't 408 free it later. */ 409 ac->flag = 1; 410 #if DEBUG 411 printf("Found: %s at %d\n", 412 ac->devname,c); 413 #endif 414 415 break; 416 } 417 ac=ac->next; 418 } 419 420 if (ac==NULL) { 421 /* we didn't find an exact match with a 422 correct mod_counter above... can we find 423 one with an incorrect mod_counter to use 424 instead? (this one, if we find it, will be 425 marked as failed once the set configures) 426 */ 427 428 ac = auto_config; 429 while(ac!=NULL) { 430 if (ac->clabel==NULL) { 431 /* big-time bad news. */ 432 goto fail; 433 } 434 if (ac->clabel->column == c) { 435 /* it's this one... 436 flag it as 'used', so we 437 don't free it later. */ 438 ac->flag = 1; 439 #if DEBUG 440 printf("Found(low mod_counter): %s at %d\n", 441 ac->devname,c); 442 #endif 443 444 break; 445 } 446 ac=ac->next; 447 } 448 } 449 450 451 452 if (ac!=NULL) { 453 /* Found it. Configure it.. */ 454 diskPtr->blockSize = ac->clabel->blockSize; 455 diskPtr->numBlocks = ac->clabel->numBlocks; 456 /* Note: rf_protectedSectors is already 457 factored into numBlocks here */ 458 raidPtr->raid_cinfo[c].ci_vp = ac->vp; 459 raidPtr->raid_cinfo[c].ci_dev = ac->dev; 460 461 memcpy(raidget_component_label(raidPtr, c), 462 ac->clabel, sizeof(*ac->clabel)); 463 snprintf(diskPtr->devname, sizeof(diskPtr->devname), 464 "/dev/%s", ac->devname); 465 466 /* note the fact that this component was 467 autoconfigured. You'll need this info 468 later. Trust me :) */ 469 diskPtr->auto_configured = 1; 470 diskPtr->dev = ac->dev; 471 472 /* 473 * we allow the user to specify that 474 * only a fraction of the disks should 475 * be used this is just for debug: it 476 * speeds up the parity scan 477 */ 478 479 diskPtr->numBlocks = diskPtr->numBlocks * 480 rf_sizePercentage / 100; 481 482 /* XXX these will get set multiple times, 483 but since we're autoconfiguring, they'd 484 better be always the same each time! 485 If not, this is the least of your worries */ 486 487 bs = diskPtr->blockSize; 488 min_numblks = diskPtr->numBlocks; 489 490 /* this gets done multiple times, but that's 491 fine -- the serial number will be the same 492 for all components, guaranteed */ 493 raidPtr->serial_number = ac->clabel->serial_number; 494 /* check the last time the label was modified */ 495 496 if (ac->clabel->mod_counter != mod_counter) { 497 /* Even though we've filled in all of 498 the above, we don't trust this 499 component since it's modification 500 counter is not in sync with the 501 rest, and we really consider it to 502 be failed. */ 503 disks[c].status = rf_ds_failed; 504 numFailuresThisRow++; 505 } else { 506 if (ac->clabel->clean != RF_RAID_CLEAN) { 507 parity_good = RF_RAID_DIRTY; 508 } 509 } 510 } else { 511 /* Didn't find it at all!! Component must 512 really be dead */ 513 disks[c].status = rf_ds_failed; 514 snprintf(disks[c].devname, sizeof(disks[c].devname), 515 "component%d", c); 516 numFailuresThisRow++; 517 } 518 } 519 /* XXX fix for n-fault tolerant */ 520 /* XXX this should probably check to see how many failures 521 we can handle for this configuration! */ 522 if (numFailuresThisRow > 0) { 523 raidPtr->status = rf_rs_degraded; 524 raidPtr->numFailures = numFailuresThisRow; 525 } 526 527 /* close the device for the ones that didn't get used */ 528 529 ac = auto_config; 530 while(ac!=NULL) { 531 if (ac->flag == 0) { 532 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 533 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); 534 vput(ac->vp); 535 ac->vp = NULL; 536 #if DEBUG 537 printf("Released %s from auto-config set.\n", 538 ac->devname); 539 #endif 540 } 541 ac = ac->next; 542 } 543 544 raidPtr->mod_counter = mod_counter; 545 546 /* note the state of the parity, if any */ 547 raidPtr->parity_good = parity_good; 548 raidPtr->sectorsPerDisk = min_numblks; 549 raidPtr->logBytesPerSector = ffs(bs) - 1; 550 raidPtr->bytesPerSector = bs; 551 raidPtr->sectorMask = bs - 1; 552 return (0); 553 554 fail: 555 556 rf_UnconfigureVnodes( raidPtr ); 557 558 return (ret); 559 560 } 561 562 /* configure a single disk in the array */ 563 int 564 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr, 565 RF_RowCol_t col) 566 { 567 char *p; 568 struct vnode *vp; 569 struct vattr va; 570 int error; 571 572 p = rf_find_non_white(bf); 573 if (p[strlen(p) - 1] == '\n') { 574 /* strip off the newline */ 575 p[strlen(p) - 1] = '\0'; 576 } 577 (void) strcpy(diskPtr->devname, p); 578 579 /* Let's start by claiming the component is fine and well... */ 580 diskPtr->status = rf_ds_optimal; 581 582 raidPtr->raid_cinfo[col].ci_vp = NULL; 583 raidPtr->raid_cinfo[col].ci_dev = 0; 584 585 if (!strcmp("absent", diskPtr->devname)) { 586 printf("Ignoring missing component at column %d\n", col); 587 sprintf(diskPtr->devname, "component%d", col); 588 diskPtr->status = rf_ds_failed; 589 return (0); 590 } 591 592 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE); 593 if (error) { 594 printf("dk_lookup on device: %s failed!\n", diskPtr->devname); 595 if (error == ENXIO) { 596 /* the component isn't there... must be dead :-( */ 597 diskPtr->status = rf_ds_failed; 598 } else { 599 return (error); 600 } 601 } 602 if (diskPtr->status == rf_ds_optimal) { 603 604 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0) 605 return (error); 606 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0) 607 return (error); 608 609 raidPtr->raid_cinfo[col].ci_vp = vp; 610 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev; 611 612 /* This component was not automatically configured */ 613 diskPtr->auto_configured = 0; 614 diskPtr->dev = va.va_rdev; 615 616 /* we allow the user to specify that only a fraction of the 617 * disks should be used this is just for debug: it speeds up 618 * the parity scan */ 619 diskPtr->numBlocks = diskPtr->numBlocks * 620 rf_sizePercentage / 100; 621 } 622 return (0); 623 } 624 625 static void 626 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name, 627 RF_ComponentLabel_t *ci_label) 628 { 629 630 printf("raid%d: Component %s being configured at col: %d\n", 631 raidPtr->raidid, dev_name, column ); 632 printf(" Column: %d Num Columns: %d\n", 633 ci_label->column, 634 ci_label->num_columns); 635 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 636 ci_label->version, ci_label->serial_number, 637 ci_label->mod_counter); 638 printf(" Clean: %s Status: %d\n", 639 ci_label->clean ? "Yes" : "No", ci_label->status ); 640 } 641 642 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, 643 char *dev_name, RF_ComponentLabel_t *ci_label, 644 int serial_number, int mod_counter) 645 { 646 int fatal_error = 0; 647 648 if (serial_number != ci_label->serial_number) { 649 printf("%s has a different serial number: %d %d\n", 650 dev_name, serial_number, ci_label->serial_number); 651 fatal_error = 1; 652 } 653 if (mod_counter != ci_label->mod_counter) { 654 printf("%s has a different modification count: %d %d\n", 655 dev_name, mod_counter, ci_label->mod_counter); 656 } 657 658 if (row != ci_label->row) { 659 printf("Row out of alignment for: %s\n", dev_name); 660 fatal_error = 1; 661 } 662 if (column != ci_label->column) { 663 printf("Column out of alignment for: %s\n", dev_name); 664 fatal_error = 1; 665 } 666 if (raidPtr->numCol != ci_label->num_columns) { 667 printf("Number of columns do not match for: %s\n", dev_name); 668 fatal_error = 1; 669 } 670 if (ci_label->clean == 0) { 671 /* it's not clean, but that's not fatal */ 672 printf("%s is not clean!\n", dev_name); 673 } 674 return(fatal_error); 675 } 676 677 678 /* 679 680 rf_CheckLabels() - check all the component labels for consistency. 681 Return an error if there is anything major amiss. 682 683 */ 684 685 int 686 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 687 { 688 int c; 689 char *dev_name; 690 RF_ComponentLabel_t *ci_label; 691 int serial_number = 0; 692 int mod_number = 0; 693 int fatal_error = 0; 694 int mod_values[4]; 695 int mod_count[4]; 696 int ser_values[4]; 697 int ser_count[4]; 698 int num_ser; 699 int num_mod; 700 int i; 701 int found; 702 int hosed_column; 703 int too_fatal; 704 int parity_good; 705 int force; 706 707 hosed_column = -1; 708 too_fatal = 0; 709 force = cfgPtr->force; 710 711 /* 712 We're going to try to be a little intelligent here. If one 713 component's label is bogus, and we can identify that it's the 714 *only* one that's gone, we'll mark it as "failed" and allow 715 the configuration to proceed. This will be the *only* case 716 that we'll proceed if there would be (otherwise) fatal errors. 717 718 Basically we simply keep a count of how many components had 719 what serial number. If all but one agree, we simply mark 720 the disagreeing component as being failed, and allow 721 things to come up "normally". 722 723 We do this first for serial numbers, and then for "mod_counter". 724 725 */ 726 727 num_ser = 0; 728 num_mod = 0; 729 730 for (c = 0; c < raidPtr->numCol; c++) { 731 ci_label = raidget_component_label(raidPtr, c); 732 found=0; 733 for(i=0;i<num_ser;i++) { 734 if (ser_values[i] == ci_label->serial_number) { 735 ser_count[i]++; 736 found=1; 737 break; 738 } 739 } 740 if (!found) { 741 ser_values[num_ser] = ci_label->serial_number; 742 ser_count[num_ser] = 1; 743 num_ser++; 744 if (num_ser>2) { 745 fatal_error = 1; 746 break; 747 } 748 } 749 found=0; 750 for(i=0;i<num_mod;i++) { 751 if (mod_values[i] == ci_label->mod_counter) { 752 mod_count[i]++; 753 found=1; 754 break; 755 } 756 } 757 if (!found) { 758 mod_values[num_mod] = ci_label->mod_counter; 759 mod_count[num_mod] = 1; 760 num_mod++; 761 if (num_mod>2) { 762 fatal_error = 1; 763 break; 764 } 765 } 766 } 767 #if DEBUG 768 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 769 for(i=0;i<num_ser;i++) { 770 printf("%d %d\n", ser_values[i], ser_count[i]); 771 } 772 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 773 for(i=0;i<num_mod;i++) { 774 printf("%d %d\n", mod_values[i], mod_count[i]); 775 } 776 #endif 777 serial_number = ser_values[0]; 778 if (num_ser == 2) { 779 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 780 /* Locate the maverick component */ 781 if (ser_count[1] > ser_count[0]) { 782 serial_number = ser_values[1]; 783 } 784 785 for (c = 0; c < raidPtr->numCol; c++) { 786 ci_label = raidget_component_label(raidPtr, c); 787 if (serial_number != ci_label->serial_number) { 788 hosed_column = c; 789 break; 790 } 791 } 792 printf("Hosed component: %s\n", 793 &cfgPtr->devnames[0][hosed_column][0]); 794 if (!force) { 795 /* we'll fail this component, as if there are 796 other major errors, we arn't forcing things 797 and we'll abort the config anyways */ 798 raidPtr->Disks[hosed_column].status 799 = rf_ds_failed; 800 raidPtr->numFailures++; 801 raidPtr->status = rf_rs_degraded; 802 } 803 } else { 804 too_fatal = 1; 805 } 806 if (cfgPtr->parityConfig == '0') { 807 /* We've identified two different serial numbers. 808 RAID 0 can't cope with that, so we'll punt */ 809 too_fatal = 1; 810 } 811 812 } 813 814 /* record the serial number for later. If we bail later, setting 815 this doesn't matter, otherwise we've got the best guess at the 816 correct serial number */ 817 raidPtr->serial_number = serial_number; 818 819 mod_number = mod_values[0]; 820 if (num_mod == 2) { 821 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 822 /* Locate the maverick component */ 823 if (mod_count[1] > mod_count[0]) { 824 mod_number = mod_values[1]; 825 } else if (mod_count[1] < mod_count[0]) { 826 mod_number = mod_values[0]; 827 } else { 828 /* counts of different modification values 829 are the same. Assume greater value is 830 the correct one, all other things 831 considered */ 832 if (mod_values[0] > mod_values[1]) { 833 mod_number = mod_values[0]; 834 } else { 835 mod_number = mod_values[1]; 836 } 837 838 } 839 840 for (c = 0; c < raidPtr->numCol; c++) { 841 ci_label = raidget_component_label(raidPtr, c); 842 if (mod_number != ci_label->mod_counter) { 843 if (hosed_column == c) { 844 /* same one. Can 845 deal with it. */ 846 } else { 847 hosed_column = c; 848 if (num_ser != 1) { 849 too_fatal = 1; 850 break; 851 } 852 } 853 } 854 } 855 printf("Hosed component: %s\n", 856 &cfgPtr->devnames[0][hosed_column][0]); 857 if (!force) { 858 /* we'll fail this component, as if there are 859 other major errors, we arn't forcing things 860 and we'll abort the config anyways */ 861 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) { 862 raidPtr->Disks[hosed_column].status 863 = rf_ds_failed; 864 raidPtr->numFailures++; 865 raidPtr->status = rf_rs_degraded; 866 } 867 } 868 } else { 869 too_fatal = 1; 870 } 871 if (cfgPtr->parityConfig == '0') { 872 /* We've identified two different mod counters. 873 RAID 0 can't cope with that, so we'll punt */ 874 too_fatal = 1; 875 } 876 } 877 878 raidPtr->mod_counter = mod_number; 879 880 if (too_fatal) { 881 /* we've had both a serial number mismatch, and a mod_counter 882 mismatch -- and they involved two different components!! 883 Bail -- make things fail so that the user must force 884 the issue... */ 885 hosed_column = -1; 886 fatal_error = 1; 887 } 888 889 if (num_ser > 2) { 890 printf("raid%d: Too many different serial numbers!\n", 891 raidPtr->raidid); 892 fatal_error = 1; 893 } 894 895 if (num_mod > 2) { 896 printf("raid%d: Too many different mod counters!\n", 897 raidPtr->raidid); 898 fatal_error = 1; 899 } 900 901 /* we start by assuming the parity will be good, and flee from 902 that notion at the slightest sign of trouble */ 903 904 parity_good = RF_RAID_CLEAN; 905 906 for (c = 0; c < raidPtr->numCol; c++) { 907 dev_name = &cfgPtr->devnames[0][c][0]; 908 ci_label = raidget_component_label(raidPtr, c); 909 910 if (c == hosed_column) { 911 printf("raid%d: Ignoring %s\n", 912 raidPtr->raidid, dev_name); 913 } else { 914 rf_print_label_status( raidPtr, c, dev_name, ci_label); 915 if (rf_check_label_vitals( raidPtr, 0, c, 916 dev_name, ci_label, 917 serial_number, 918 mod_number )) { 919 fatal_error = 1; 920 } 921 if (ci_label->clean != RF_RAID_CLEAN) { 922 parity_good = RF_RAID_DIRTY; 923 } 924 } 925 } 926 927 if (fatal_error) { 928 parity_good = RF_RAID_DIRTY; 929 } 930 931 /* we note the state of the parity */ 932 raidPtr->parity_good = parity_good; 933 934 return(fatal_error); 935 } 936 937 int 938 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 939 { 940 RF_RaidDisk_t *disks; 941 RF_DiskQueue_t *spareQueues; 942 int ret; 943 unsigned int bs; 944 int spare_number; 945 946 ret=0; 947 948 if (raidPtr->numSpare >= RF_MAXSPARE) { 949 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 950 return(EINVAL); 951 } 952 953 RF_LOCK_MUTEX(raidPtr->mutex); 954 while (raidPtr->adding_hot_spare==1) { 955 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0, 956 &(raidPtr->mutex)); 957 } 958 raidPtr->adding_hot_spare=1; 959 RF_UNLOCK_MUTEX(raidPtr->mutex); 960 961 /* the beginning of the spares... */ 962 disks = &raidPtr->Disks[raidPtr->numCol]; 963 964 spare_number = raidPtr->numSpare; 965 966 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 967 &disks[spare_number], 968 raidPtr->numCol + spare_number); 969 970 if (ret) 971 goto fail; 972 if (disks[spare_number].status != rf_ds_optimal) { 973 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 974 sparePtr->component_name); 975 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 976 ret=EINVAL; 977 goto fail; 978 } else { 979 disks[spare_number].status = rf_ds_spare; 980 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 981 spare_number, 982 disks[spare_number].devname, 983 disks[spare_number].numBlocks, 984 disks[spare_number].blockSize, 985 (long int) disks[spare_number].numBlocks * 986 disks[spare_number].blockSize / 1024 / 1024); 987 } 988 989 990 /* check sizes and block sizes on the spare disk */ 991 bs = 1 << raidPtr->logBytesPerSector; 992 if (disks[spare_number].blockSize != bs) { 993 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 994 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 995 ret = EINVAL; 996 goto fail; 997 } 998 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 999 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 1000 disks[spare_number].devname, 1001 disks[spare_number].blockSize, 1002 raidPtr->sectorsPerDisk); 1003 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 1004 ret = EINVAL; 1005 goto fail; 1006 } else { 1007 if (disks[spare_number].numBlocks > 1008 raidPtr->sectorsPerDisk) { 1009 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 1010 disks[spare_number].devname, 1011 raidPtr->sectorsPerDisk, 1012 disks[spare_number].numBlocks); 1013 1014 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1015 } 1016 } 1017 1018 spareQueues = &raidPtr->Queues[raidPtr->numCol]; 1019 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1020 raidPtr->numCol + spare_number, 1021 raidPtr->qType, 1022 raidPtr->sectorsPerDisk, 1023 raidPtr->Disks[raidPtr->numCol + 1024 spare_number].dev, 1025 raidPtr->maxOutstanding, 1026 &raidPtr->shutdownList, 1027 raidPtr->cleanupList); 1028 1029 RF_LOCK_MUTEX(raidPtr->mutex); 1030 raidPtr->numSpare++; 1031 RF_UNLOCK_MUTEX(raidPtr->mutex); 1032 1033 fail: 1034 RF_LOCK_MUTEX(raidPtr->mutex); 1035 raidPtr->adding_hot_spare=0; 1036 wakeup(&(raidPtr->adding_hot_spare)); 1037 RF_UNLOCK_MUTEX(raidPtr->mutex); 1038 1039 return(ret); 1040 } 1041 1042 int 1043 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 1044 { 1045 int spare_number; 1046 1047 1048 if (raidPtr->numSpare==0) { 1049 printf("No spares to remove!\n"); 1050 return(EINVAL); 1051 } 1052 1053 spare_number = sparePtr->column; 1054 1055 return(EINVAL); /* XXX not implemented yet */ 1056 #if 0 1057 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1058 return(EINVAL); 1059 } 1060 1061 /* verify that this spare isn't in use... */ 1062 1063 1064 1065 1066 /* it's gone.. */ 1067 1068 raidPtr->numSpare--; 1069 1070 return(0); 1071 #endif 1072 } 1073 1074 1075 int 1076 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component) 1077 { 1078 RF_RaidDisk_t *disks; 1079 1080 if ((component->column < 0) || 1081 (component->column >= raidPtr->numCol)) { 1082 return(EINVAL); 1083 } 1084 1085 disks = &raidPtr->Disks[component->column]; 1086 1087 /* 1. This component must be marked as 'failed' */ 1088 1089 return(EINVAL); /* Not implemented yet. */ 1090 } 1091 1092 int 1093 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, 1094 RF_SingleComponent_t *component) 1095 { 1096 1097 /* Issues here include how to 'move' this in if there is IO 1098 taking place (e.g. component queues and such) */ 1099 1100 return(EINVAL); /* Not implemented yet. */ 1101 } 1102