1 /* $NetBSD: rf_disks.c,v 1.73 2010/03/01 21:10:26 jld Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1995 Carnegie-Mellon University. 33 * All rights reserved. 34 * 35 * Author: Mark Holland 36 * 37 * Permission to use, copy, modify and distribute this software and 38 * its documentation is hereby granted, provided that both the copyright 39 * notice and this permission notice appear in all copies of the 40 * software, derivative works or modified versions, and any portions 41 * thereof, and that both notices appear in supporting documentation. 42 * 43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 46 * 47 * Carnegie Mellon requests users of this software to return to 48 * 49 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 50 * School of Computer Science 51 * Carnegie Mellon University 52 * Pittsburgh PA 15213-3890 53 * 54 * any improvements or extensions that they make and grant Carnegie the 55 * rights to redistribute these changes. 56 */ 57 58 /*************************************************************** 59 * rf_disks.c -- code to perform operations on the actual disks 60 ***************************************************************/ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.73 2010/03/01 21:10:26 jld Exp $"); 64 65 #include <dev/raidframe/raidframevar.h> 66 67 #include "rf_raid.h" 68 #include "rf_alloclist.h" 69 #include "rf_utils.h" 70 #include "rf_general.h" 71 #include "rf_options.h" 72 #include "rf_kintf.h" 73 #include "rf_netbsd.h" 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/proc.h> 78 #include <sys/ioctl.h> 79 #include <sys/fcntl.h> 80 #include <sys/vnode.h> 81 #include <sys/kauth.h> 82 83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 84 static void rf_print_label_status( RF_Raid_t *, int, char *, 85 RF_ComponentLabel_t *); 86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 87 RF_ComponentLabel_t *, int, int ); 88 89 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 90 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 91 92 /************************************************************************** 93 * 94 * initialize the disks comprising the array 95 * 96 * We want the spare disks to have regular row,col numbers so that we can 97 * easily substitue a spare for a failed disk. But, the driver code assumes 98 * throughout that the array contains numRow by numCol _non-spare_ disks, so 99 * it's not clear how to fit in the spares. This is an unfortunate holdover 100 * from raidSim. The quick and dirty fix is to make row zero bigger than the 101 * rest, and put all the spares in it. This probably needs to get changed 102 * eventually. 103 * 104 **************************************************************************/ 105 106 int 107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 108 RF_Config_t *cfgPtr) 109 { 110 RF_RaidDisk_t *disks; 111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 112 RF_RowCol_t c; 113 int bs, ret; 114 unsigned i, count, foundone = 0, numFailuresThisRow; 115 int force; 116 117 force = cfgPtr->force; 118 119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 120 if (ret) 121 goto fail; 122 123 disks = raidPtr->Disks; 124 125 numFailuresThisRow = 0; 126 for (c = 0; c < raidPtr->numCol; c++) { 127 ret = rf_ConfigureDisk(raidPtr, 128 &cfgPtr->devnames[0][c][0], 129 &disks[c], c); 130 131 if (ret) 132 goto fail; 133 134 if (disks[c].status == rf_ds_optimal) { 135 ret = raidfetch_component_label(raidPtr, c); 136 if (ret) 137 goto fail; 138 } 139 140 if (disks[c].status != rf_ds_optimal) { 141 numFailuresThisRow++; 142 } else { 143 if (disks[c].numBlocks < min_numblks) 144 min_numblks = disks[c].numBlocks; 145 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 146 c, disks[c].devname, 147 disks[c].numBlocks, 148 disks[c].blockSize, 149 (long int) disks[c].numBlocks * 150 disks[c].blockSize / 1024 / 1024); 151 } 152 } 153 /* XXX fix for n-fault tolerant */ 154 /* XXX this should probably check to see how many failures 155 we can handle for this configuration! */ 156 if (numFailuresThisRow > 0) 157 raidPtr->status = rf_rs_degraded; 158 159 /* all disks must be the same size & have the same block size, bs must 160 * be a power of 2 */ 161 bs = 0; 162 foundone = 0; 163 for (c = 0; c < raidPtr->numCol; c++) { 164 if (disks[c].status == rf_ds_optimal) { 165 bs = disks[c].blockSize; 166 foundone = 1; 167 break; 168 } 169 } 170 if (!foundone) { 171 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 172 ret = EINVAL; 173 goto fail; 174 } 175 for (count = 0, i = 1; i; i <<= 1) 176 if (bs & i) 177 count++; 178 if (count != 1) { 179 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 180 ret = EINVAL; 181 goto fail; 182 } 183 184 if (rf_CheckLabels( raidPtr, cfgPtr )) { 185 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 186 if (force != 0) { 187 printf("raid%d: Fatal errors being ignored.\n", 188 raidPtr->raidid); 189 } else { 190 ret = EINVAL; 191 goto fail; 192 } 193 } 194 195 for (c = 0; c < raidPtr->numCol; c++) { 196 if (disks[c].status == rf_ds_optimal) { 197 if (disks[c].blockSize != bs) { 198 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c); 199 ret = EINVAL; 200 goto fail; 201 } 202 if (disks[c].numBlocks != min_numblks) { 203 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n", 204 c, (int) min_numblks); 205 disks[c].numBlocks = min_numblks; 206 } 207 } 208 } 209 210 raidPtr->sectorsPerDisk = min_numblks; 211 raidPtr->logBytesPerSector = ffs(bs) - 1; 212 raidPtr->bytesPerSector = bs; 213 raidPtr->sectorMask = bs - 1; 214 return (0); 215 216 fail: 217 218 rf_UnconfigureVnodes( raidPtr ); 219 220 return (ret); 221 } 222 223 224 /**************************************************************************** 225 * set up the data structures describing the spare disks in the array 226 * recall from the above comment that the spare disk descriptors are stored 227 * in row zero, which is specially expanded to hold them. 228 ****************************************************************************/ 229 int 230 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 231 RF_Config_t *cfgPtr) 232 { 233 int i, ret; 234 unsigned int bs; 235 RF_RaidDisk_t *disks; 236 int num_spares_done; 237 238 num_spares_done = 0; 239 240 /* The space for the spares should have already been allocated by 241 * ConfigureDisks() */ 242 243 disks = &raidPtr->Disks[raidPtr->numCol]; 244 for (i = 0; i < raidPtr->numSpare; i++) { 245 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 246 &disks[i], raidPtr->numCol + i); 247 if (ret) 248 goto fail; 249 if (disks[i].status != rf_ds_optimal) { 250 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 251 &cfgPtr->spare_names[i][0]); 252 } else { 253 disks[i].status = rf_ds_spare; /* change status to 254 * spare */ 255 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i, 256 disks[i].devname, 257 disks[i].numBlocks, disks[i].blockSize, 258 (long int) disks[i].numBlocks * 259 disks[i].blockSize / 1024 / 1024); 260 } 261 num_spares_done++; 262 } 263 264 /* check sizes and block sizes on spare disks */ 265 bs = 1 << raidPtr->logBytesPerSector; 266 for (i = 0; i < raidPtr->numSpare; i++) { 267 if (disks[i].blockSize != bs) { 268 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 269 ret = EINVAL; 270 goto fail; 271 } 272 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 273 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 274 disks[i].devname, disks[i].blockSize, 275 raidPtr->sectorsPerDisk); 276 ret = EINVAL; 277 goto fail; 278 } else 279 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 280 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 281 disks[i].devname, 282 raidPtr->sectorsPerDisk, 283 disks[i].numBlocks); 284 285 disks[i].numBlocks = raidPtr->sectorsPerDisk; 286 } 287 } 288 289 return (0); 290 291 fail: 292 293 /* Release the hold on the main components. We've failed to allocate 294 * a spare, and since we're failing, we need to free things.. 295 296 XXX failing to allocate a spare is *not* that big of a deal... 297 We *can* survive without it, if need be, esp. if we get hot 298 adding working. 299 300 If we don't fail out here, then we need a way to remove this spare... 301 that should be easier to do here than if we are "live"... 302 303 */ 304 305 rf_UnconfigureVnodes( raidPtr ); 306 307 return (ret); 308 } 309 310 static int 311 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 312 { 313 int ret; 314 315 /* We allocate RF_MAXSPARE on the first row so that we 316 have room to do hot-swapping of spares */ 317 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) * 318 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 319 raidPtr->cleanupList); 320 if (raidPtr->Disks == NULL) { 321 ret = ENOMEM; 322 goto fail; 323 } 324 325 /* get space for device specific stuff.. */ 326 RF_MallocAndAdd(raidPtr->raid_cinfo, 327 (raidPtr->numCol + RF_MAXSPARE) * 328 sizeof(struct raidcinfo), (struct raidcinfo *), 329 raidPtr->cleanupList); 330 331 if (raidPtr->raid_cinfo == NULL) { 332 ret = ENOMEM; 333 goto fail; 334 } 335 336 return(0); 337 fail: 338 rf_UnconfigureVnodes( raidPtr ); 339 340 return(ret); 341 } 342 343 344 /* configure a single disk during auto-configuration at boot */ 345 int 346 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, 347 RF_AutoConfig_t *auto_config) 348 { 349 RF_RaidDisk_t *disks; 350 RF_RaidDisk_t *diskPtr; 351 RF_RowCol_t c; 352 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 353 int bs, ret; 354 int numFailuresThisRow; 355 RF_AutoConfig_t *ac; 356 int parity_good; 357 int mod_counter; 358 int mod_counter_found; 359 360 #if DEBUG 361 printf("Starting autoconfiguration of RAID set...\n"); 362 #endif 363 364 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 365 if (ret) 366 goto fail; 367 368 disks = raidPtr->Disks; 369 370 /* assume the parity will be fine.. */ 371 parity_good = RF_RAID_CLEAN; 372 373 /* Check for mod_counters that are too low */ 374 mod_counter_found = 0; 375 mod_counter = 0; 376 ac = auto_config; 377 while(ac!=NULL) { 378 if (mod_counter_found==0) { 379 mod_counter = ac->clabel->mod_counter; 380 mod_counter_found = 1; 381 } else { 382 if (ac->clabel->mod_counter > mod_counter) { 383 mod_counter = ac->clabel->mod_counter; 384 } 385 } 386 ac->flag = 0; /* clear the general purpose flag */ 387 ac = ac->next; 388 } 389 390 bs = 0; 391 392 numFailuresThisRow = 0; 393 for (c = 0; c < raidPtr->numCol; c++) { 394 diskPtr = &disks[c]; 395 396 /* find this row/col in the autoconfig */ 397 #if DEBUG 398 printf("Looking for %d in autoconfig\n",c); 399 #endif 400 ac = auto_config; 401 while(ac!=NULL) { 402 if (ac->clabel==NULL) { 403 /* big-time bad news. */ 404 goto fail; 405 } 406 if ((ac->clabel->column == c) && 407 (ac->clabel->mod_counter == mod_counter)) { 408 /* it's this one... */ 409 /* flag it as 'used', so we don't 410 free it later. */ 411 ac->flag = 1; 412 #if DEBUG 413 printf("Found: %s at %d\n", 414 ac->devname,c); 415 #endif 416 417 break; 418 } 419 ac=ac->next; 420 } 421 422 if (ac==NULL) { 423 /* we didn't find an exact match with a 424 correct mod_counter above... can we find 425 one with an incorrect mod_counter to use 426 instead? (this one, if we find it, will be 427 marked as failed once the set configures) 428 */ 429 430 ac = auto_config; 431 while(ac!=NULL) { 432 if (ac->clabel==NULL) { 433 /* big-time bad news. */ 434 goto fail; 435 } 436 if (ac->clabel->column == c) { 437 /* it's this one... 438 flag it as 'used', so we 439 don't free it later. */ 440 ac->flag = 1; 441 #if DEBUG 442 printf("Found(low mod_counter): %s at %d\n", 443 ac->devname,c); 444 #endif 445 446 break; 447 } 448 ac=ac->next; 449 } 450 } 451 452 453 454 if (ac!=NULL) { 455 /* Found it. Configure it.. */ 456 diskPtr->blockSize = ac->clabel->blockSize; 457 diskPtr->numBlocks = ac->clabel->numBlocks; 458 /* Note: rf_protectedSectors is already 459 factored into numBlocks here */ 460 raidPtr->raid_cinfo[c].ci_vp = ac->vp; 461 raidPtr->raid_cinfo[c].ci_dev = ac->dev; 462 463 memcpy(raidget_component_label(raidPtr, c), 464 ac->clabel, sizeof(*ac->clabel)); 465 snprintf(diskPtr->devname, sizeof(diskPtr->devname), 466 "/dev/%s", ac->devname); 467 468 /* note the fact that this component was 469 autoconfigured. You'll need this info 470 later. Trust me :) */ 471 diskPtr->auto_configured = 1; 472 diskPtr->dev = ac->dev; 473 474 /* 475 * we allow the user to specify that 476 * only a fraction of the disks should 477 * be used this is just for debug: it 478 * speeds up the parity scan 479 */ 480 481 diskPtr->numBlocks = diskPtr->numBlocks * 482 rf_sizePercentage / 100; 483 484 /* XXX these will get set multiple times, 485 but since we're autoconfiguring, they'd 486 better be always the same each time! 487 If not, this is the least of your worries */ 488 489 bs = diskPtr->blockSize; 490 min_numblks = diskPtr->numBlocks; 491 492 /* this gets done multiple times, but that's 493 fine -- the serial number will be the same 494 for all components, guaranteed */ 495 raidPtr->serial_number = ac->clabel->serial_number; 496 /* check the last time the label was modified */ 497 498 if (ac->clabel->mod_counter != mod_counter) { 499 /* Even though we've filled in all of 500 the above, we don't trust this 501 component since it's modification 502 counter is not in sync with the 503 rest, and we really consider it to 504 be failed. */ 505 disks[c].status = rf_ds_failed; 506 numFailuresThisRow++; 507 } else { 508 if (ac->clabel->clean != RF_RAID_CLEAN) { 509 parity_good = RF_RAID_DIRTY; 510 } 511 } 512 } else { 513 /* Didn't find it at all!! Component must 514 really be dead */ 515 disks[c].status = rf_ds_failed; 516 snprintf(disks[c].devname, sizeof(disks[c].devname), 517 "component%d", c); 518 numFailuresThisRow++; 519 } 520 } 521 /* XXX fix for n-fault tolerant */ 522 /* XXX this should probably check to see how many failures 523 we can handle for this configuration! */ 524 if (numFailuresThisRow > 0) { 525 raidPtr->status = rf_rs_degraded; 526 raidPtr->numFailures = numFailuresThisRow; 527 } 528 529 /* close the device for the ones that didn't get used */ 530 531 ac = auto_config; 532 while(ac!=NULL) { 533 if (ac->flag == 0) { 534 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 535 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); 536 vput(ac->vp); 537 ac->vp = NULL; 538 #if DEBUG 539 printf("Released %s from auto-config set.\n", 540 ac->devname); 541 #endif 542 } 543 ac = ac->next; 544 } 545 546 raidPtr->mod_counter = mod_counter; 547 548 /* note the state of the parity, if any */ 549 raidPtr->parity_good = parity_good; 550 raidPtr->sectorsPerDisk = min_numblks; 551 raidPtr->logBytesPerSector = ffs(bs) - 1; 552 raidPtr->bytesPerSector = bs; 553 raidPtr->sectorMask = bs - 1; 554 return (0); 555 556 fail: 557 558 rf_UnconfigureVnodes( raidPtr ); 559 560 return (ret); 561 562 } 563 564 /* configure a single disk in the array */ 565 int 566 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr, 567 RF_RowCol_t col) 568 { 569 char *p; 570 struct vnode *vp; 571 struct vattr va; 572 int error; 573 574 p = rf_find_non_white(bf); 575 if (p[strlen(p) - 1] == '\n') { 576 /* strip off the newline */ 577 p[strlen(p) - 1] = '\0'; 578 } 579 (void) strcpy(diskPtr->devname, p); 580 581 /* Let's start by claiming the component is fine and well... */ 582 diskPtr->status = rf_ds_optimal; 583 584 raidPtr->raid_cinfo[col].ci_vp = NULL; 585 raidPtr->raid_cinfo[col].ci_dev = 0; 586 587 if (!strcmp("absent", diskPtr->devname)) { 588 printf("Ignoring missing component at column %d\n", col); 589 sprintf(diskPtr->devname, "component%d", col); 590 diskPtr->status = rf_ds_failed; 591 return (0); 592 } 593 594 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE); 595 if (error) { 596 printf("dk_lookup on device: %s failed!\n", diskPtr->devname); 597 if (error == ENXIO) { 598 /* the component isn't there... must be dead :-( */ 599 diskPtr->status = rf_ds_failed; 600 } else { 601 return (error); 602 } 603 } 604 if (diskPtr->status == rf_ds_optimal) { 605 606 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0) 607 return (error); 608 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0) 609 return (error); 610 611 raidPtr->raid_cinfo[col].ci_vp = vp; 612 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev; 613 614 /* This component was not automatically configured */ 615 diskPtr->auto_configured = 0; 616 diskPtr->dev = va.va_rdev; 617 618 /* we allow the user to specify that only a fraction of the 619 * disks should be used this is just for debug: it speeds up 620 * the parity scan */ 621 diskPtr->numBlocks = diskPtr->numBlocks * 622 rf_sizePercentage / 100; 623 } 624 return (0); 625 } 626 627 static void 628 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name, 629 RF_ComponentLabel_t *ci_label) 630 { 631 632 printf("raid%d: Component %s being configured at col: %d\n", 633 raidPtr->raidid, dev_name, column ); 634 printf(" Column: %d Num Columns: %d\n", 635 ci_label->column, 636 ci_label->num_columns); 637 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 638 ci_label->version, ci_label->serial_number, 639 ci_label->mod_counter); 640 printf(" Clean: %s Status: %d\n", 641 ci_label->clean ? "Yes" : "No", ci_label->status ); 642 } 643 644 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, 645 char *dev_name, RF_ComponentLabel_t *ci_label, 646 int serial_number, int mod_counter) 647 { 648 int fatal_error = 0; 649 650 if (serial_number != ci_label->serial_number) { 651 printf("%s has a different serial number: %d %d\n", 652 dev_name, serial_number, ci_label->serial_number); 653 fatal_error = 1; 654 } 655 if (mod_counter != ci_label->mod_counter) { 656 printf("%s has a different modification count: %d %d\n", 657 dev_name, mod_counter, ci_label->mod_counter); 658 } 659 660 if (row != ci_label->row) { 661 printf("Row out of alignment for: %s\n", dev_name); 662 fatal_error = 1; 663 } 664 if (column != ci_label->column) { 665 printf("Column out of alignment for: %s\n", dev_name); 666 fatal_error = 1; 667 } 668 if (raidPtr->numCol != ci_label->num_columns) { 669 printf("Number of columns do not match for: %s\n", dev_name); 670 fatal_error = 1; 671 } 672 if (ci_label->clean == 0) { 673 /* it's not clean, but that's not fatal */ 674 printf("%s is not clean!\n", dev_name); 675 } 676 return(fatal_error); 677 } 678 679 680 /* 681 682 rf_CheckLabels() - check all the component labels for consistency. 683 Return an error if there is anything major amiss. 684 685 */ 686 687 int 688 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 689 { 690 int c; 691 char *dev_name; 692 RF_ComponentLabel_t *ci_label; 693 int serial_number = 0; 694 int mod_number = 0; 695 int fatal_error = 0; 696 int mod_values[4]; 697 int mod_count[4]; 698 int ser_values[4]; 699 int ser_count[4]; 700 int num_ser; 701 int num_mod; 702 int i; 703 int found; 704 int hosed_column; 705 int too_fatal; 706 int parity_good; 707 int force; 708 709 hosed_column = -1; 710 too_fatal = 0; 711 force = cfgPtr->force; 712 713 /* 714 We're going to try to be a little intelligent here. If one 715 component's label is bogus, and we can identify that it's the 716 *only* one that's gone, we'll mark it as "failed" and allow 717 the configuration to proceed. This will be the *only* case 718 that we'll proceed if there would be (otherwise) fatal errors. 719 720 Basically we simply keep a count of how many components had 721 what serial number. If all but one agree, we simply mark 722 the disagreeing component as being failed, and allow 723 things to come up "normally". 724 725 We do this first for serial numbers, and then for "mod_counter". 726 727 */ 728 729 num_ser = 0; 730 num_mod = 0; 731 732 for (c = 0; c < raidPtr->numCol; c++) { 733 ci_label = raidget_component_label(raidPtr, c); 734 found=0; 735 for(i=0;i<num_ser;i++) { 736 if (ser_values[i] == ci_label->serial_number) { 737 ser_count[i]++; 738 found=1; 739 break; 740 } 741 } 742 if (!found) { 743 ser_values[num_ser] = ci_label->serial_number; 744 ser_count[num_ser] = 1; 745 num_ser++; 746 if (num_ser>2) { 747 fatal_error = 1; 748 break; 749 } 750 } 751 found=0; 752 for(i=0;i<num_mod;i++) { 753 if (mod_values[i] == ci_label->mod_counter) { 754 mod_count[i]++; 755 found=1; 756 break; 757 } 758 } 759 if (!found) { 760 mod_values[num_mod] = ci_label->mod_counter; 761 mod_count[num_mod] = 1; 762 num_mod++; 763 if (num_mod>2) { 764 fatal_error = 1; 765 break; 766 } 767 } 768 } 769 #if DEBUG 770 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 771 for(i=0;i<num_ser;i++) { 772 printf("%d %d\n", ser_values[i], ser_count[i]); 773 } 774 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 775 for(i=0;i<num_mod;i++) { 776 printf("%d %d\n", mod_values[i], mod_count[i]); 777 } 778 #endif 779 serial_number = ser_values[0]; 780 if (num_ser == 2) { 781 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 782 /* Locate the maverick component */ 783 if (ser_count[1] > ser_count[0]) { 784 serial_number = ser_values[1]; 785 } 786 787 for (c = 0; c < raidPtr->numCol; c++) { 788 ci_label = raidget_component_label(raidPtr, c); 789 if (serial_number != ci_label->serial_number) { 790 hosed_column = c; 791 break; 792 } 793 } 794 printf("Hosed component: %s\n", 795 &cfgPtr->devnames[0][hosed_column][0]); 796 if (!force) { 797 /* we'll fail this component, as if there are 798 other major errors, we arn't forcing things 799 and we'll abort the config anyways */ 800 raidPtr->Disks[hosed_column].status 801 = rf_ds_failed; 802 raidPtr->numFailures++; 803 raidPtr->status = rf_rs_degraded; 804 } 805 } else { 806 too_fatal = 1; 807 } 808 if (cfgPtr->parityConfig == '0') { 809 /* We've identified two different serial numbers. 810 RAID 0 can't cope with that, so we'll punt */ 811 too_fatal = 1; 812 } 813 814 } 815 816 /* record the serial number for later. If we bail later, setting 817 this doesn't matter, otherwise we've got the best guess at the 818 correct serial number */ 819 raidPtr->serial_number = serial_number; 820 821 mod_number = mod_values[0]; 822 if (num_mod == 2) { 823 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 824 /* Locate the maverick component */ 825 if (mod_count[1] > mod_count[0]) { 826 mod_number = mod_values[1]; 827 } else if (mod_count[1] < mod_count[0]) { 828 mod_number = mod_values[0]; 829 } else { 830 /* counts of different modification values 831 are the same. Assume greater value is 832 the correct one, all other things 833 considered */ 834 if (mod_values[0] > mod_values[1]) { 835 mod_number = mod_values[0]; 836 } else { 837 mod_number = mod_values[1]; 838 } 839 840 } 841 842 for (c = 0; c < raidPtr->numCol; c++) { 843 ci_label = raidget_component_label(raidPtr, c); 844 if (mod_number != ci_label->mod_counter) { 845 if (hosed_column == c) { 846 /* same one. Can 847 deal with it. */ 848 } else { 849 hosed_column = c; 850 if (num_ser != 1) { 851 too_fatal = 1; 852 break; 853 } 854 } 855 } 856 } 857 printf("Hosed component: %s\n", 858 &cfgPtr->devnames[0][hosed_column][0]); 859 if (!force) { 860 /* we'll fail this component, as if there are 861 other major errors, we arn't forcing things 862 and we'll abort the config anyways */ 863 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) { 864 raidPtr->Disks[hosed_column].status 865 = rf_ds_failed; 866 raidPtr->numFailures++; 867 raidPtr->status = rf_rs_degraded; 868 } 869 } 870 } else { 871 too_fatal = 1; 872 } 873 if (cfgPtr->parityConfig == '0') { 874 /* We've identified two different mod counters. 875 RAID 0 can't cope with that, so we'll punt */ 876 too_fatal = 1; 877 } 878 } 879 880 raidPtr->mod_counter = mod_number; 881 882 if (too_fatal) { 883 /* we've had both a serial number mismatch, and a mod_counter 884 mismatch -- and they involved two different components!! 885 Bail -- make things fail so that the user must force 886 the issue... */ 887 hosed_column = -1; 888 fatal_error = 1; 889 } 890 891 if (num_ser > 2) { 892 printf("raid%d: Too many different serial numbers!\n", 893 raidPtr->raidid); 894 fatal_error = 1; 895 } 896 897 if (num_mod > 2) { 898 printf("raid%d: Too many different mod counters!\n", 899 raidPtr->raidid); 900 fatal_error = 1; 901 } 902 903 /* we start by assuming the parity will be good, and flee from 904 that notion at the slightest sign of trouble */ 905 906 parity_good = RF_RAID_CLEAN; 907 908 for (c = 0; c < raidPtr->numCol; c++) { 909 dev_name = &cfgPtr->devnames[0][c][0]; 910 ci_label = raidget_component_label(raidPtr, c); 911 912 if (c == hosed_column) { 913 printf("raid%d: Ignoring %s\n", 914 raidPtr->raidid, dev_name); 915 } else { 916 rf_print_label_status( raidPtr, c, dev_name, ci_label); 917 if (rf_check_label_vitals( raidPtr, 0, c, 918 dev_name, ci_label, 919 serial_number, 920 mod_number )) { 921 fatal_error = 1; 922 } 923 if (ci_label->clean != RF_RAID_CLEAN) { 924 parity_good = RF_RAID_DIRTY; 925 } 926 } 927 } 928 929 if (fatal_error) { 930 parity_good = RF_RAID_DIRTY; 931 } 932 933 /* we note the state of the parity */ 934 raidPtr->parity_good = parity_good; 935 936 return(fatal_error); 937 } 938 939 int 940 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 941 { 942 RF_RaidDisk_t *disks; 943 RF_DiskQueue_t *spareQueues; 944 int ret; 945 unsigned int bs; 946 int spare_number; 947 948 ret=0; 949 950 if (raidPtr->numSpare >= RF_MAXSPARE) { 951 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 952 return(EINVAL); 953 } 954 955 RF_LOCK_MUTEX(raidPtr->mutex); 956 while (raidPtr->adding_hot_spare==1) { 957 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0, 958 &(raidPtr->mutex)); 959 } 960 raidPtr->adding_hot_spare=1; 961 RF_UNLOCK_MUTEX(raidPtr->mutex); 962 963 /* the beginning of the spares... */ 964 disks = &raidPtr->Disks[raidPtr->numCol]; 965 966 spare_number = raidPtr->numSpare; 967 968 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 969 &disks[spare_number], 970 raidPtr->numCol + spare_number); 971 972 if (ret) 973 goto fail; 974 if (disks[spare_number].status != rf_ds_optimal) { 975 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 976 sparePtr->component_name); 977 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 978 ret=EINVAL; 979 goto fail; 980 } else { 981 disks[spare_number].status = rf_ds_spare; 982 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 983 spare_number, 984 disks[spare_number].devname, 985 disks[spare_number].numBlocks, 986 disks[spare_number].blockSize, 987 (long int) disks[spare_number].numBlocks * 988 disks[spare_number].blockSize / 1024 / 1024); 989 } 990 991 992 /* check sizes and block sizes on the spare disk */ 993 bs = 1 << raidPtr->logBytesPerSector; 994 if (disks[spare_number].blockSize != bs) { 995 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 996 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 997 ret = EINVAL; 998 goto fail; 999 } 1000 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1001 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 1002 disks[spare_number].devname, 1003 disks[spare_number].blockSize, 1004 raidPtr->sectorsPerDisk); 1005 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 1006 ret = EINVAL; 1007 goto fail; 1008 } else { 1009 if (disks[spare_number].numBlocks > 1010 raidPtr->sectorsPerDisk) { 1011 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 1012 disks[spare_number].devname, 1013 raidPtr->sectorsPerDisk, 1014 disks[spare_number].numBlocks); 1015 1016 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1017 } 1018 } 1019 1020 spareQueues = &raidPtr->Queues[raidPtr->numCol]; 1021 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1022 raidPtr->numCol + spare_number, 1023 raidPtr->qType, 1024 raidPtr->sectorsPerDisk, 1025 raidPtr->Disks[raidPtr->numCol + 1026 spare_number].dev, 1027 raidPtr->maxOutstanding, 1028 &raidPtr->shutdownList, 1029 raidPtr->cleanupList); 1030 1031 RF_LOCK_MUTEX(raidPtr->mutex); 1032 raidPtr->numSpare++; 1033 RF_UNLOCK_MUTEX(raidPtr->mutex); 1034 1035 fail: 1036 RF_LOCK_MUTEX(raidPtr->mutex); 1037 raidPtr->adding_hot_spare=0; 1038 wakeup(&(raidPtr->adding_hot_spare)); 1039 RF_UNLOCK_MUTEX(raidPtr->mutex); 1040 1041 return(ret); 1042 } 1043 1044 int 1045 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 1046 { 1047 int spare_number; 1048 1049 1050 if (raidPtr->numSpare==0) { 1051 printf("No spares to remove!\n"); 1052 return(EINVAL); 1053 } 1054 1055 spare_number = sparePtr->column; 1056 1057 return(EINVAL); /* XXX not implemented yet */ 1058 #if 0 1059 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1060 return(EINVAL); 1061 } 1062 1063 /* verify that this spare isn't in use... */ 1064 1065 1066 1067 1068 /* it's gone.. */ 1069 1070 raidPtr->numSpare--; 1071 1072 return(0); 1073 #endif 1074 } 1075 1076 1077 int 1078 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component) 1079 { 1080 RF_RaidDisk_t *disks; 1081 1082 if ((component->column < 0) || 1083 (component->column >= raidPtr->numCol)) { 1084 return(EINVAL); 1085 } 1086 1087 disks = &raidPtr->Disks[component->column]; 1088 1089 /* 1. This component must be marked as 'failed' */ 1090 1091 return(EINVAL); /* Not implemented yet. */ 1092 } 1093 1094 int 1095 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, 1096 RF_SingleComponent_t *component) 1097 { 1098 1099 /* Issues here include how to 'move' this in if there is IO 1100 taking place (e.g. component queues and such) */ 1101 1102 return(EINVAL); /* Not implemented yet. */ 1103 } 1104