1 /* $NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1995 Carnegie-Mellon University. 33 * All rights reserved. 34 * 35 * Author: Mark Holland 36 * 37 * Permission to use, copy, modify and distribute this software and 38 * its documentation is hereby granted, provided that both the copyright 39 * notice and this permission notice appear in all copies of the 40 * software, derivative works or modified versions, and any portions 41 * thereof, and that both notices appear in supporting documentation. 42 * 43 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 44 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 45 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 46 * 47 * Carnegie Mellon requests users of this software to return to 48 * 49 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 50 * School of Computer Science 51 * Carnegie Mellon University 52 * Pittsburgh PA 15213-3890 53 * 54 * any improvements or extensions that they make and grant Carnegie the 55 * rights to redistribute these changes. 56 */ 57 58 /*************************************************************** 59 * rf_disks.c -- code to perform operations on the actual disks 60 ***************************************************************/ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.71 2009/04/03 16:23:41 sborrill Exp $"); 64 65 #include <dev/raidframe/raidframevar.h> 66 67 #include "rf_raid.h" 68 #include "rf_alloclist.h" 69 #include "rf_utils.h" 70 #include "rf_general.h" 71 #include "rf_options.h" 72 #include "rf_kintf.h" 73 #include "rf_netbsd.h" 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/proc.h> 78 #include <sys/ioctl.h> 79 #include <sys/fcntl.h> 80 #include <sys/vnode.h> 81 #include <sys/kauth.h> 82 83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 84 static void rf_print_label_status( RF_Raid_t *, int, char *, 85 RF_ComponentLabel_t *); 86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 87 RF_ComponentLabel_t *, int, int ); 88 89 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 90 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 91 92 /************************************************************************** 93 * 94 * initialize the disks comprising the array 95 * 96 * We want the spare disks to have regular row,col numbers so that we can 97 * easily substitue a spare for a failed disk. But, the driver code assumes 98 * throughout that the array contains numRow by numCol _non-spare_ disks, so 99 * it's not clear how to fit in the spares. This is an unfortunate holdover 100 * from raidSim. The quick and dirty fix is to make row zero bigger than the 101 * rest, and put all the spares in it. This probably needs to get changed 102 * eventually. 103 * 104 **************************************************************************/ 105 106 int 107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 108 RF_Config_t *cfgPtr) 109 { 110 RF_RaidDisk_t *disks; 111 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 112 RF_RowCol_t c; 113 int bs, ret; 114 unsigned i, count, foundone = 0, numFailuresThisRow; 115 int force; 116 117 force = cfgPtr->force; 118 119 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 120 if (ret) 121 goto fail; 122 123 disks = raidPtr->Disks; 124 125 numFailuresThisRow = 0; 126 for (c = 0; c < raidPtr->numCol; c++) { 127 ret = rf_ConfigureDisk(raidPtr, 128 &cfgPtr->devnames[0][c][0], 129 &disks[c], c); 130 131 if (ret) 132 goto fail; 133 134 if (disks[c].status == rf_ds_optimal) { 135 raidread_component_label( 136 raidPtr->raid_cinfo[c].ci_dev, 137 raidPtr->raid_cinfo[c].ci_vp, 138 &raidPtr->raid_cinfo[c].ci_label); 139 } 140 141 if (disks[c].status != rf_ds_optimal) { 142 numFailuresThisRow++; 143 } else { 144 if (disks[c].numBlocks < min_numblks) 145 min_numblks = disks[c].numBlocks; 146 DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 147 c, disks[c].devname, 148 disks[c].numBlocks, 149 disks[c].blockSize, 150 (long int) disks[c].numBlocks * 151 disks[c].blockSize / 1024 / 1024); 152 } 153 } 154 /* XXX fix for n-fault tolerant */ 155 /* XXX this should probably check to see how many failures 156 we can handle for this configuration! */ 157 if (numFailuresThisRow > 0) 158 raidPtr->status = rf_rs_degraded; 159 160 /* all disks must be the same size & have the same block size, bs must 161 * be a power of 2 */ 162 bs = 0; 163 foundone = 0; 164 for (c = 0; c < raidPtr->numCol; c++) { 165 if (disks[c].status == rf_ds_optimal) { 166 bs = disks[c].blockSize; 167 foundone = 1; 168 break; 169 } 170 } 171 if (!foundone) { 172 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 173 ret = EINVAL; 174 goto fail; 175 } 176 for (count = 0, i = 1; i; i <<= 1) 177 if (bs & i) 178 count++; 179 if (count != 1) { 180 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 181 ret = EINVAL; 182 goto fail; 183 } 184 185 if (rf_CheckLabels( raidPtr, cfgPtr )) { 186 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 187 if (force != 0) { 188 printf("raid%d: Fatal errors being ignored.\n", 189 raidPtr->raidid); 190 } else { 191 ret = EINVAL; 192 goto fail; 193 } 194 } 195 196 for (c = 0; c < raidPtr->numCol; c++) { 197 if (disks[c].status == rf_ds_optimal) { 198 if (disks[c].blockSize != bs) { 199 RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c); 200 ret = EINVAL; 201 goto fail; 202 } 203 if (disks[c].numBlocks != min_numblks) { 204 RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n", 205 c, (int) min_numblks); 206 disks[c].numBlocks = min_numblks; 207 } 208 } 209 } 210 211 raidPtr->sectorsPerDisk = min_numblks; 212 raidPtr->logBytesPerSector = ffs(bs) - 1; 213 raidPtr->bytesPerSector = bs; 214 raidPtr->sectorMask = bs - 1; 215 return (0); 216 217 fail: 218 219 rf_UnconfigureVnodes( raidPtr ); 220 221 return (ret); 222 } 223 224 225 /**************************************************************************** 226 * set up the data structures describing the spare disks in the array 227 * recall from the above comment that the spare disk descriptors are stored 228 * in row zero, which is specially expanded to hold them. 229 ****************************************************************************/ 230 int 231 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 232 RF_Config_t *cfgPtr) 233 { 234 int i, ret; 235 unsigned int bs; 236 RF_RaidDisk_t *disks; 237 int num_spares_done; 238 239 num_spares_done = 0; 240 241 /* The space for the spares should have already been allocated by 242 * ConfigureDisks() */ 243 244 disks = &raidPtr->Disks[raidPtr->numCol]; 245 for (i = 0; i < raidPtr->numSpare; i++) { 246 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 247 &disks[i], raidPtr->numCol + i); 248 if (ret) 249 goto fail; 250 if (disks[i].status != rf_ds_optimal) { 251 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 252 &cfgPtr->spare_names[i][0]); 253 } else { 254 disks[i].status = rf_ds_spare; /* change status to 255 * spare */ 256 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i, 257 disks[i].devname, 258 disks[i].numBlocks, disks[i].blockSize, 259 (long int) disks[i].numBlocks * 260 disks[i].blockSize / 1024 / 1024); 261 } 262 num_spares_done++; 263 } 264 265 /* check sizes and block sizes on spare disks */ 266 bs = 1 << raidPtr->logBytesPerSector; 267 for (i = 0; i < raidPtr->numSpare; i++) { 268 if (disks[i].blockSize != bs) { 269 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 270 ret = EINVAL; 271 goto fail; 272 } 273 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 274 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 275 disks[i].devname, disks[i].blockSize, 276 raidPtr->sectorsPerDisk); 277 ret = EINVAL; 278 goto fail; 279 } else 280 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 281 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 282 disks[i].devname, 283 raidPtr->sectorsPerDisk, 284 disks[i].numBlocks); 285 286 disks[i].numBlocks = raidPtr->sectorsPerDisk; 287 } 288 } 289 290 return (0); 291 292 fail: 293 294 /* Release the hold on the main components. We've failed to allocate 295 * a spare, and since we're failing, we need to free things.. 296 297 XXX failing to allocate a spare is *not* that big of a deal... 298 We *can* survive without it, if need be, esp. if we get hot 299 adding working. 300 301 If we don't fail out here, then we need a way to remove this spare... 302 that should be easier to do here than if we are "live"... 303 304 */ 305 306 rf_UnconfigureVnodes( raidPtr ); 307 308 return (ret); 309 } 310 311 static int 312 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 313 { 314 int ret; 315 316 /* We allocate RF_MAXSPARE on the first row so that we 317 have room to do hot-swapping of spares */ 318 RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) * 319 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 320 raidPtr->cleanupList); 321 if (raidPtr->Disks == NULL) { 322 ret = ENOMEM; 323 goto fail; 324 } 325 326 /* get space for device specific stuff.. */ 327 RF_MallocAndAdd(raidPtr->raid_cinfo, 328 (raidPtr->numCol + RF_MAXSPARE) * 329 sizeof(struct raidcinfo), (struct raidcinfo *), 330 raidPtr->cleanupList); 331 332 if (raidPtr->raid_cinfo == NULL) { 333 ret = ENOMEM; 334 goto fail; 335 } 336 337 return(0); 338 fail: 339 rf_UnconfigureVnodes( raidPtr ); 340 341 return(ret); 342 } 343 344 345 /* configure a single disk during auto-configuration at boot */ 346 int 347 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, 348 RF_AutoConfig_t *auto_config) 349 { 350 RF_RaidDisk_t *disks; 351 RF_RaidDisk_t *diskPtr; 352 RF_RowCol_t c; 353 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 354 int bs, ret; 355 int numFailuresThisRow; 356 RF_AutoConfig_t *ac; 357 int parity_good; 358 int mod_counter; 359 int mod_counter_found; 360 361 #if DEBUG 362 printf("Starting autoconfiguration of RAID set...\n"); 363 #endif 364 365 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 366 if (ret) 367 goto fail; 368 369 disks = raidPtr->Disks; 370 371 /* assume the parity will be fine.. */ 372 parity_good = RF_RAID_CLEAN; 373 374 /* Check for mod_counters that are too low */ 375 mod_counter_found = 0; 376 mod_counter = 0; 377 ac = auto_config; 378 while(ac!=NULL) { 379 if (mod_counter_found==0) { 380 mod_counter = ac->clabel->mod_counter; 381 mod_counter_found = 1; 382 } else { 383 if (ac->clabel->mod_counter > mod_counter) { 384 mod_counter = ac->clabel->mod_counter; 385 } 386 } 387 ac->flag = 0; /* clear the general purpose flag */ 388 ac = ac->next; 389 } 390 391 bs = 0; 392 393 numFailuresThisRow = 0; 394 for (c = 0; c < raidPtr->numCol; c++) { 395 diskPtr = &disks[c]; 396 397 /* find this row/col in the autoconfig */ 398 #if DEBUG 399 printf("Looking for %d in autoconfig\n",c); 400 #endif 401 ac = auto_config; 402 while(ac!=NULL) { 403 if (ac->clabel==NULL) { 404 /* big-time bad news. */ 405 goto fail; 406 } 407 if ((ac->clabel->column == c) && 408 (ac->clabel->mod_counter == mod_counter)) { 409 /* it's this one... */ 410 /* flag it as 'used', so we don't 411 free it later. */ 412 ac->flag = 1; 413 #if DEBUG 414 printf("Found: %s at %d\n", 415 ac->devname,c); 416 #endif 417 418 break; 419 } 420 ac=ac->next; 421 } 422 423 if (ac==NULL) { 424 /* we didn't find an exact match with a 425 correct mod_counter above... can we find 426 one with an incorrect mod_counter to use 427 instead? (this one, if we find it, will be 428 marked as failed once the set configures) 429 */ 430 431 ac = auto_config; 432 while(ac!=NULL) { 433 if (ac->clabel==NULL) { 434 /* big-time bad news. */ 435 goto fail; 436 } 437 if (ac->clabel->column == c) { 438 /* it's this one... 439 flag it as 'used', so we 440 don't free it later. */ 441 ac->flag = 1; 442 #if DEBUG 443 printf("Found(low mod_counter): %s at %d\n", 444 ac->devname,c); 445 #endif 446 447 break; 448 } 449 ac=ac->next; 450 } 451 } 452 453 454 455 if (ac!=NULL) { 456 /* Found it. Configure it.. */ 457 diskPtr->blockSize = ac->clabel->blockSize; 458 diskPtr->numBlocks = ac->clabel->numBlocks; 459 /* Note: rf_protectedSectors is already 460 factored into numBlocks here */ 461 raidPtr->raid_cinfo[c].ci_vp = ac->vp; 462 raidPtr->raid_cinfo[c].ci_dev = ac->dev; 463 464 memcpy(&raidPtr->raid_cinfo[c].ci_label, 465 ac->clabel, sizeof(*ac->clabel)); 466 snprintf(diskPtr->devname, sizeof(diskPtr->devname), 467 "/dev/%s", ac->devname); 468 469 /* note the fact that this component was 470 autoconfigured. You'll need this info 471 later. Trust me :) */ 472 diskPtr->auto_configured = 1; 473 diskPtr->dev = ac->dev; 474 475 /* 476 * we allow the user to specify that 477 * only a fraction of the disks should 478 * be used this is just for debug: it 479 * speeds up the parity scan 480 */ 481 482 diskPtr->numBlocks = diskPtr->numBlocks * 483 rf_sizePercentage / 100; 484 485 /* XXX these will get set multiple times, 486 but since we're autoconfiguring, they'd 487 better be always the same each time! 488 If not, this is the least of your worries */ 489 490 bs = diskPtr->blockSize; 491 min_numblks = diskPtr->numBlocks; 492 493 /* this gets done multiple times, but that's 494 fine -- the serial number will be the same 495 for all components, guaranteed */ 496 raidPtr->serial_number = ac->clabel->serial_number; 497 /* check the last time the label was modified */ 498 499 if (ac->clabel->mod_counter != mod_counter) { 500 /* Even though we've filled in all of 501 the above, we don't trust this 502 component since it's modification 503 counter is not in sync with the 504 rest, and we really consider it to 505 be failed. */ 506 disks[c].status = rf_ds_failed; 507 numFailuresThisRow++; 508 } else { 509 if (ac->clabel->clean != RF_RAID_CLEAN) { 510 parity_good = RF_RAID_DIRTY; 511 } 512 } 513 } else { 514 /* Didn't find it at all!! Component must 515 really be dead */ 516 disks[c].status = rf_ds_failed; 517 snprintf(disks[c].devname, sizeof(disks[c].devname), 518 "component%d", c); 519 numFailuresThisRow++; 520 } 521 } 522 /* XXX fix for n-fault tolerant */ 523 /* XXX this should probably check to see how many failures 524 we can handle for this configuration! */ 525 if (numFailuresThisRow > 0) { 526 raidPtr->status = rf_rs_degraded; 527 raidPtr->numFailures = numFailuresThisRow; 528 } 529 530 /* close the device for the ones that didn't get used */ 531 532 ac = auto_config; 533 while(ac!=NULL) { 534 if (ac->flag == 0) { 535 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 536 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); 537 vput(ac->vp); 538 ac->vp = NULL; 539 #if DEBUG 540 printf("Released %s from auto-config set.\n", 541 ac->devname); 542 #endif 543 } 544 ac = ac->next; 545 } 546 547 raidPtr->mod_counter = mod_counter; 548 549 /* note the state of the parity, if any */ 550 raidPtr->parity_good = parity_good; 551 raidPtr->sectorsPerDisk = min_numblks; 552 raidPtr->logBytesPerSector = ffs(bs) - 1; 553 raidPtr->bytesPerSector = bs; 554 raidPtr->sectorMask = bs - 1; 555 return (0); 556 557 fail: 558 559 rf_UnconfigureVnodes( raidPtr ); 560 561 return (ret); 562 563 } 564 565 /* configure a single disk in the array */ 566 int 567 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr, 568 RF_RowCol_t col) 569 { 570 char *p; 571 struct vnode *vp; 572 struct vattr va; 573 int error; 574 575 p = rf_find_non_white(bf); 576 if (p[strlen(p) - 1] == '\n') { 577 /* strip off the newline */ 578 p[strlen(p) - 1] = '\0'; 579 } 580 (void) strcpy(diskPtr->devname, p); 581 582 /* Let's start by claiming the component is fine and well... */ 583 diskPtr->status = rf_ds_optimal; 584 585 raidPtr->raid_cinfo[col].ci_vp = NULL; 586 raidPtr->raid_cinfo[col].ci_dev = 0; 587 588 if (!strcmp("absent", diskPtr->devname)) { 589 printf("Ignoring missing component at column %d\n", col); 590 sprintf(diskPtr->devname, "component%d", col); 591 diskPtr->status = rf_ds_failed; 592 return (0); 593 } 594 595 error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE); 596 if (error) { 597 printf("dk_lookup on device: %s failed!\n", diskPtr->devname); 598 if (error == ENXIO) { 599 /* the component isn't there... must be dead :-( */ 600 diskPtr->status = rf_ds_failed; 601 } else { 602 return (error); 603 } 604 } 605 if (diskPtr->status == rf_ds_optimal) { 606 607 if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0) 608 return (error); 609 if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0) 610 return (error); 611 612 raidPtr->raid_cinfo[col].ci_vp = vp; 613 raidPtr->raid_cinfo[col].ci_dev = va.va_rdev; 614 615 /* This component was not automatically configured */ 616 diskPtr->auto_configured = 0; 617 diskPtr->dev = va.va_rdev; 618 619 /* we allow the user to specify that only a fraction of the 620 * disks should be used this is just for debug: it speeds up 621 * the parity scan */ 622 diskPtr->numBlocks = diskPtr->numBlocks * 623 rf_sizePercentage / 100; 624 } 625 return (0); 626 } 627 628 static void 629 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name, 630 RF_ComponentLabel_t *ci_label) 631 { 632 633 printf("raid%d: Component %s being configured at col: %d\n", 634 raidPtr->raidid, dev_name, column ); 635 printf(" Column: %d Num Columns: %d\n", 636 ci_label->column, 637 ci_label->num_columns); 638 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 639 ci_label->version, ci_label->serial_number, 640 ci_label->mod_counter); 641 printf(" Clean: %s Status: %d\n", 642 ci_label->clean ? "Yes" : "No", ci_label->status ); 643 } 644 645 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, 646 char *dev_name, RF_ComponentLabel_t *ci_label, 647 int serial_number, int mod_counter) 648 { 649 int fatal_error = 0; 650 651 if (serial_number != ci_label->serial_number) { 652 printf("%s has a different serial number: %d %d\n", 653 dev_name, serial_number, ci_label->serial_number); 654 fatal_error = 1; 655 } 656 if (mod_counter != ci_label->mod_counter) { 657 printf("%s has a different modification count: %d %d\n", 658 dev_name, mod_counter, ci_label->mod_counter); 659 } 660 661 if (row != ci_label->row) { 662 printf("Row out of alignment for: %s\n", dev_name); 663 fatal_error = 1; 664 } 665 if (column != ci_label->column) { 666 printf("Column out of alignment for: %s\n", dev_name); 667 fatal_error = 1; 668 } 669 if (raidPtr->numCol != ci_label->num_columns) { 670 printf("Number of columns do not match for: %s\n", dev_name); 671 fatal_error = 1; 672 } 673 if (ci_label->clean == 0) { 674 /* it's not clean, but that's not fatal */ 675 printf("%s is not clean!\n", dev_name); 676 } 677 return(fatal_error); 678 } 679 680 681 /* 682 683 rf_CheckLabels() - check all the component labels for consistency. 684 Return an error if there is anything major amiss. 685 686 */ 687 688 int 689 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr) 690 { 691 int c; 692 char *dev_name; 693 RF_ComponentLabel_t *ci_label; 694 int serial_number = 0; 695 int mod_number = 0; 696 int fatal_error = 0; 697 int mod_values[4]; 698 int mod_count[4]; 699 int ser_values[4]; 700 int ser_count[4]; 701 int num_ser; 702 int num_mod; 703 int i; 704 int found; 705 int hosed_column; 706 int too_fatal; 707 int parity_good; 708 int force; 709 710 hosed_column = -1; 711 too_fatal = 0; 712 force = cfgPtr->force; 713 714 /* 715 We're going to try to be a little intelligent here. If one 716 component's label is bogus, and we can identify that it's the 717 *only* one that's gone, we'll mark it as "failed" and allow 718 the configuration to proceed. This will be the *only* case 719 that we'll proceed if there would be (otherwise) fatal errors. 720 721 Basically we simply keep a count of how many components had 722 what serial number. If all but one agree, we simply mark 723 the disagreeing component as being failed, and allow 724 things to come up "normally". 725 726 We do this first for serial numbers, and then for "mod_counter". 727 728 */ 729 730 num_ser = 0; 731 num_mod = 0; 732 733 for (c = 0; c < raidPtr->numCol; c++) { 734 ci_label = &raidPtr->raid_cinfo[c].ci_label; 735 found=0; 736 for(i=0;i<num_ser;i++) { 737 if (ser_values[i] == ci_label->serial_number) { 738 ser_count[i]++; 739 found=1; 740 break; 741 } 742 } 743 if (!found) { 744 ser_values[num_ser] = ci_label->serial_number; 745 ser_count[num_ser] = 1; 746 num_ser++; 747 if (num_ser>2) { 748 fatal_error = 1; 749 break; 750 } 751 } 752 found=0; 753 for(i=0;i<num_mod;i++) { 754 if (mod_values[i] == ci_label->mod_counter) { 755 mod_count[i]++; 756 found=1; 757 break; 758 } 759 } 760 if (!found) { 761 mod_values[num_mod] = ci_label->mod_counter; 762 mod_count[num_mod] = 1; 763 num_mod++; 764 if (num_mod>2) { 765 fatal_error = 1; 766 break; 767 } 768 } 769 } 770 #if DEBUG 771 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 772 for(i=0;i<num_ser;i++) { 773 printf("%d %d\n", ser_values[i], ser_count[i]); 774 } 775 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 776 for(i=0;i<num_mod;i++) { 777 printf("%d %d\n", mod_values[i], mod_count[i]); 778 } 779 #endif 780 serial_number = ser_values[0]; 781 if (num_ser == 2) { 782 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 783 /* Locate the maverick component */ 784 if (ser_count[1] > ser_count[0]) { 785 serial_number = ser_values[1]; 786 } 787 788 for (c = 0; c < raidPtr->numCol; c++) { 789 ci_label = &raidPtr->raid_cinfo[c].ci_label; 790 if (serial_number != ci_label->serial_number) { 791 hosed_column = c; 792 break; 793 } 794 } 795 printf("Hosed component: %s\n", 796 &cfgPtr->devnames[0][hosed_column][0]); 797 if (!force) { 798 /* we'll fail this component, as if there are 799 other major errors, we arn't forcing things 800 and we'll abort the config anyways */ 801 raidPtr->Disks[hosed_column].status 802 = rf_ds_failed; 803 raidPtr->numFailures++; 804 raidPtr->status = rf_rs_degraded; 805 } 806 } else { 807 too_fatal = 1; 808 } 809 if (cfgPtr->parityConfig == '0') { 810 /* We've identified two different serial numbers. 811 RAID 0 can't cope with that, so we'll punt */ 812 too_fatal = 1; 813 } 814 815 } 816 817 /* record the serial number for later. If we bail later, setting 818 this doesn't matter, otherwise we've got the best guess at the 819 correct serial number */ 820 raidPtr->serial_number = serial_number; 821 822 mod_number = mod_values[0]; 823 if (num_mod == 2) { 824 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 825 /* Locate the maverick component */ 826 if (mod_count[1] > mod_count[0]) { 827 mod_number = mod_values[1]; 828 } else if (mod_count[1] < mod_count[0]) { 829 mod_number = mod_values[0]; 830 } else { 831 /* counts of different modification values 832 are the same. Assume greater value is 833 the correct one, all other things 834 considered */ 835 if (mod_values[0] > mod_values[1]) { 836 mod_number = mod_values[0]; 837 } else { 838 mod_number = mod_values[1]; 839 } 840 841 } 842 843 for (c = 0; c < raidPtr->numCol; c++) { 844 ci_label = &raidPtr->raid_cinfo[c].ci_label; 845 if (mod_number != ci_label->mod_counter) { 846 if (hosed_column == c) { 847 /* same one. Can 848 deal with it. */ 849 } else { 850 hosed_column = c; 851 if (num_ser != 1) { 852 too_fatal = 1; 853 break; 854 } 855 } 856 } 857 } 858 printf("Hosed component: %s\n", 859 &cfgPtr->devnames[0][hosed_column][0]); 860 if (!force) { 861 /* we'll fail this component, as if there are 862 other major errors, we arn't forcing things 863 and we'll abort the config anyways */ 864 if (raidPtr->Disks[hosed_column].status != rf_ds_failed) { 865 raidPtr->Disks[hosed_column].status 866 = rf_ds_failed; 867 raidPtr->numFailures++; 868 raidPtr->status = rf_rs_degraded; 869 } 870 } 871 } else { 872 too_fatal = 1; 873 } 874 if (cfgPtr->parityConfig == '0') { 875 /* We've identified two different mod counters. 876 RAID 0 can't cope with that, so we'll punt */ 877 too_fatal = 1; 878 } 879 } 880 881 raidPtr->mod_counter = mod_number; 882 883 if (too_fatal) { 884 /* we've had both a serial number mismatch, and a mod_counter 885 mismatch -- and they involved two different components!! 886 Bail -- make things fail so that the user must force 887 the issue... */ 888 hosed_column = -1; 889 fatal_error = 1; 890 } 891 892 if (num_ser > 2) { 893 printf("raid%d: Too many different serial numbers!\n", 894 raidPtr->raidid); 895 fatal_error = 1; 896 } 897 898 if (num_mod > 2) { 899 printf("raid%d: Too many different mod counters!\n", 900 raidPtr->raidid); 901 fatal_error = 1; 902 } 903 904 /* we start by assuming the parity will be good, and flee from 905 that notion at the slightest sign of trouble */ 906 907 parity_good = RF_RAID_CLEAN; 908 909 for (c = 0; c < raidPtr->numCol; c++) { 910 dev_name = &cfgPtr->devnames[0][c][0]; 911 ci_label = &raidPtr->raid_cinfo[c].ci_label; 912 913 if (c == hosed_column) { 914 printf("raid%d: Ignoring %s\n", 915 raidPtr->raidid, dev_name); 916 } else { 917 rf_print_label_status( raidPtr, c, dev_name, ci_label); 918 if (rf_check_label_vitals( raidPtr, 0, c, 919 dev_name, ci_label, 920 serial_number, 921 mod_number )) { 922 fatal_error = 1; 923 } 924 if (ci_label->clean != RF_RAID_CLEAN) { 925 parity_good = RF_RAID_DIRTY; 926 } 927 } 928 } 929 930 if (fatal_error) { 931 parity_good = RF_RAID_DIRTY; 932 } 933 934 /* we note the state of the parity */ 935 raidPtr->parity_good = parity_good; 936 937 return(fatal_error); 938 } 939 940 int 941 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 942 { 943 RF_RaidDisk_t *disks; 944 RF_DiskQueue_t *spareQueues; 945 int ret; 946 unsigned int bs; 947 int spare_number; 948 949 ret=0; 950 951 if (raidPtr->numSpare >= RF_MAXSPARE) { 952 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 953 return(EINVAL); 954 } 955 956 RF_LOCK_MUTEX(raidPtr->mutex); 957 while (raidPtr->adding_hot_spare==1) { 958 ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0, 959 &(raidPtr->mutex)); 960 } 961 raidPtr->adding_hot_spare=1; 962 RF_UNLOCK_MUTEX(raidPtr->mutex); 963 964 /* the beginning of the spares... */ 965 disks = &raidPtr->Disks[raidPtr->numCol]; 966 967 spare_number = raidPtr->numSpare; 968 969 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 970 &disks[spare_number], 971 raidPtr->numCol + spare_number); 972 973 if (ret) 974 goto fail; 975 if (disks[spare_number].status != rf_ds_optimal) { 976 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 977 sparePtr->component_name); 978 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 979 ret=EINVAL; 980 goto fail; 981 } else { 982 disks[spare_number].status = rf_ds_spare; 983 DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", 984 spare_number, 985 disks[spare_number].devname, 986 disks[spare_number].numBlocks, 987 disks[spare_number].blockSize, 988 (long int) disks[spare_number].numBlocks * 989 disks[spare_number].blockSize / 1024 / 1024); 990 } 991 992 993 /* check sizes and block sizes on the spare disk */ 994 bs = 1 << raidPtr->logBytesPerSector; 995 if (disks[spare_number].blockSize != bs) { 996 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 997 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 998 ret = EINVAL; 999 goto fail; 1000 } 1001 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1002 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n", 1003 disks[spare_number].devname, 1004 disks[spare_number].blockSize, 1005 raidPtr->sectorsPerDisk); 1006 rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0); 1007 ret = EINVAL; 1008 goto fail; 1009 } else { 1010 if (disks[spare_number].numBlocks > 1011 raidPtr->sectorsPerDisk) { 1012 RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n", 1013 disks[spare_number].devname, 1014 raidPtr->sectorsPerDisk, 1015 disks[spare_number].numBlocks); 1016 1017 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1018 } 1019 } 1020 1021 spareQueues = &raidPtr->Queues[raidPtr->numCol]; 1022 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1023 raidPtr->numCol + spare_number, 1024 raidPtr->qType, 1025 raidPtr->sectorsPerDisk, 1026 raidPtr->Disks[raidPtr->numCol + 1027 spare_number].dev, 1028 raidPtr->maxOutstanding, 1029 &raidPtr->shutdownList, 1030 raidPtr->cleanupList); 1031 1032 RF_LOCK_MUTEX(raidPtr->mutex); 1033 raidPtr->numSpare++; 1034 RF_UNLOCK_MUTEX(raidPtr->mutex); 1035 1036 fail: 1037 RF_LOCK_MUTEX(raidPtr->mutex); 1038 raidPtr->adding_hot_spare=0; 1039 wakeup(&(raidPtr->adding_hot_spare)); 1040 RF_UNLOCK_MUTEX(raidPtr->mutex); 1041 1042 return(ret); 1043 } 1044 1045 int 1046 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr) 1047 { 1048 int spare_number; 1049 1050 1051 if (raidPtr->numSpare==0) { 1052 printf("No spares to remove!\n"); 1053 return(EINVAL); 1054 } 1055 1056 spare_number = sparePtr->column; 1057 1058 return(EINVAL); /* XXX not implemented yet */ 1059 #if 0 1060 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1061 return(EINVAL); 1062 } 1063 1064 /* verify that this spare isn't in use... */ 1065 1066 1067 1068 1069 /* it's gone.. */ 1070 1071 raidPtr->numSpare--; 1072 1073 return(0); 1074 #endif 1075 } 1076 1077 1078 int 1079 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component) 1080 { 1081 RF_RaidDisk_t *disks; 1082 1083 if ((component->column < 0) || 1084 (component->column >= raidPtr->numCol)) { 1085 return(EINVAL); 1086 } 1087 1088 disks = &raidPtr->Disks[component->column]; 1089 1090 /* 1. This component must be marked as 'failed' */ 1091 1092 return(EINVAL); /* Not implemented yet. */ 1093 } 1094 1095 int 1096 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, 1097 RF_SingleComponent_t *component) 1098 { 1099 1100 /* Issues here include how to 'move' this in if there is IO 1101 taking place (e.g. component queues and such) */ 1102 1103 return(EINVAL); /* Not implemented yet. */ 1104 } 1105