1 /* $NetBSD: rf_disks.c,v 1.23 2000/03/07 02:28:05 oster Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include "rf_types.h" 70 #include "rf_raid.h" 71 #include "rf_alloclist.h" 72 #include "rf_utils.h" 73 #include "rf_configure.h" 74 #include "rf_general.h" 75 #include "rf_options.h" 76 #include "rf_kintf.h" 77 #include "rf_netbsd.h" 78 79 #include <sys/types.h> 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 /* XXX these should be in a header file somewhere */ 88 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); 89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 90 91 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 92 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 93 94 /************************************************************************** 95 * 96 * initialize the disks comprising the array 97 * 98 * We want the spare disks to have regular row,col numbers so that we can 99 * easily substitue a spare for a failed disk. But, the driver code assumes 100 * throughout that the array contains numRow by numCol _non-spare_ disks, so 101 * it's not clear how to fit in the spares. This is an unfortunate holdover 102 * from raidSim. The quick and dirty fix is to make row zero bigger than the 103 * rest, and put all the spares in it. This probably needs to get changed 104 * eventually. 105 * 106 **************************************************************************/ 107 108 int 109 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 110 RF_ShutdownList_t **listp; 111 RF_Raid_t *raidPtr; 112 RF_Config_t *cfgPtr; 113 { 114 RF_RaidDisk_t **disks; 115 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 116 RF_RowCol_t r, c; 117 int bs, ret; 118 unsigned i, count, foundone = 0, numFailuresThisRow; 119 int force; 120 121 force = cfgPtr->force; 122 123 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 124 if (ret) 125 goto fail; 126 127 disks = raidPtr->Disks; 128 129 for (r = 0; r < raidPtr->numRow; r++) { 130 numFailuresThisRow = 0; 131 for (c = 0; c < raidPtr->numCol; c++) { 132 ret = rf_ConfigureDisk(raidPtr, 133 &cfgPtr->devnames[r][c][0], 134 &disks[r][c], r, c); 135 136 if (ret) 137 goto fail; 138 139 if (disks[r][c].status == rf_ds_optimal) { 140 raidread_component_label( 141 raidPtr->raid_cinfo[r][c].ci_dev, 142 raidPtr->raid_cinfo[r][c].ci_vp, 143 &raidPtr->raid_cinfo[r][c].ci_label); 144 } 145 146 if (disks[r][c].status != rf_ds_optimal) { 147 numFailuresThisRow++; 148 } else { 149 if (disks[r][c].numBlocks < min_numblks) 150 min_numblks = disks[r][c].numBlocks; 151 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 152 r, c, disks[r][c].devname, 153 (long int) disks[r][c].numBlocks, 154 disks[r][c].blockSize, 155 (long int) disks[r][c].numBlocks * 156 disks[r][c].blockSize / 1024 / 1024); 157 } 158 } 159 /* XXX fix for n-fault tolerant */ 160 /* XXX this should probably check to see how many failures 161 we can handle for this configuration! */ 162 if (numFailuresThisRow > 0) 163 raidPtr->status[r] = rf_rs_degraded; 164 } 165 166 /* all disks must be the same size & have the same block size, bs must 167 * be a power of 2 */ 168 bs = 0; 169 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 170 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 171 if (disks[r][c].status == rf_ds_optimal) { 172 bs = disks[r][c].blockSize; 173 foundone = 1; 174 } 175 } 176 } 177 if (!foundone) { 178 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 179 ret = EINVAL; 180 goto fail; 181 } 182 for (count = 0, i = 1; i; i <<= 1) 183 if (bs & i) 184 count++; 185 if (count != 1) { 186 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 187 ret = EINVAL; 188 goto fail; 189 } 190 191 if (rf_CheckLabels( raidPtr, cfgPtr )) { 192 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 193 if (force != 0) { 194 printf("raid%d: Fatal errors being ignored.\n", 195 raidPtr->raidid); 196 } else { 197 ret = EINVAL; 198 goto fail; 199 } 200 } 201 202 for (r = 0; r < raidPtr->numRow; r++) { 203 for (c = 0; c < raidPtr->numCol; c++) { 204 if (disks[r][c].status == rf_ds_optimal) { 205 if (disks[r][c].blockSize != bs) { 206 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 207 ret = EINVAL; 208 goto fail; 209 } 210 if (disks[r][c].numBlocks != min_numblks) { 211 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 212 r, c, (int) min_numblks); 213 disks[r][c].numBlocks = min_numblks; 214 } 215 } 216 } 217 } 218 219 raidPtr->sectorsPerDisk = min_numblks; 220 raidPtr->logBytesPerSector = ffs(bs) - 1; 221 raidPtr->bytesPerSector = bs; 222 raidPtr->sectorMask = bs - 1; 223 return (0); 224 225 fail: 226 227 rf_UnconfigureVnodes( raidPtr ); 228 229 return (ret); 230 } 231 232 233 /**************************************************************************** 234 * set up the data structures describing the spare disks in the array 235 * recall from the above comment that the spare disk descriptors are stored 236 * in row zero, which is specially expanded to hold them. 237 ****************************************************************************/ 238 int 239 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 240 RF_ShutdownList_t ** listp; 241 RF_Raid_t * raidPtr; 242 RF_Config_t * cfgPtr; 243 { 244 int i, ret; 245 unsigned int bs; 246 RF_RaidDisk_t *disks; 247 int num_spares_done; 248 249 num_spares_done = 0; 250 251 /* The space for the spares should have already been allocated by 252 * ConfigureDisks() */ 253 254 disks = &raidPtr->Disks[0][raidPtr->numCol]; 255 for (i = 0; i < raidPtr->numSpare; i++) { 256 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 257 &disks[i], 0, raidPtr->numCol + i); 258 if (ret) 259 goto fail; 260 if (disks[i].status != rf_ds_optimal) { 261 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 262 &cfgPtr->spare_names[i][0]); 263 } else { 264 disks[i].status = rf_ds_spare; /* change status to 265 * spare */ 266 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 267 disks[i].devname, 268 (long int) disks[i].numBlocks, disks[i].blockSize, 269 (long int) disks[i].numBlocks * 270 disks[i].blockSize / 1024 / 1024); 271 } 272 num_spares_done++; 273 } 274 275 /* check sizes and block sizes on spare disks */ 276 bs = 1 << raidPtr->logBytesPerSector; 277 for (i = 0; i < raidPtr->numSpare; i++) { 278 if (disks[i].blockSize != bs) { 279 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 280 ret = EINVAL; 281 goto fail; 282 } 283 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 284 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 285 disks[i].devname, disks[i].blockSize, 286 (long int) raidPtr->sectorsPerDisk); 287 ret = EINVAL; 288 goto fail; 289 } else 290 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 291 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 292 293 disks[i].numBlocks = raidPtr->sectorsPerDisk; 294 } 295 } 296 297 return (0); 298 299 fail: 300 301 /* Release the hold on the main components. We've failed to allocate 302 * a spare, and since we're failing, we need to free things.. 303 304 XXX failing to allocate a spare is *not* that big of a deal... 305 We *can* survive without it, if need be, esp. if we get hot 306 adding working. 307 308 If we don't fail out here, then we need a way to remove this spare... 309 that should be easier to do here than if we are "live"... 310 311 */ 312 313 rf_UnconfigureVnodes( raidPtr ); 314 315 return (ret); 316 } 317 318 static int 319 rf_AllocDiskStructures(raidPtr, cfgPtr) 320 RF_Raid_t *raidPtr; 321 RF_Config_t *cfgPtr; 322 { 323 RF_RaidDisk_t **disks; 324 int ret; 325 int r; 326 327 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 328 (RF_RaidDisk_t **), raidPtr->cleanupList); 329 if (disks == NULL) { 330 ret = ENOMEM; 331 goto fail; 332 } 333 raidPtr->Disks = disks; 334 /* get space for the device-specific stuff... */ 335 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 336 sizeof(struct raidcinfo *), (struct raidcinfo **), 337 raidPtr->cleanupList); 338 if (raidPtr->raid_cinfo == NULL) { 339 ret = ENOMEM; 340 goto fail; 341 } 342 343 for (r = 0; r < raidPtr->numRow; r++) { 344 /* We allocate RF_MAXSPARE on the first row so that we 345 have room to do hot-swapping of spares */ 346 RF_CallocAndAdd(disks[r], raidPtr->numCol 347 + ((r == 0) ? RF_MAXSPARE : 0), 348 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 349 raidPtr->cleanupList); 350 if (disks[r] == NULL) { 351 ret = ENOMEM; 352 goto fail; 353 } 354 /* get more space for device specific stuff.. */ 355 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 356 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), 357 sizeof(struct raidcinfo), (struct raidcinfo *), 358 raidPtr->cleanupList); 359 if (raidPtr->raid_cinfo[r] == NULL) { 360 ret = ENOMEM; 361 goto fail; 362 } 363 } 364 return(0); 365 fail: 366 rf_UnconfigureVnodes( raidPtr ); 367 368 return(ret); 369 } 370 371 372 /* configure a single disk during auto-configuration at boot */ 373 int 374 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 375 RF_Raid_t *raidPtr; 376 RF_Config_t *cfgPtr; 377 RF_AutoConfig_t *auto_config; 378 { 379 RF_RaidDisk_t **disks; 380 RF_RaidDisk_t *diskPtr; 381 RF_RowCol_t r, c; 382 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 383 int bs, ret; 384 int numFailuresThisRow; 385 int force; 386 RF_AutoConfig_t *ac; 387 int parity_good; 388 int mod_counter; 389 390 #if DEBUG 391 printf("Starting autoconfiguration of RAID set...\n"); 392 #endif 393 force = cfgPtr->force; 394 395 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 396 if (ret) 397 goto fail; 398 399 disks = raidPtr->Disks; 400 401 /* assume the parity will be fine.. */ 402 parity_good = RF_RAID_CLEAN; 403 404 /* Check for mod_counters that are too low */ 405 mod_counter = -1; 406 ac = auto_config; 407 while(ac!=NULL) { 408 if (ac->clabel->mod_counter > mod_counter) { 409 mod_counter = ac->clabel->mod_counter; 410 } 411 ac = ac->next; 412 } 413 if (mod_counter == -1) { 414 /* mod_counters were all negative!?!?!? 415 Ok, we can deal with that. */ 416 #if 0 417 ac = auto_config; 418 while(ac!=NULL) { 419 if (ac->clabel->mod_counter > mod_counter) { 420 mod_counter = ac->clabel->mod_counter; 421 } 422 ac = ac->next; 423 } 424 #endif 425 } 426 427 for (r = 0; r < raidPtr->numRow; r++) { 428 numFailuresThisRow = 0; 429 for (c = 0; c < raidPtr->numCol; c++) { 430 diskPtr = &disks[r][c]; 431 432 /* find this row/col in the autoconfig */ 433 #if DEBUG 434 printf("Looking for %d,%d in autoconfig\n",r,c); 435 #endif 436 ac = auto_config; 437 while(ac!=NULL) { 438 if (ac->clabel==NULL) { 439 /* big-time bad news. */ 440 goto fail; 441 } 442 if ((ac->clabel->row == r) && 443 (ac->clabel->column == c)) { 444 /* it's this one... */ 445 #if DEBUG 446 printf("Found: %s at %d,%d\n", 447 ac->devname,r,c); 448 #endif 449 450 break; 451 } 452 ac=ac->next; 453 } 454 455 if (ac!=NULL) { 456 /* Found it. Configure it.. */ 457 diskPtr->blockSize = ac->clabel->blockSize; 458 diskPtr->numBlocks = ac->clabel->numBlocks; 459 /* Note: rf_protectedSectors is already 460 factored into numBlocks here */ 461 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 462 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 463 464 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 465 ac->clabel, sizeof(*ac->clabel)); 466 sprintf(diskPtr->devname, "/dev/%s", 467 ac->devname); 468 469 /* note the fact that this component was 470 autoconfigured. You'll need this info 471 later. Trust me :) */ 472 diskPtr->auto_configured = 1; 473 diskPtr->dev = ac->dev; 474 475 /* 476 * we allow the user to specify that 477 * only a fraction of the disks should 478 * be used this is just for debug: it 479 * speeds up the parity scan 480 */ 481 482 diskPtr->numBlocks = diskPtr->numBlocks * 483 rf_sizePercentage / 100; 484 485 /* XXX these will get set multiple times, 486 but since we're autoconfiguring, they'd 487 better be always the same each time! 488 If not, this is the least of your worries */ 489 490 bs = diskPtr->blockSize; 491 min_numblks = diskPtr->numBlocks; 492 493 /* this gets done multiple times, but that's 494 fine -- the serial number will be the same 495 for all components, guaranteed */ 496 raidPtr->serial_number = 497 ac->clabel->serial_number; 498 /* check the last time the label 499 was modified */ 500 if (ac->clabel->mod_counter != 501 mod_counter) { 502 /* Even though we've filled in all 503 of the above, we don't trust 504 this component since it's 505 modification counter is not 506 in sync with the rest, and we really 507 consider it to be failed. */ 508 disks[r][c].status = rf_ds_failed; 509 numFailuresThisRow++; 510 } else { 511 if (ac->clabel->clean != 512 RF_RAID_CLEAN) { 513 parity_good = RF_RAID_DIRTY; 514 } 515 } 516 } else { 517 /* Didn't find it at all!! 518 Component must really be dead */ 519 disks[r][c].status = rf_ds_failed; 520 numFailuresThisRow++; 521 } 522 } 523 /* XXX fix for n-fault tolerant */ 524 /* XXX this should probably check to see how many failures 525 we can handle for this configuration! */ 526 if (numFailuresThisRow > 0) 527 raidPtr->status[r] = rf_rs_degraded; 528 } 529 530 raidPtr->mod_counter = mod_counter; 531 532 /* note the state of the parity, if any */ 533 raidPtr->parity_good = parity_good; 534 raidPtr->sectorsPerDisk = min_numblks; 535 raidPtr->logBytesPerSector = ffs(bs) - 1; 536 raidPtr->bytesPerSector = bs; 537 raidPtr->sectorMask = bs - 1; 538 return (0); 539 540 fail: 541 542 rf_UnconfigureVnodes( raidPtr ); 543 544 return (ret); 545 546 } 547 548 /* configure a single disk in the array */ 549 int 550 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 551 RF_Raid_t *raidPtr; 552 char *buf; 553 RF_RaidDisk_t *diskPtr; 554 RF_RowCol_t row; 555 RF_RowCol_t col; 556 { 557 char *p; 558 int retcode; 559 560 struct partinfo dpart; 561 struct vnode *vp; 562 struct vattr va; 563 struct proc *proc; 564 int error; 565 566 retcode = 0; 567 p = rf_find_non_white(buf); 568 if (p[strlen(p) - 1] == '\n') { 569 /* strip off the newline */ 570 p[strlen(p) - 1] = '\0'; 571 } 572 (void) strcpy(diskPtr->devname, p); 573 574 proc = raidPtr->engine_thread; 575 576 /* Let's start by claiming the component is fine and well... */ 577 diskPtr->status = rf_ds_optimal; 578 579 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 580 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 581 582 error = raidlookup(diskPtr->devname, proc, &vp); 583 if (error) { 584 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 585 if (error == ENXIO) { 586 /* the component isn't there... must be dead :-( */ 587 diskPtr->status = rf_ds_failed; 588 } else { 589 return (error); 590 } 591 } 592 if (diskPtr->status == rf_ds_optimal) { 593 594 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 595 return (error); 596 } 597 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 598 FREAD, proc->p_ucred, proc); 599 if (error) { 600 return (error); 601 } 602 603 diskPtr->blockSize = dpart.disklab->d_secsize; 604 605 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 606 diskPtr->partitionSize = dpart.part->p_size; 607 608 raidPtr->raid_cinfo[row][col].ci_vp = vp; 609 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 610 611 /* This component was not automatically configured */ 612 diskPtr->auto_configured = 0; 613 diskPtr->dev = va.va_rdev; 614 615 /* we allow the user to specify that only a fraction of the 616 * disks should be used this is just for debug: it speeds up 617 * the parity scan */ 618 diskPtr->numBlocks = diskPtr->numBlocks * 619 rf_sizePercentage / 100; 620 } 621 return (0); 622 } 623 624 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 625 RF_ComponentLabel_t *); 626 627 static void 628 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 629 RF_Raid_t *raidPtr; 630 int row; 631 int column; 632 char *dev_name; 633 RF_ComponentLabel_t *ci_label; 634 { 635 636 printf("raid%d: Component %s being configured at row: %d col: %d\n", 637 raidPtr->raidid, dev_name, row, column ); 638 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 639 ci_label->row, ci_label->column, 640 ci_label->num_rows, ci_label->num_columns); 641 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 642 ci_label->version, ci_label->serial_number, 643 ci_label->mod_counter); 644 printf(" Clean: %s Status: %d\n", 645 ci_label->clean ? "Yes" : "No", ci_label->status ); 646 } 647 648 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 649 RF_ComponentLabel_t *, int, int ); 650 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 651 serial_number, mod_counter ) 652 RF_Raid_t *raidPtr; 653 int row; 654 int column; 655 char *dev_name; 656 RF_ComponentLabel_t *ci_label; 657 int serial_number; 658 int mod_counter; 659 { 660 int fatal_error = 0; 661 662 if (serial_number != ci_label->serial_number) { 663 printf("%s has a different serial number: %d %d\n", 664 dev_name, serial_number, ci_label->serial_number); 665 fatal_error = 1; 666 } 667 if (mod_counter != ci_label->mod_counter) { 668 printf("%s has a different modfication count: %d %d\n", 669 dev_name, mod_counter, ci_label->mod_counter); 670 } 671 672 if (row != ci_label->row) { 673 printf("Row out of alignment for: %s\n", dev_name); 674 fatal_error = 1; 675 } 676 if (column != ci_label->column) { 677 printf("Column out of alignment for: %s\n", dev_name); 678 fatal_error = 1; 679 } 680 if (raidPtr->numRow != ci_label->num_rows) { 681 printf("Number of rows do not match for: %s\n", dev_name); 682 fatal_error = 1; 683 } 684 if (raidPtr->numCol != ci_label->num_columns) { 685 printf("Number of columns do not match for: %s\n", dev_name); 686 fatal_error = 1; 687 } 688 if (ci_label->clean == 0) { 689 /* it's not clean, but that's not fatal */ 690 printf("%s is not clean!\n", dev_name); 691 } 692 return(fatal_error); 693 } 694 695 696 /* 697 698 rf_CheckLabels() - check all the component labels for consistency. 699 Return an error if there is anything major amiss. 700 701 */ 702 703 int 704 rf_CheckLabels( raidPtr, cfgPtr ) 705 RF_Raid_t *raidPtr; 706 RF_Config_t *cfgPtr; 707 { 708 int r,c; 709 char *dev_name; 710 RF_ComponentLabel_t *ci_label; 711 int serial_number = 0; 712 int mod_number = 0; 713 int fatal_error = 0; 714 int mod_values[4]; 715 int mod_count[4]; 716 int ser_values[4]; 717 int ser_count[4]; 718 int num_ser; 719 int num_mod; 720 int i; 721 int found; 722 int hosed_row; 723 int hosed_column; 724 int too_fatal; 725 int parity_good; 726 int force; 727 728 hosed_row = -1; 729 hosed_column = -1; 730 too_fatal = 0; 731 force = cfgPtr->force; 732 733 /* 734 We're going to try to be a little intelligent here. If one 735 component's label is bogus, and we can identify that it's the 736 *only* one that's gone, we'll mark it as "failed" and allow 737 the configuration to proceed. This will be the *only* case 738 that we'll proceed if there would be (otherwise) fatal errors. 739 740 Basically we simply keep a count of how many components had 741 what serial number. If all but one agree, we simply mark 742 the disagreeing component as being failed, and allow 743 things to come up "normally". 744 745 We do this first for serial numbers, and then for "mod_counter". 746 747 */ 748 749 num_ser = 0; 750 num_mod = 0; 751 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 752 for (c = 0; c < raidPtr->numCol; c++) { 753 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 754 found=0; 755 for(i=0;i<num_ser;i++) { 756 if (ser_values[i] == ci_label->serial_number) { 757 ser_count[i]++; 758 found=1; 759 break; 760 } 761 } 762 if (!found) { 763 ser_values[num_ser] = ci_label->serial_number; 764 ser_count[num_ser] = 1; 765 num_ser++; 766 if (num_ser>2) { 767 fatal_error = 1; 768 break; 769 } 770 } 771 found=0; 772 for(i=0;i<num_mod;i++) { 773 if (mod_values[i] == ci_label->mod_counter) { 774 mod_count[i]++; 775 found=1; 776 break; 777 } 778 } 779 if (!found) { 780 mod_values[num_mod] = ci_label->mod_counter; 781 mod_count[num_mod] = 1; 782 num_mod++; 783 if (num_mod>2) { 784 fatal_error = 1; 785 break; 786 } 787 } 788 } 789 } 790 #if DEBUG 791 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 792 for(i=0;i<num_ser;i++) { 793 printf("%d %d\n", ser_values[i], ser_count[i]); 794 } 795 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 796 for(i=0;i<num_mod;i++) { 797 printf("%d %d\n", mod_values[i], mod_count[i]); 798 } 799 #endif 800 serial_number = ser_values[0]; 801 if (num_ser == 2) { 802 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 803 /* Locate the maverick component */ 804 if (ser_count[1] > ser_count[0]) { 805 serial_number = ser_values[1]; 806 } 807 for (r = 0; r < raidPtr->numRow; r++) { 808 for (c = 0; c < raidPtr->numCol; c++) { 809 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 810 if (serial_number != 811 ci_label->serial_number) { 812 hosed_row = r; 813 hosed_column = c; 814 break; 815 } 816 } 817 } 818 printf("Hosed component: %s\n", 819 &cfgPtr->devnames[hosed_row][hosed_column][0]); 820 if (!force) { 821 /* we'll fail this component, as if there are 822 other major errors, we arn't forcing things 823 and we'll abort the config anyways */ 824 raidPtr->Disks[hosed_row][hosed_column].status 825 = rf_ds_failed; 826 raidPtr->numFailures++; 827 raidPtr->status[hosed_row] = rf_rs_degraded; 828 } 829 } else { 830 too_fatal = 1; 831 } 832 if (cfgPtr->parityConfig == '0') { 833 /* We've identified two different serial numbers. 834 RAID 0 can't cope with that, so we'll punt */ 835 too_fatal = 1; 836 } 837 838 } 839 840 /* record the serial number for later. If we bail later, setting 841 this doesn't matter, otherwise we've got the best guess at the 842 correct serial number */ 843 raidPtr->serial_number = serial_number; 844 845 mod_number = mod_values[0]; 846 if (num_mod == 2) { 847 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 848 /* Locate the maverick component */ 849 if (mod_count[1] > mod_count[0]) { 850 mod_number = mod_values[1]; 851 } else if (mod_count[1] < mod_count[0]) { 852 mod_number = mod_values[0]; 853 } else { 854 /* counts of different modification values 855 are the same. Assume greater value is 856 the correct one, all other things 857 considered */ 858 if (mod_values[0] > mod_values[1]) { 859 mod_number = mod_values[0]; 860 } else { 861 mod_number = mod_values[1]; 862 } 863 864 } 865 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 866 for (c = 0; c < raidPtr->numCol; c++) { 867 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 868 if (mod_number != 869 ci_label->mod_counter) { 870 if ( ( hosed_row == r ) && 871 ( hosed_column == c )) { 872 /* same one. Can 873 deal with it. */ 874 } else { 875 hosed_row = r; 876 hosed_column = c; 877 if (num_ser != 1) { 878 too_fatal = 1; 879 break; 880 } 881 } 882 } 883 } 884 } 885 printf("Hosed component: %s\n", 886 &cfgPtr->devnames[hosed_row][hosed_column][0]); 887 if (!force) { 888 /* we'll fail this component, as if there are 889 other major errors, we arn't forcing things 890 and we'll abort the config anyways */ 891 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 892 raidPtr->Disks[hosed_row][hosed_column].status 893 = rf_ds_failed; 894 raidPtr->numFailures++; 895 raidPtr->status[hosed_row] = rf_rs_degraded; 896 } 897 } 898 } else { 899 too_fatal = 1; 900 } 901 if (cfgPtr->parityConfig == '0') { 902 /* We've identified two different mod counters. 903 RAID 0 can't cope with that, so we'll punt */ 904 too_fatal = 1; 905 } 906 } 907 908 raidPtr->mod_counter = mod_number; 909 910 if (too_fatal) { 911 /* we've had both a serial number mismatch, and a mod_counter 912 mismatch -- and they involved two different components!! 913 Bail -- make things fail so that the user must force 914 the issue... */ 915 hosed_row = -1; 916 hosed_column = -1; 917 } 918 919 if (num_ser > 2) { 920 printf("raid%d: Too many different serial numbers!\n", 921 raidPtr->raidid); 922 } 923 924 if (num_mod > 2) { 925 printf("raid%d: Too many different mod counters!\n", 926 raidPtr->raidid); 927 } 928 929 /* we start by assuming the parity will be good, and flee from 930 that notion at the slightest sign of trouble */ 931 932 parity_good = RF_RAID_CLEAN; 933 for (r = 0; r < raidPtr->numRow; r++) { 934 for (c = 0; c < raidPtr->numCol; c++) { 935 dev_name = &cfgPtr->devnames[r][c][0]; 936 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 937 938 if ((r == hosed_row) && (c == hosed_column)) { 939 printf("raid%d: Ignoring %s\n", 940 raidPtr->raidid, dev_name); 941 } else { 942 rf_print_label_status( raidPtr, r, c, 943 dev_name, ci_label ); 944 if (rf_check_label_vitals( raidPtr, r, c, 945 dev_name, ci_label, 946 serial_number, 947 mod_number )) { 948 fatal_error = 1; 949 } 950 if (ci_label->clean != RF_RAID_CLEAN) { 951 parity_good = RF_RAID_DIRTY; 952 } 953 } 954 } 955 } 956 if (fatal_error) { 957 parity_good = RF_RAID_DIRTY; 958 } 959 960 /* we note the state of the parity */ 961 raidPtr->parity_good = parity_good; 962 963 return(fatal_error); 964 } 965 966 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); 967 int 968 rf_add_hot_spare(raidPtr, sparePtr) 969 RF_Raid_t *raidPtr; 970 RF_SingleComponent_t *sparePtr; 971 { 972 RF_RaidDisk_t *disks; 973 RF_DiskQueue_t *spareQueues; 974 int ret; 975 unsigned int bs; 976 int spare_number; 977 978 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); 979 printf("Num col: %d\n",raidPtr->numCol); 980 if (raidPtr->numSpare >= RF_MAXSPARE) { 981 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 982 return(EINVAL); 983 } 984 985 RF_LOCK_MUTEX(raidPtr->mutex); 986 987 /* the beginning of the spares... */ 988 disks = &raidPtr->Disks[0][raidPtr->numCol]; 989 990 spare_number = raidPtr->numSpare; 991 992 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 993 &disks[spare_number], 0, 994 raidPtr->numCol + spare_number); 995 996 if (ret) 997 goto fail; 998 if (disks[spare_number].status != rf_ds_optimal) { 999 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1000 sparePtr->component_name); 1001 ret=EINVAL; 1002 goto fail; 1003 } else { 1004 disks[spare_number].status = rf_ds_spare; 1005 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1006 disks[spare_number].devname, 1007 (long int) disks[spare_number].numBlocks, 1008 disks[spare_number].blockSize, 1009 (long int) disks[spare_number].numBlocks * 1010 disks[spare_number].blockSize / 1024 / 1024); 1011 } 1012 1013 1014 /* check sizes and block sizes on the spare disk */ 1015 bs = 1 << raidPtr->logBytesPerSector; 1016 if (disks[spare_number].blockSize != bs) { 1017 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1018 ret = EINVAL; 1019 goto fail; 1020 } 1021 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1022 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1023 disks[spare_number].devname, 1024 disks[spare_number].blockSize, 1025 (long int) raidPtr->sectorsPerDisk); 1026 ret = EINVAL; 1027 goto fail; 1028 } else { 1029 if (disks[spare_number].numBlocks > 1030 raidPtr->sectorsPerDisk) { 1031 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1032 (long int) raidPtr->sectorsPerDisk); 1033 1034 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1035 } 1036 } 1037 1038 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1039 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1040 0, raidPtr->numCol + spare_number, 1041 raidPtr->qType, 1042 raidPtr->sectorsPerDisk, 1043 raidPtr->Disks[0][raidPtr->numCol + 1044 spare_number].dev, 1045 raidPtr->maxOutstanding, 1046 &raidPtr->shutdownList, 1047 raidPtr->cleanupList); 1048 1049 1050 raidPtr->numSpare++; 1051 RF_UNLOCK_MUTEX(raidPtr->mutex); 1052 return (0); 1053 1054 fail: 1055 RF_UNLOCK_MUTEX(raidPtr->mutex); 1056 return(ret); 1057 } 1058 1059 int 1060 rf_remove_hot_spare(raidPtr,sparePtr) 1061 RF_Raid_t *raidPtr; 1062 RF_SingleComponent_t *sparePtr; 1063 { 1064 int spare_number; 1065 1066 1067 if (raidPtr->numSpare==0) { 1068 printf("No spares to remove!\n"); 1069 return(EINVAL); 1070 } 1071 1072 spare_number = sparePtr->column; 1073 1074 return(EINVAL); /* XXX not implemented yet */ 1075 #if 0 1076 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1077 return(EINVAL); 1078 } 1079 1080 /* verify that this spare isn't in use... */ 1081 1082 1083 1084 1085 /* it's gone.. */ 1086 1087 raidPtr->numSpare--; 1088 1089 return(0); 1090 #endif 1091 } 1092 1093 1094