1 /* $NetBSD: rf_disks.c,v 1.22 2000/03/03 03:47:17 oster Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include "rf_types.h" 70 #include "rf_raid.h" 71 #include "rf_alloclist.h" 72 #include "rf_utils.h" 73 #include "rf_configure.h" 74 #include "rf_general.h" 75 #include "rf_options.h" 76 #include "rf_kintf.h" 77 #include "rf_netbsd.h" 78 79 #include <sys/types.h> 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 /* XXX these should be in a header file somewhere */ 88 void rf_UnconfigureVnodes( RF_Raid_t * ); 89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *); 90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 91 92 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 93 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 94 95 /************************************************************************** 96 * 97 * initialize the disks comprising the array 98 * 99 * We want the spare disks to have regular row,col numbers so that we can 100 * easily substitue a spare for a failed disk. But, the driver code assumes 101 * throughout that the array contains numRow by numCol _non-spare_ disks, so 102 * it's not clear how to fit in the spares. This is an unfortunate holdover 103 * from raidSim. The quick and dirty fix is to make row zero bigger than the 104 * rest, and put all the spares in it. This probably needs to get changed 105 * eventually. 106 * 107 **************************************************************************/ 108 109 int 110 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 111 RF_ShutdownList_t **listp; 112 RF_Raid_t *raidPtr; 113 RF_Config_t *cfgPtr; 114 { 115 RF_RaidDisk_t **disks; 116 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 117 RF_RowCol_t r, c; 118 int bs, ret; 119 unsigned i, count, foundone = 0, numFailuresThisRow; 120 int force; 121 122 force = cfgPtr->force; 123 124 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 125 if (ret) 126 goto fail; 127 128 disks = raidPtr->Disks; 129 130 for (r = 0; r < raidPtr->numRow; r++) { 131 numFailuresThisRow = 0; 132 for (c = 0; c < raidPtr->numCol; c++) { 133 ret = rf_ConfigureDisk(raidPtr, 134 &cfgPtr->devnames[r][c][0], 135 &disks[r][c], r, c); 136 137 if (ret) 138 goto fail; 139 140 if (disks[r][c].status == rf_ds_optimal) { 141 raidread_component_label( 142 raidPtr->raid_cinfo[r][c].ci_dev, 143 raidPtr->raid_cinfo[r][c].ci_vp, 144 &raidPtr->raid_cinfo[r][c].ci_label); 145 } 146 147 if (disks[r][c].status != rf_ds_optimal) { 148 numFailuresThisRow++; 149 } else { 150 if (disks[r][c].numBlocks < min_numblks) 151 min_numblks = disks[r][c].numBlocks; 152 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 153 r, c, disks[r][c].devname, 154 (long int) disks[r][c].numBlocks, 155 disks[r][c].blockSize, 156 (long int) disks[r][c].numBlocks * 157 disks[r][c].blockSize / 1024 / 1024); 158 } 159 } 160 /* XXX fix for n-fault tolerant */ 161 /* XXX this should probably check to see how many failures 162 we can handle for this configuration! */ 163 if (numFailuresThisRow > 0) 164 raidPtr->status[r] = rf_rs_degraded; 165 } 166 167 /* all disks must be the same size & have the same block size, bs must 168 * be a power of 2 */ 169 bs = 0; 170 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 171 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 172 if (disks[r][c].status == rf_ds_optimal) { 173 bs = disks[r][c].blockSize; 174 foundone = 1; 175 } 176 } 177 } 178 if (!foundone) { 179 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 180 ret = EINVAL; 181 goto fail; 182 } 183 for (count = 0, i = 1; i; i <<= 1) 184 if (bs & i) 185 count++; 186 if (count != 1) { 187 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 188 ret = EINVAL; 189 goto fail; 190 } 191 192 if (rf_CheckLabels( raidPtr, cfgPtr )) { 193 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 194 if (force != 0) { 195 printf("raid%d: Fatal errors being ignored.\n", 196 raidPtr->raidid); 197 } else { 198 ret = EINVAL; 199 goto fail; 200 } 201 } 202 203 for (r = 0; r < raidPtr->numRow; r++) { 204 for (c = 0; c < raidPtr->numCol; c++) { 205 if (disks[r][c].status == rf_ds_optimal) { 206 if (disks[r][c].blockSize != bs) { 207 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 208 ret = EINVAL; 209 goto fail; 210 } 211 if (disks[r][c].numBlocks != min_numblks) { 212 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 213 r, c, (int) min_numblks); 214 disks[r][c].numBlocks = min_numblks; 215 } 216 } 217 } 218 } 219 220 raidPtr->sectorsPerDisk = min_numblks; 221 raidPtr->logBytesPerSector = ffs(bs) - 1; 222 raidPtr->bytesPerSector = bs; 223 raidPtr->sectorMask = bs - 1; 224 return (0); 225 226 fail: 227 228 rf_UnconfigureVnodes( raidPtr ); 229 230 return (ret); 231 } 232 233 234 /**************************************************************************** 235 * set up the data structures describing the spare disks in the array 236 * recall from the above comment that the spare disk descriptors are stored 237 * in row zero, which is specially expanded to hold them. 238 ****************************************************************************/ 239 int 240 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 241 RF_ShutdownList_t ** listp; 242 RF_Raid_t * raidPtr; 243 RF_Config_t * cfgPtr; 244 { 245 int i, ret; 246 unsigned int bs; 247 RF_RaidDisk_t *disks; 248 int num_spares_done; 249 250 num_spares_done = 0; 251 252 /* The space for the spares should have already been allocated by 253 * ConfigureDisks() */ 254 255 disks = &raidPtr->Disks[0][raidPtr->numCol]; 256 for (i = 0; i < raidPtr->numSpare; i++) { 257 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 258 &disks[i], 0, raidPtr->numCol + i); 259 if (ret) 260 goto fail; 261 if (disks[i].status != rf_ds_optimal) { 262 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 263 &cfgPtr->spare_names[i][0]); 264 } else { 265 disks[i].status = rf_ds_spare; /* change status to 266 * spare */ 267 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 268 disks[i].devname, 269 (long int) disks[i].numBlocks, disks[i].blockSize, 270 (long int) disks[i].numBlocks * 271 disks[i].blockSize / 1024 / 1024); 272 } 273 num_spares_done++; 274 } 275 276 /* check sizes and block sizes on spare disks */ 277 bs = 1 << raidPtr->logBytesPerSector; 278 for (i = 0; i < raidPtr->numSpare; i++) { 279 if (disks[i].blockSize != bs) { 280 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 281 ret = EINVAL; 282 goto fail; 283 } 284 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 285 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 286 disks[i].devname, disks[i].blockSize, 287 (long int) raidPtr->sectorsPerDisk); 288 ret = EINVAL; 289 goto fail; 290 } else 291 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 292 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 293 294 disks[i].numBlocks = raidPtr->sectorsPerDisk; 295 } 296 } 297 298 return (0); 299 300 fail: 301 302 /* Release the hold on the main components. We've failed to allocate 303 * a spare, and since we're failing, we need to free things.. 304 305 XXX failing to allocate a spare is *not* that big of a deal... 306 We *can* survive without it, if need be, esp. if we get hot 307 adding working. 308 309 If we don't fail out here, then we need a way to remove this spare... 310 that should be easier to do here than if we are "live"... 311 312 */ 313 314 rf_UnconfigureVnodes( raidPtr ); 315 316 return (ret); 317 } 318 319 static int 320 rf_AllocDiskStructures(raidPtr, cfgPtr) 321 RF_Raid_t *raidPtr; 322 RF_Config_t *cfgPtr; 323 { 324 RF_RaidDisk_t **disks; 325 int ret; 326 int r; 327 328 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 329 (RF_RaidDisk_t **), raidPtr->cleanupList); 330 if (disks == NULL) { 331 ret = ENOMEM; 332 goto fail; 333 } 334 raidPtr->Disks = disks; 335 /* get space for the device-specific stuff... */ 336 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 337 sizeof(struct raidcinfo *), (struct raidcinfo **), 338 raidPtr->cleanupList); 339 if (raidPtr->raid_cinfo == NULL) { 340 ret = ENOMEM; 341 goto fail; 342 } 343 344 for (r = 0; r < raidPtr->numRow; r++) { 345 /* We allocate RF_MAXSPARE on the first row so that we 346 have room to do hot-swapping of spares */ 347 RF_CallocAndAdd(disks[r], raidPtr->numCol 348 + ((r == 0) ? RF_MAXSPARE : 0), 349 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 350 raidPtr->cleanupList); 351 if (disks[r] == NULL) { 352 ret = ENOMEM; 353 goto fail; 354 } 355 /* get more space for device specific stuff.. */ 356 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 357 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), 358 sizeof(struct raidcinfo), (struct raidcinfo *), 359 raidPtr->cleanupList); 360 if (raidPtr->raid_cinfo[r] == NULL) { 361 ret = ENOMEM; 362 goto fail; 363 } 364 } 365 return(0); 366 fail: 367 rf_UnconfigureVnodes( raidPtr ); 368 369 return(ret); 370 } 371 372 373 /* configure a single disk during auto-configuration at boot */ 374 int 375 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 376 RF_Raid_t *raidPtr; 377 RF_Config_t *cfgPtr; 378 RF_AutoConfig_t *auto_config; 379 { 380 RF_RaidDisk_t **disks; 381 RF_RaidDisk_t *diskPtr; 382 RF_RowCol_t r, c; 383 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 384 int bs, ret; 385 int numFailuresThisRow; 386 int force; 387 RF_AutoConfig_t *ac; 388 int parity_good; 389 int mod_counter; 390 391 #if DEBUG 392 printf("Starting autoconfiguration of RAID set...\n"); 393 #endif 394 force = cfgPtr->force; 395 396 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 397 if (ret) 398 goto fail; 399 400 disks = raidPtr->Disks; 401 402 /* assume the parity will be fine.. */ 403 parity_good = RF_RAID_CLEAN; 404 405 /* Check for mod_counters that are too low */ 406 mod_counter = -1; 407 ac = auto_config; 408 while(ac!=NULL) { 409 if (ac->clabel->mod_counter > mod_counter) { 410 mod_counter = ac->clabel->mod_counter; 411 } 412 ac = ac->next; 413 } 414 if (mod_counter == -1) { 415 /* mod_counters were all negative!?!?!? 416 Ok, we can deal with that. */ 417 #if 0 418 ac = auto_config; 419 while(ac!=NULL) { 420 if (ac->clabel->mod_counter > mod_counter) { 421 mod_counter = ac->clabel->mod_counter; 422 } 423 ac = ac->next; 424 } 425 #endif 426 } 427 428 for (r = 0; r < raidPtr->numRow; r++) { 429 numFailuresThisRow = 0; 430 for (c = 0; c < raidPtr->numCol; c++) { 431 diskPtr = &disks[r][c]; 432 433 /* find this row/col in the autoconfig */ 434 #if DEBUG 435 printf("Looking for %d,%d in autoconfig\n",r,c); 436 #endif 437 ac = auto_config; 438 while(ac!=NULL) { 439 if (ac->clabel==NULL) { 440 /* big-time bad news. */ 441 goto fail; 442 } 443 if ((ac->clabel->row == r) && 444 (ac->clabel->column == c)) { 445 /* it's this one... */ 446 #if DEBUG 447 printf("Found: %s at %d,%d\n", 448 ac->devname,r,c); 449 #endif 450 451 break; 452 } 453 ac=ac->next; 454 } 455 456 if (ac!=NULL) { 457 /* Found it. Configure it.. */ 458 diskPtr->blockSize = ac->clabel->blockSize; 459 diskPtr->numBlocks = ac->clabel->numBlocks; 460 /* Note: rf_protectedSectors is already 461 factored into numBlocks here */ 462 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 463 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 464 465 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 466 ac->clabel, sizeof(*ac->clabel)); 467 sprintf(diskPtr->devname, "/dev/%s", 468 ac->devname); 469 470 /* note the fact that this component was 471 autoconfigured. You'll need this info 472 later. Trust me :) */ 473 diskPtr->auto_configured = 1; 474 diskPtr->dev = ac->dev; 475 476 /* 477 * we allow the user to specify that 478 * only a fraction of the disks should 479 * be used this is just for debug: it 480 * speeds up the parity scan 481 */ 482 483 diskPtr->numBlocks = diskPtr->numBlocks * 484 rf_sizePercentage / 100; 485 486 /* XXX these will get set multiple times, 487 but since we're autoconfiguring, they'd 488 better be always the same each time! 489 If not, this is the least of your worries */ 490 491 bs = diskPtr->blockSize; 492 min_numblks = diskPtr->numBlocks; 493 494 /* this gets done multiple times, but that's 495 fine -- the serial number will be the same 496 for all components, guaranteed */ 497 raidPtr->serial_number = 498 ac->clabel->serial_number; 499 /* check the last time the label 500 was modified */ 501 if (ac->clabel->mod_counter != 502 mod_counter) { 503 /* Even though we've filled in all 504 of the above, we don't trust 505 this component since it's 506 modification counter is not 507 in sync with the rest, and we really 508 consider it to be failed. */ 509 disks[r][c].status = rf_ds_failed; 510 numFailuresThisRow++; 511 } else { 512 if (ac->clabel->clean != 513 RF_RAID_CLEAN) { 514 parity_good = RF_RAID_DIRTY; 515 } 516 } 517 } else { 518 /* Didn't find it at all!! 519 Component must really be dead */ 520 disks[r][c].status = rf_ds_failed; 521 numFailuresThisRow++; 522 } 523 } 524 /* XXX fix for n-fault tolerant */ 525 /* XXX this should probably check to see how many failures 526 we can handle for this configuration! */ 527 if (numFailuresThisRow > 0) 528 raidPtr->status[r] = rf_rs_degraded; 529 } 530 531 raidPtr->mod_counter = mod_counter; 532 533 /* note the state of the parity, if any */ 534 raidPtr->parity_good = parity_good; 535 raidPtr->sectorsPerDisk = min_numblks; 536 raidPtr->logBytesPerSector = ffs(bs) - 1; 537 raidPtr->bytesPerSector = bs; 538 raidPtr->sectorMask = bs - 1; 539 return (0); 540 541 fail: 542 543 rf_UnconfigureVnodes( raidPtr ); 544 545 return (ret); 546 547 } 548 549 /* configure a single disk in the array */ 550 int 551 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 552 RF_Raid_t *raidPtr; 553 char *buf; 554 RF_RaidDisk_t *diskPtr; 555 RF_RowCol_t row; 556 RF_RowCol_t col; 557 { 558 char *p; 559 int retcode; 560 561 struct partinfo dpart; 562 struct vnode *vp; 563 struct vattr va; 564 struct proc *proc; 565 int error; 566 567 retcode = 0; 568 p = rf_find_non_white(buf); 569 if (p[strlen(p) - 1] == '\n') { 570 /* strip off the newline */ 571 p[strlen(p) - 1] = '\0'; 572 } 573 (void) strcpy(diskPtr->devname, p); 574 575 proc = raidPtr->engine_thread; 576 577 /* Let's start by claiming the component is fine and well... */ 578 diskPtr->status = rf_ds_optimal; 579 580 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 581 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 582 583 error = raidlookup(diskPtr->devname, proc, &vp); 584 if (error) { 585 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 586 if (error == ENXIO) { 587 /* the component isn't there... must be dead :-( */ 588 diskPtr->status = rf_ds_failed; 589 } else { 590 return (error); 591 } 592 } 593 if (diskPtr->status == rf_ds_optimal) { 594 595 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 596 return (error); 597 } 598 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 599 FREAD, proc->p_ucred, proc); 600 if (error) { 601 return (error); 602 } 603 604 diskPtr->blockSize = dpart.disklab->d_secsize; 605 606 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 607 diskPtr->partitionSize = dpart.part->p_size; 608 609 raidPtr->raid_cinfo[row][col].ci_vp = vp; 610 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 611 612 /* This component was not automatically configured */ 613 diskPtr->auto_configured = 0; 614 diskPtr->dev = va.va_rdev; 615 616 /* we allow the user to specify that only a fraction of the 617 * disks should be used this is just for debug: it speeds up 618 * the parity scan */ 619 diskPtr->numBlocks = diskPtr->numBlocks * 620 rf_sizePercentage / 100; 621 } 622 return (0); 623 } 624 625 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 626 RF_ComponentLabel_t *); 627 628 static void 629 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 630 RF_Raid_t *raidPtr; 631 int row; 632 int column; 633 char *dev_name; 634 RF_ComponentLabel_t *ci_label; 635 { 636 637 printf("raid%d: Component %s being configured at row: %d col: %d\n", 638 raidPtr->raidid, dev_name, row, column ); 639 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 640 ci_label->row, ci_label->column, 641 ci_label->num_rows, ci_label->num_columns); 642 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 643 ci_label->version, ci_label->serial_number, 644 ci_label->mod_counter); 645 printf(" Clean: %s Status: %d\n", 646 ci_label->clean ? "Yes" : "No", ci_label->status ); 647 } 648 649 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 650 RF_ComponentLabel_t *, int, int ); 651 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 652 serial_number, mod_counter ) 653 RF_Raid_t *raidPtr; 654 int row; 655 int column; 656 char *dev_name; 657 RF_ComponentLabel_t *ci_label; 658 int serial_number; 659 int mod_counter; 660 { 661 int fatal_error = 0; 662 663 if (serial_number != ci_label->serial_number) { 664 printf("%s has a different serial number: %d %d\n", 665 dev_name, serial_number, ci_label->serial_number); 666 fatal_error = 1; 667 } 668 if (mod_counter != ci_label->mod_counter) { 669 printf("%s has a different modfication count: %d %d\n", 670 dev_name, mod_counter, ci_label->mod_counter); 671 } 672 673 if (row != ci_label->row) { 674 printf("Row out of alignment for: %s\n", dev_name); 675 fatal_error = 1; 676 } 677 if (column != ci_label->column) { 678 printf("Column out of alignment for: %s\n", dev_name); 679 fatal_error = 1; 680 } 681 if (raidPtr->numRow != ci_label->num_rows) { 682 printf("Number of rows do not match for: %s\n", dev_name); 683 fatal_error = 1; 684 } 685 if (raidPtr->numCol != ci_label->num_columns) { 686 printf("Number of columns do not match for: %s\n", dev_name); 687 fatal_error = 1; 688 } 689 if (ci_label->clean == 0) { 690 /* it's not clean, but that's not fatal */ 691 printf("%s is not clean!\n", dev_name); 692 } 693 return(fatal_error); 694 } 695 696 697 /* 698 699 rf_CheckLabels() - check all the component labels for consistency. 700 Return an error if there is anything major amiss. 701 702 */ 703 704 int 705 rf_CheckLabels( raidPtr, cfgPtr ) 706 RF_Raid_t *raidPtr; 707 RF_Config_t *cfgPtr; 708 { 709 int r,c; 710 char *dev_name; 711 RF_ComponentLabel_t *ci_label; 712 int serial_number = 0; 713 int mod_number = 0; 714 int fatal_error = 0; 715 int mod_values[4]; 716 int mod_count[4]; 717 int ser_values[4]; 718 int ser_count[4]; 719 int num_ser; 720 int num_mod; 721 int i; 722 int found; 723 int hosed_row; 724 int hosed_column; 725 int too_fatal; 726 int parity_good; 727 int force; 728 729 hosed_row = -1; 730 hosed_column = -1; 731 too_fatal = 0; 732 force = cfgPtr->force; 733 734 /* 735 We're going to try to be a little intelligent here. If one 736 component's label is bogus, and we can identify that it's the 737 *only* one that's gone, we'll mark it as "failed" and allow 738 the configuration to proceed. This will be the *only* case 739 that we'll proceed if there would be (otherwise) fatal errors. 740 741 Basically we simply keep a count of how many components had 742 what serial number. If all but one agree, we simply mark 743 the disagreeing component as being failed, and allow 744 things to come up "normally". 745 746 We do this first for serial numbers, and then for "mod_counter". 747 748 */ 749 750 num_ser = 0; 751 num_mod = 0; 752 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 753 for (c = 0; c < raidPtr->numCol; c++) { 754 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 755 found=0; 756 for(i=0;i<num_ser;i++) { 757 if (ser_values[i] == ci_label->serial_number) { 758 ser_count[i]++; 759 found=1; 760 break; 761 } 762 } 763 if (!found) { 764 ser_values[num_ser] = ci_label->serial_number; 765 ser_count[num_ser] = 1; 766 num_ser++; 767 if (num_ser>2) { 768 fatal_error = 1; 769 break; 770 } 771 } 772 found=0; 773 for(i=0;i<num_mod;i++) { 774 if (mod_values[i] == ci_label->mod_counter) { 775 mod_count[i]++; 776 found=1; 777 break; 778 } 779 } 780 if (!found) { 781 mod_values[num_mod] = ci_label->mod_counter; 782 mod_count[num_mod] = 1; 783 num_mod++; 784 if (num_mod>2) { 785 fatal_error = 1; 786 break; 787 } 788 } 789 } 790 } 791 #if DEBUG 792 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 793 for(i=0;i<num_ser;i++) { 794 printf("%d %d\n", ser_values[i], ser_count[i]); 795 } 796 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 797 for(i=0;i<num_mod;i++) { 798 printf("%d %d\n", mod_values[i], mod_count[i]); 799 } 800 #endif 801 serial_number = ser_values[0]; 802 if (num_ser == 2) { 803 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 804 /* Locate the maverick component */ 805 if (ser_count[1] > ser_count[0]) { 806 serial_number = ser_values[1]; 807 } 808 for (r = 0; r < raidPtr->numRow; r++) { 809 for (c = 0; c < raidPtr->numCol; c++) { 810 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 811 if (serial_number != 812 ci_label->serial_number) { 813 hosed_row = r; 814 hosed_column = c; 815 break; 816 } 817 } 818 } 819 printf("Hosed component: %s\n", 820 &cfgPtr->devnames[hosed_row][hosed_column][0]); 821 if (!force) { 822 /* we'll fail this component, as if there are 823 other major errors, we arn't forcing things 824 and we'll abort the config anyways */ 825 raidPtr->Disks[hosed_row][hosed_column].status 826 = rf_ds_failed; 827 raidPtr->numFailures++; 828 raidPtr->status[hosed_row] = rf_rs_degraded; 829 } 830 } else { 831 too_fatal = 1; 832 } 833 if (cfgPtr->parityConfig == '0') { 834 /* We've identified two different serial numbers. 835 RAID 0 can't cope with that, so we'll punt */ 836 too_fatal = 1; 837 } 838 839 } 840 841 /* record the serial number for later. If we bail later, setting 842 this doesn't matter, otherwise we've got the best guess at the 843 correct serial number */ 844 raidPtr->serial_number = serial_number; 845 846 mod_number = mod_values[0]; 847 if (num_mod == 2) { 848 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 849 /* Locate the maverick component */ 850 if (mod_count[1] > mod_count[0]) { 851 mod_number = mod_values[1]; 852 } else if (mod_count[1] < mod_count[0]) { 853 mod_number = mod_values[0]; 854 } else { 855 /* counts of different modification values 856 are the same. Assume greater value is 857 the correct one, all other things 858 considered */ 859 if (mod_values[0] > mod_values[1]) { 860 mod_number = mod_values[0]; 861 } else { 862 mod_number = mod_values[1]; 863 } 864 865 } 866 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 867 for (c = 0; c < raidPtr->numCol; c++) { 868 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 869 if (mod_number != 870 ci_label->mod_counter) { 871 if ( ( hosed_row == r ) && 872 ( hosed_column == c )) { 873 /* same one. Can 874 deal with it. */ 875 } else { 876 hosed_row = r; 877 hosed_column = c; 878 if (num_ser != 1) { 879 too_fatal = 1; 880 break; 881 } 882 } 883 } 884 } 885 } 886 printf("Hosed component: %s\n", 887 &cfgPtr->devnames[hosed_row][hosed_column][0]); 888 if (!force) { 889 /* we'll fail this component, as if there are 890 other major errors, we arn't forcing things 891 and we'll abort the config anyways */ 892 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 893 raidPtr->Disks[hosed_row][hosed_column].status 894 = rf_ds_failed; 895 raidPtr->numFailures++; 896 raidPtr->status[hosed_row] = rf_rs_degraded; 897 } 898 } 899 } else { 900 too_fatal = 1; 901 } 902 if (cfgPtr->parityConfig == '0') { 903 /* We've identified two different mod counters. 904 RAID 0 can't cope with that, so we'll punt */ 905 too_fatal = 1; 906 } 907 } 908 909 raidPtr->mod_counter = mod_number; 910 911 if (too_fatal) { 912 /* we've had both a serial number mismatch, and a mod_counter 913 mismatch -- and they involved two different components!! 914 Bail -- make things fail so that the user must force 915 the issue... */ 916 hosed_row = -1; 917 hosed_column = -1; 918 } 919 920 if (num_ser > 2) { 921 printf("raid%d: Too many different serial numbers!\n", 922 raidPtr->raidid); 923 } 924 925 if (num_mod > 2) { 926 printf("raid%d: Too many different mod counters!\n", 927 raidPtr->raidid); 928 } 929 930 /* we start by assuming the parity will be good, and flee from 931 that notion at the slightest sign of trouble */ 932 933 parity_good = RF_RAID_CLEAN; 934 for (r = 0; r < raidPtr->numRow; r++) { 935 for (c = 0; c < raidPtr->numCol; c++) { 936 dev_name = &cfgPtr->devnames[r][c][0]; 937 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 938 939 if ((r == hosed_row) && (c == hosed_column)) { 940 printf("raid%d: Ignoring %s\n", 941 raidPtr->raidid, dev_name); 942 } else { 943 rf_print_label_status( raidPtr, r, c, 944 dev_name, ci_label ); 945 if (rf_check_label_vitals( raidPtr, r, c, 946 dev_name, ci_label, 947 serial_number, 948 mod_number )) { 949 fatal_error = 1; 950 } 951 if (ci_label->clean != RF_RAID_CLEAN) { 952 parity_good = RF_RAID_DIRTY; 953 } 954 } 955 } 956 } 957 if (fatal_error) { 958 parity_good = RF_RAID_DIRTY; 959 } 960 961 /* we note the state of the parity */ 962 raidPtr->parity_good = parity_good; 963 964 return(fatal_error); 965 } 966 967 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *); 968 int 969 rf_add_hot_spare(raidPtr, sparePtr) 970 RF_Raid_t *raidPtr; 971 RF_SingleComponent_t *sparePtr; 972 { 973 RF_RaidDisk_t *disks; 974 RF_DiskQueue_t *spareQueues; 975 int ret; 976 unsigned int bs; 977 int spare_number; 978 979 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); 980 printf("Num col: %d\n",raidPtr->numCol); 981 if (raidPtr->numSpare >= RF_MAXSPARE) { 982 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 983 return(EINVAL); 984 } 985 986 RF_LOCK_MUTEX(raidPtr->mutex); 987 988 /* the beginning of the spares... */ 989 disks = &raidPtr->Disks[0][raidPtr->numCol]; 990 991 spare_number = raidPtr->numSpare; 992 993 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 994 &disks[spare_number], 0, 995 raidPtr->numCol + spare_number); 996 997 if (ret) 998 goto fail; 999 if (disks[spare_number].status != rf_ds_optimal) { 1000 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1001 sparePtr->component_name); 1002 ret=EINVAL; 1003 goto fail; 1004 } else { 1005 disks[spare_number].status = rf_ds_spare; 1006 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1007 disks[spare_number].devname, 1008 (long int) disks[spare_number].numBlocks, 1009 disks[spare_number].blockSize, 1010 (long int) disks[spare_number].numBlocks * 1011 disks[spare_number].blockSize / 1024 / 1024); 1012 } 1013 1014 1015 /* check sizes and block sizes on the spare disk */ 1016 bs = 1 << raidPtr->logBytesPerSector; 1017 if (disks[spare_number].blockSize != bs) { 1018 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1019 ret = EINVAL; 1020 goto fail; 1021 } 1022 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1023 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1024 disks[spare_number].devname, 1025 disks[spare_number].blockSize, 1026 (long int) raidPtr->sectorsPerDisk); 1027 ret = EINVAL; 1028 goto fail; 1029 } else { 1030 if (disks[spare_number].numBlocks > 1031 raidPtr->sectorsPerDisk) { 1032 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1033 (long int) raidPtr->sectorsPerDisk); 1034 1035 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1036 } 1037 } 1038 1039 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1040 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1041 0, raidPtr->numCol + spare_number, 1042 raidPtr->qType, 1043 raidPtr->sectorsPerDisk, 1044 raidPtr->Disks[0][raidPtr->numCol + 1045 spare_number].dev, 1046 raidPtr->maxOutstanding, 1047 &raidPtr->shutdownList, 1048 raidPtr->cleanupList); 1049 1050 1051 raidPtr->numSpare++; 1052 RF_UNLOCK_MUTEX(raidPtr->mutex); 1053 return (0); 1054 1055 fail: 1056 RF_UNLOCK_MUTEX(raidPtr->mutex); 1057 return(ret); 1058 } 1059 1060 int 1061 rf_remove_hot_spare(raidPtr,sparePtr) 1062 RF_Raid_t *raidPtr; 1063 RF_SingleComponent_t *sparePtr; 1064 { 1065 int spare_number; 1066 1067 1068 if (raidPtr->numSpare==0) { 1069 printf("No spares to remove!\n"); 1070 return(EINVAL); 1071 } 1072 1073 spare_number = sparePtr->column; 1074 1075 return(EINVAL); /* XXX not implemented yet */ 1076 #if 0 1077 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1078 return(EINVAL); 1079 } 1080 1081 /* verify that this spare isn't in use... */ 1082 1083 1084 1085 1086 /* it's gone.. */ 1087 1088 raidPtr->numSpare--; 1089 1090 return(0); 1091 #endif 1092 } 1093 1094 1095