1 /* $NetBSD: rf_disks.c,v 1.34 2000/12/05 01:35:56 oster Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include "rf_types.h" 70 #include "rf_raid.h" 71 #include "rf_alloclist.h" 72 #include "rf_utils.h" 73 #include "rf_configure.h" 74 #include "rf_general.h" 75 #include "rf_options.h" 76 #include "rf_kintf.h" 77 #include "rf_netbsd.h" 78 79 #include <sys/types.h> 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 88 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 89 RF_ComponentLabel_t *); 90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 91 RF_ComponentLabel_t *, int, int ); 92 93 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 94 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 95 96 /************************************************************************** 97 * 98 * initialize the disks comprising the array 99 * 100 * We want the spare disks to have regular row,col numbers so that we can 101 * easily substitue a spare for a failed disk. But, the driver code assumes 102 * throughout that the array contains numRow by numCol _non-spare_ disks, so 103 * it's not clear how to fit in the spares. This is an unfortunate holdover 104 * from raidSim. The quick and dirty fix is to make row zero bigger than the 105 * rest, and put all the spares in it. This probably needs to get changed 106 * eventually. 107 * 108 **************************************************************************/ 109 110 int 111 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 112 RF_ShutdownList_t **listp; 113 RF_Raid_t *raidPtr; 114 RF_Config_t *cfgPtr; 115 { 116 RF_RaidDisk_t **disks; 117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 118 RF_RowCol_t r, c; 119 int bs, ret; 120 unsigned i, count, foundone = 0, numFailuresThisRow; 121 int force; 122 123 force = cfgPtr->force; 124 125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 126 if (ret) 127 goto fail; 128 129 disks = raidPtr->Disks; 130 131 for (r = 0; r < raidPtr->numRow; r++) { 132 numFailuresThisRow = 0; 133 for (c = 0; c < raidPtr->numCol; c++) { 134 ret = rf_ConfigureDisk(raidPtr, 135 &cfgPtr->devnames[r][c][0], 136 &disks[r][c], r, c); 137 138 if (ret) 139 goto fail; 140 141 if (disks[r][c].status == rf_ds_optimal) { 142 raidread_component_label( 143 raidPtr->raid_cinfo[r][c].ci_dev, 144 raidPtr->raid_cinfo[r][c].ci_vp, 145 &raidPtr->raid_cinfo[r][c].ci_label); 146 } 147 148 if (disks[r][c].status != rf_ds_optimal) { 149 numFailuresThisRow++; 150 } else { 151 if (disks[r][c].numBlocks < min_numblks) 152 min_numblks = disks[r][c].numBlocks; 153 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 154 r, c, disks[r][c].devname, 155 (long int) disks[r][c].numBlocks, 156 disks[r][c].blockSize, 157 (long int) disks[r][c].numBlocks * 158 disks[r][c].blockSize / 1024 / 1024); 159 } 160 } 161 /* XXX fix for n-fault tolerant */ 162 /* XXX this should probably check to see how many failures 163 we can handle for this configuration! */ 164 if (numFailuresThisRow > 0) 165 raidPtr->status[r] = rf_rs_degraded; 166 } 167 168 /* all disks must be the same size & have the same block size, bs must 169 * be a power of 2 */ 170 bs = 0; 171 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 172 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 173 if (disks[r][c].status == rf_ds_optimal) { 174 bs = disks[r][c].blockSize; 175 foundone = 1; 176 } 177 } 178 } 179 if (!foundone) { 180 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 181 ret = EINVAL; 182 goto fail; 183 } 184 for (count = 0, i = 1; i; i <<= 1) 185 if (bs & i) 186 count++; 187 if (count != 1) { 188 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 189 ret = EINVAL; 190 goto fail; 191 } 192 193 if (rf_CheckLabels( raidPtr, cfgPtr )) { 194 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 195 if (force != 0) { 196 printf("raid%d: Fatal errors being ignored.\n", 197 raidPtr->raidid); 198 } else { 199 ret = EINVAL; 200 goto fail; 201 } 202 } 203 204 for (r = 0; r < raidPtr->numRow; r++) { 205 for (c = 0; c < raidPtr->numCol; c++) { 206 if (disks[r][c].status == rf_ds_optimal) { 207 if (disks[r][c].blockSize != bs) { 208 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 209 ret = EINVAL; 210 goto fail; 211 } 212 if (disks[r][c].numBlocks != min_numblks) { 213 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 214 r, c, (int) min_numblks); 215 disks[r][c].numBlocks = min_numblks; 216 } 217 } 218 } 219 } 220 221 raidPtr->sectorsPerDisk = min_numblks; 222 raidPtr->logBytesPerSector = ffs(bs) - 1; 223 raidPtr->bytesPerSector = bs; 224 raidPtr->sectorMask = bs - 1; 225 return (0); 226 227 fail: 228 229 rf_UnconfigureVnodes( raidPtr ); 230 231 return (ret); 232 } 233 234 235 /**************************************************************************** 236 * set up the data structures describing the spare disks in the array 237 * recall from the above comment that the spare disk descriptors are stored 238 * in row zero, which is specially expanded to hold them. 239 ****************************************************************************/ 240 int 241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 242 RF_ShutdownList_t ** listp; 243 RF_Raid_t * raidPtr; 244 RF_Config_t * cfgPtr; 245 { 246 int i, ret; 247 unsigned int bs; 248 RF_RaidDisk_t *disks; 249 int num_spares_done; 250 251 num_spares_done = 0; 252 253 /* The space for the spares should have already been allocated by 254 * ConfigureDisks() */ 255 256 disks = &raidPtr->Disks[0][raidPtr->numCol]; 257 for (i = 0; i < raidPtr->numSpare; i++) { 258 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 259 &disks[i], 0, raidPtr->numCol + i); 260 if (ret) 261 goto fail; 262 if (disks[i].status != rf_ds_optimal) { 263 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 264 &cfgPtr->spare_names[i][0]); 265 } else { 266 disks[i].status = rf_ds_spare; /* change status to 267 * spare */ 268 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 269 disks[i].devname, 270 (long int) disks[i].numBlocks, disks[i].blockSize, 271 (long int) disks[i].numBlocks * 272 disks[i].blockSize / 1024 / 1024); 273 } 274 num_spares_done++; 275 } 276 277 /* check sizes and block sizes on spare disks */ 278 bs = 1 << raidPtr->logBytesPerSector; 279 for (i = 0; i < raidPtr->numSpare; i++) { 280 if (disks[i].blockSize != bs) { 281 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 282 ret = EINVAL; 283 goto fail; 284 } 285 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 286 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 287 disks[i].devname, disks[i].blockSize, 288 (long int) raidPtr->sectorsPerDisk); 289 ret = EINVAL; 290 goto fail; 291 } else 292 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 293 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 294 295 disks[i].numBlocks = raidPtr->sectorsPerDisk; 296 } 297 } 298 299 return (0); 300 301 fail: 302 303 /* Release the hold on the main components. We've failed to allocate 304 * a spare, and since we're failing, we need to free things.. 305 306 XXX failing to allocate a spare is *not* that big of a deal... 307 We *can* survive without it, if need be, esp. if we get hot 308 adding working. 309 310 If we don't fail out here, then we need a way to remove this spare... 311 that should be easier to do here than if we are "live"... 312 313 */ 314 315 rf_UnconfigureVnodes( raidPtr ); 316 317 return (ret); 318 } 319 320 static int 321 rf_AllocDiskStructures(raidPtr, cfgPtr) 322 RF_Raid_t *raidPtr; 323 RF_Config_t *cfgPtr; 324 { 325 RF_RaidDisk_t **disks; 326 int ret; 327 int r; 328 329 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 330 (RF_RaidDisk_t **), raidPtr->cleanupList); 331 if (disks == NULL) { 332 ret = ENOMEM; 333 goto fail; 334 } 335 raidPtr->Disks = disks; 336 /* get space for the device-specific stuff... */ 337 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 338 sizeof(struct raidcinfo *), (struct raidcinfo **), 339 raidPtr->cleanupList); 340 if (raidPtr->raid_cinfo == NULL) { 341 ret = ENOMEM; 342 goto fail; 343 } 344 345 for (r = 0; r < raidPtr->numRow; r++) { 346 /* We allocate RF_MAXSPARE on the first row so that we 347 have room to do hot-swapping of spares */ 348 RF_CallocAndAdd(disks[r], raidPtr->numCol 349 + ((r == 0) ? RF_MAXSPARE : 0), 350 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 351 raidPtr->cleanupList); 352 if (disks[r] == NULL) { 353 ret = ENOMEM; 354 goto fail; 355 } 356 /* get more space for device specific stuff.. */ 357 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 358 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), 359 sizeof(struct raidcinfo), (struct raidcinfo *), 360 raidPtr->cleanupList); 361 if (raidPtr->raid_cinfo[r] == NULL) { 362 ret = ENOMEM; 363 goto fail; 364 } 365 } 366 return(0); 367 fail: 368 rf_UnconfigureVnodes( raidPtr ); 369 370 return(ret); 371 } 372 373 374 /* configure a single disk during auto-configuration at boot */ 375 int 376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 377 RF_Raid_t *raidPtr; 378 RF_Config_t *cfgPtr; 379 RF_AutoConfig_t *auto_config; 380 { 381 RF_RaidDisk_t **disks; 382 RF_RaidDisk_t *diskPtr; 383 RF_RowCol_t r, c; 384 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 385 int bs, ret; 386 int numFailuresThisRow; 387 int force; 388 RF_AutoConfig_t *ac; 389 int parity_good; 390 int mod_counter; 391 int mod_counter_found; 392 393 #if DEBUG 394 printf("Starting autoconfiguration of RAID set...\n"); 395 #endif 396 force = cfgPtr->force; 397 398 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 399 if (ret) 400 goto fail; 401 402 disks = raidPtr->Disks; 403 404 /* assume the parity will be fine.. */ 405 parity_good = RF_RAID_CLEAN; 406 407 /* Check for mod_counters that are too low */ 408 mod_counter_found = 0; 409 mod_counter = 0; 410 ac = auto_config; 411 while(ac!=NULL) { 412 if (mod_counter_found==0) { 413 mod_counter = ac->clabel->mod_counter; 414 mod_counter_found = 1; 415 } else { 416 if (ac->clabel->mod_counter > mod_counter) { 417 mod_counter = ac->clabel->mod_counter; 418 } 419 } 420 ac->flag = 0; /* clear the general purpose flag */ 421 ac = ac->next; 422 } 423 424 bs = 0; 425 for (r = 0; r < raidPtr->numRow; r++) { 426 numFailuresThisRow = 0; 427 for (c = 0; c < raidPtr->numCol; c++) { 428 diskPtr = &disks[r][c]; 429 430 /* find this row/col in the autoconfig */ 431 #if DEBUG 432 printf("Looking for %d,%d in autoconfig\n",r,c); 433 #endif 434 ac = auto_config; 435 while(ac!=NULL) { 436 if (ac->clabel==NULL) { 437 /* big-time bad news. */ 438 goto fail; 439 } 440 if ((ac->clabel->row == r) && 441 (ac->clabel->column == c) && 442 (ac->clabel->mod_counter == mod_counter)) { 443 /* it's this one... */ 444 /* flag it as 'used', so we don't 445 free it later. */ 446 ac->flag = 1; 447 #if DEBUG 448 printf("Found: %s at %d,%d\n", 449 ac->devname,r,c); 450 #endif 451 452 break; 453 } 454 ac=ac->next; 455 } 456 457 if (ac==NULL) { 458 /* we didn't find an exact match with a 459 correct mod_counter above... can we 460 find one with an incorrect mod_counter 461 to use instead? (this one, if we find 462 it, will be marked as failed once the 463 set configures) 464 */ 465 466 ac = auto_config; 467 while(ac!=NULL) { 468 if (ac->clabel==NULL) { 469 /* big-time bad news. */ 470 goto fail; 471 } 472 if ((ac->clabel->row == r) && 473 (ac->clabel->column == c)) { 474 /* it's this one... 475 flag it as 'used', so we 476 don't free it later. */ 477 ac->flag = 1; 478 #if DEBUG 479 printf("Found(low mod_counter): %s at %d,%d\n", 480 ac->devname,r,c); 481 #endif 482 483 break; 484 } 485 ac=ac->next; 486 } 487 } 488 489 490 491 if (ac!=NULL) { 492 /* Found it. Configure it.. */ 493 diskPtr->blockSize = ac->clabel->blockSize; 494 diskPtr->numBlocks = ac->clabel->numBlocks; 495 /* Note: rf_protectedSectors is already 496 factored into numBlocks here */ 497 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 498 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 499 500 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 501 ac->clabel, sizeof(*ac->clabel)); 502 sprintf(diskPtr->devname, "/dev/%s", 503 ac->devname); 504 505 /* note the fact that this component was 506 autoconfigured. You'll need this info 507 later. Trust me :) */ 508 diskPtr->auto_configured = 1; 509 diskPtr->dev = ac->dev; 510 511 /* 512 * we allow the user to specify that 513 * only a fraction of the disks should 514 * be used this is just for debug: it 515 * speeds up the parity scan 516 */ 517 518 diskPtr->numBlocks = diskPtr->numBlocks * 519 rf_sizePercentage / 100; 520 521 /* XXX these will get set multiple times, 522 but since we're autoconfiguring, they'd 523 better be always the same each time! 524 If not, this is the least of your worries */ 525 526 bs = diskPtr->blockSize; 527 min_numblks = diskPtr->numBlocks; 528 529 /* this gets done multiple times, but that's 530 fine -- the serial number will be the same 531 for all components, guaranteed */ 532 raidPtr->serial_number = 533 ac->clabel->serial_number; 534 /* check the last time the label 535 was modified */ 536 if (ac->clabel->mod_counter != 537 mod_counter) { 538 /* Even though we've filled in all 539 of the above, we don't trust 540 this component since it's 541 modification counter is not 542 in sync with the rest, and we really 543 consider it to be failed. */ 544 disks[r][c].status = rf_ds_failed; 545 numFailuresThisRow++; 546 } else { 547 if (ac->clabel->clean != 548 RF_RAID_CLEAN) { 549 parity_good = RF_RAID_DIRTY; 550 } 551 } 552 } else { 553 /* Didn't find it at all!! 554 Component must really be dead */ 555 disks[r][c].status = rf_ds_failed; 556 sprintf(disks[r][c].devname,"component%d", 557 r * raidPtr->numCol + c); 558 numFailuresThisRow++; 559 } 560 } 561 /* XXX fix for n-fault tolerant */ 562 /* XXX this should probably check to see how many failures 563 we can handle for this configuration! */ 564 if (numFailuresThisRow > 0) 565 raidPtr->status[r] = rf_rs_degraded; 566 } 567 568 /* close the device for the ones that didn't get used */ 569 570 ac = auto_config; 571 while(ac!=NULL) { 572 if (ac->flag == 0) { 573 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 574 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0); 575 vput(ac->vp); 576 ac->vp = NULL; 577 #if DEBUG 578 printf("Released %s from auto-config set.\n", 579 ac->devname); 580 #endif 581 } 582 ac = ac->next; 583 } 584 585 raidPtr->mod_counter = mod_counter; 586 587 /* note the state of the parity, if any */ 588 raidPtr->parity_good = parity_good; 589 raidPtr->sectorsPerDisk = min_numblks; 590 raidPtr->logBytesPerSector = ffs(bs) - 1; 591 raidPtr->bytesPerSector = bs; 592 raidPtr->sectorMask = bs - 1; 593 return (0); 594 595 fail: 596 597 rf_UnconfigureVnodes( raidPtr ); 598 599 return (ret); 600 601 } 602 603 /* configure a single disk in the array */ 604 int 605 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 606 RF_Raid_t *raidPtr; 607 char *buf; 608 RF_RaidDisk_t *diskPtr; 609 RF_RowCol_t row; 610 RF_RowCol_t col; 611 { 612 char *p; 613 int retcode; 614 615 struct partinfo dpart; 616 struct vnode *vp; 617 struct vattr va; 618 struct proc *proc; 619 int error; 620 621 retcode = 0; 622 p = rf_find_non_white(buf); 623 if (p[strlen(p) - 1] == '\n') { 624 /* strip off the newline */ 625 p[strlen(p) - 1] = '\0'; 626 } 627 (void) strcpy(diskPtr->devname, p); 628 629 proc = raidPtr->engine_thread; 630 631 /* Let's start by claiming the component is fine and well... */ 632 diskPtr->status = rf_ds_optimal; 633 634 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 635 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 636 637 error = raidlookup(diskPtr->devname, proc, &vp); 638 if (error) { 639 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 640 if (error == ENXIO) { 641 /* the component isn't there... must be dead :-( */ 642 diskPtr->status = rf_ds_failed; 643 } else { 644 return (error); 645 } 646 } 647 if (diskPtr->status == rf_ds_optimal) { 648 649 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 650 return (error); 651 } 652 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 653 FREAD, proc->p_ucred, proc); 654 if (error) { 655 return (error); 656 } 657 658 diskPtr->blockSize = dpart.disklab->d_secsize; 659 660 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 661 diskPtr->partitionSize = dpart.part->p_size; 662 663 raidPtr->raid_cinfo[row][col].ci_vp = vp; 664 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 665 666 /* This component was not automatically configured */ 667 diskPtr->auto_configured = 0; 668 diskPtr->dev = va.va_rdev; 669 670 /* we allow the user to specify that only a fraction of the 671 * disks should be used this is just for debug: it speeds up 672 * the parity scan */ 673 diskPtr->numBlocks = diskPtr->numBlocks * 674 rf_sizePercentage / 100; 675 } 676 return (0); 677 } 678 679 static void 680 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 681 RF_Raid_t *raidPtr; 682 int row; 683 int column; 684 char *dev_name; 685 RF_ComponentLabel_t *ci_label; 686 { 687 688 printf("raid%d: Component %s being configured at row: %d col: %d\n", 689 raidPtr->raidid, dev_name, row, column ); 690 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 691 ci_label->row, ci_label->column, 692 ci_label->num_rows, ci_label->num_columns); 693 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 694 ci_label->version, ci_label->serial_number, 695 ci_label->mod_counter); 696 printf(" Clean: %s Status: %d\n", 697 ci_label->clean ? "Yes" : "No", ci_label->status ); 698 } 699 700 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 701 serial_number, mod_counter ) 702 RF_Raid_t *raidPtr; 703 int row; 704 int column; 705 char *dev_name; 706 RF_ComponentLabel_t *ci_label; 707 int serial_number; 708 int mod_counter; 709 { 710 int fatal_error = 0; 711 712 if (serial_number != ci_label->serial_number) { 713 printf("%s has a different serial number: %d %d\n", 714 dev_name, serial_number, ci_label->serial_number); 715 fatal_error = 1; 716 } 717 if (mod_counter != ci_label->mod_counter) { 718 printf("%s has a different modfication count: %d %d\n", 719 dev_name, mod_counter, ci_label->mod_counter); 720 } 721 722 if (row != ci_label->row) { 723 printf("Row out of alignment for: %s\n", dev_name); 724 fatal_error = 1; 725 } 726 if (column != ci_label->column) { 727 printf("Column out of alignment for: %s\n", dev_name); 728 fatal_error = 1; 729 } 730 if (raidPtr->numRow != ci_label->num_rows) { 731 printf("Number of rows do not match for: %s\n", dev_name); 732 fatal_error = 1; 733 } 734 if (raidPtr->numCol != ci_label->num_columns) { 735 printf("Number of columns do not match for: %s\n", dev_name); 736 fatal_error = 1; 737 } 738 if (ci_label->clean == 0) { 739 /* it's not clean, but that's not fatal */ 740 printf("%s is not clean!\n", dev_name); 741 } 742 return(fatal_error); 743 } 744 745 746 /* 747 748 rf_CheckLabels() - check all the component labels for consistency. 749 Return an error if there is anything major amiss. 750 751 */ 752 753 int 754 rf_CheckLabels( raidPtr, cfgPtr ) 755 RF_Raid_t *raidPtr; 756 RF_Config_t *cfgPtr; 757 { 758 int r,c; 759 char *dev_name; 760 RF_ComponentLabel_t *ci_label; 761 int serial_number = 0; 762 int mod_number = 0; 763 int fatal_error = 0; 764 int mod_values[4]; 765 int mod_count[4]; 766 int ser_values[4]; 767 int ser_count[4]; 768 int num_ser; 769 int num_mod; 770 int i; 771 int found; 772 int hosed_row; 773 int hosed_column; 774 int too_fatal; 775 int parity_good; 776 int force; 777 778 hosed_row = -1; 779 hosed_column = -1; 780 too_fatal = 0; 781 force = cfgPtr->force; 782 783 /* 784 We're going to try to be a little intelligent here. If one 785 component's label is bogus, and we can identify that it's the 786 *only* one that's gone, we'll mark it as "failed" and allow 787 the configuration to proceed. This will be the *only* case 788 that we'll proceed if there would be (otherwise) fatal errors. 789 790 Basically we simply keep a count of how many components had 791 what serial number. If all but one agree, we simply mark 792 the disagreeing component as being failed, and allow 793 things to come up "normally". 794 795 We do this first for serial numbers, and then for "mod_counter". 796 797 */ 798 799 num_ser = 0; 800 num_mod = 0; 801 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 802 for (c = 0; c < raidPtr->numCol; c++) { 803 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 804 found=0; 805 for(i=0;i<num_ser;i++) { 806 if (ser_values[i] == ci_label->serial_number) { 807 ser_count[i]++; 808 found=1; 809 break; 810 } 811 } 812 if (!found) { 813 ser_values[num_ser] = ci_label->serial_number; 814 ser_count[num_ser] = 1; 815 num_ser++; 816 if (num_ser>2) { 817 fatal_error = 1; 818 break; 819 } 820 } 821 found=0; 822 for(i=0;i<num_mod;i++) { 823 if (mod_values[i] == ci_label->mod_counter) { 824 mod_count[i]++; 825 found=1; 826 break; 827 } 828 } 829 if (!found) { 830 mod_values[num_mod] = ci_label->mod_counter; 831 mod_count[num_mod] = 1; 832 num_mod++; 833 if (num_mod>2) { 834 fatal_error = 1; 835 break; 836 } 837 } 838 } 839 } 840 #if DEBUG 841 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 842 for(i=0;i<num_ser;i++) { 843 printf("%d %d\n", ser_values[i], ser_count[i]); 844 } 845 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 846 for(i=0;i<num_mod;i++) { 847 printf("%d %d\n", mod_values[i], mod_count[i]); 848 } 849 #endif 850 serial_number = ser_values[0]; 851 if (num_ser == 2) { 852 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 853 /* Locate the maverick component */ 854 if (ser_count[1] > ser_count[0]) { 855 serial_number = ser_values[1]; 856 } 857 for (r = 0; r < raidPtr->numRow; r++) { 858 for (c = 0; c < raidPtr->numCol; c++) { 859 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 860 if (serial_number != 861 ci_label->serial_number) { 862 hosed_row = r; 863 hosed_column = c; 864 break; 865 } 866 } 867 } 868 printf("Hosed component: %s\n", 869 &cfgPtr->devnames[hosed_row][hosed_column][0]); 870 if (!force) { 871 /* we'll fail this component, as if there are 872 other major errors, we arn't forcing things 873 and we'll abort the config anyways */ 874 raidPtr->Disks[hosed_row][hosed_column].status 875 = rf_ds_failed; 876 raidPtr->numFailures++; 877 raidPtr->status[hosed_row] = rf_rs_degraded; 878 } 879 } else { 880 too_fatal = 1; 881 } 882 if (cfgPtr->parityConfig == '0') { 883 /* We've identified two different serial numbers. 884 RAID 0 can't cope with that, so we'll punt */ 885 too_fatal = 1; 886 } 887 888 } 889 890 /* record the serial number for later. If we bail later, setting 891 this doesn't matter, otherwise we've got the best guess at the 892 correct serial number */ 893 raidPtr->serial_number = serial_number; 894 895 mod_number = mod_values[0]; 896 if (num_mod == 2) { 897 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 898 /* Locate the maverick component */ 899 if (mod_count[1] > mod_count[0]) { 900 mod_number = mod_values[1]; 901 } else if (mod_count[1] < mod_count[0]) { 902 mod_number = mod_values[0]; 903 } else { 904 /* counts of different modification values 905 are the same. Assume greater value is 906 the correct one, all other things 907 considered */ 908 if (mod_values[0] > mod_values[1]) { 909 mod_number = mod_values[0]; 910 } else { 911 mod_number = mod_values[1]; 912 } 913 914 } 915 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 916 for (c = 0; c < raidPtr->numCol; c++) { 917 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 918 if (mod_number != 919 ci_label->mod_counter) { 920 if ( ( hosed_row == r ) && 921 ( hosed_column == c )) { 922 /* same one. Can 923 deal with it. */ 924 } else { 925 hosed_row = r; 926 hosed_column = c; 927 if (num_ser != 1) { 928 too_fatal = 1; 929 break; 930 } 931 } 932 } 933 } 934 } 935 printf("Hosed component: %s\n", 936 &cfgPtr->devnames[hosed_row][hosed_column][0]); 937 if (!force) { 938 /* we'll fail this component, as if there are 939 other major errors, we arn't forcing things 940 and we'll abort the config anyways */ 941 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 942 raidPtr->Disks[hosed_row][hosed_column].status 943 = rf_ds_failed; 944 raidPtr->numFailures++; 945 raidPtr->status[hosed_row] = rf_rs_degraded; 946 } 947 } 948 } else { 949 too_fatal = 1; 950 } 951 if (cfgPtr->parityConfig == '0') { 952 /* We've identified two different mod counters. 953 RAID 0 can't cope with that, so we'll punt */ 954 too_fatal = 1; 955 } 956 } 957 958 raidPtr->mod_counter = mod_number; 959 960 if (too_fatal) { 961 /* we've had both a serial number mismatch, and a mod_counter 962 mismatch -- and they involved two different components!! 963 Bail -- make things fail so that the user must force 964 the issue... */ 965 hosed_row = -1; 966 hosed_column = -1; 967 } 968 969 if (num_ser > 2) { 970 printf("raid%d: Too many different serial numbers!\n", 971 raidPtr->raidid); 972 } 973 974 if (num_mod > 2) { 975 printf("raid%d: Too many different mod counters!\n", 976 raidPtr->raidid); 977 } 978 979 /* we start by assuming the parity will be good, and flee from 980 that notion at the slightest sign of trouble */ 981 982 parity_good = RF_RAID_CLEAN; 983 for (r = 0; r < raidPtr->numRow; r++) { 984 for (c = 0; c < raidPtr->numCol; c++) { 985 dev_name = &cfgPtr->devnames[r][c][0]; 986 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 987 988 if ((r == hosed_row) && (c == hosed_column)) { 989 printf("raid%d: Ignoring %s\n", 990 raidPtr->raidid, dev_name); 991 } else { 992 rf_print_label_status( raidPtr, r, c, 993 dev_name, ci_label ); 994 if (rf_check_label_vitals( raidPtr, r, c, 995 dev_name, ci_label, 996 serial_number, 997 mod_number )) { 998 fatal_error = 1; 999 } 1000 if (ci_label->clean != RF_RAID_CLEAN) { 1001 parity_good = RF_RAID_DIRTY; 1002 } 1003 } 1004 } 1005 } 1006 if (fatal_error) { 1007 parity_good = RF_RAID_DIRTY; 1008 } 1009 1010 /* we note the state of the parity */ 1011 raidPtr->parity_good = parity_good; 1012 1013 return(fatal_error); 1014 } 1015 1016 int 1017 rf_add_hot_spare(raidPtr, sparePtr) 1018 RF_Raid_t *raidPtr; 1019 RF_SingleComponent_t *sparePtr; 1020 { 1021 RF_RaidDisk_t *disks; 1022 RF_DiskQueue_t *spareQueues; 1023 int ret; 1024 unsigned int bs; 1025 int spare_number; 1026 1027 #if 0 1028 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); 1029 printf("Num col: %d\n",raidPtr->numCol); 1030 #endif 1031 if (raidPtr->numSpare >= RF_MAXSPARE) { 1032 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 1033 return(EINVAL); 1034 } 1035 1036 RF_LOCK_MUTEX(raidPtr->mutex); 1037 1038 /* the beginning of the spares... */ 1039 disks = &raidPtr->Disks[0][raidPtr->numCol]; 1040 1041 spare_number = raidPtr->numSpare; 1042 1043 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 1044 &disks[spare_number], 0, 1045 raidPtr->numCol + spare_number); 1046 1047 if (ret) 1048 goto fail; 1049 if (disks[spare_number].status != rf_ds_optimal) { 1050 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1051 sparePtr->component_name); 1052 ret=EINVAL; 1053 goto fail; 1054 } else { 1055 disks[spare_number].status = rf_ds_spare; 1056 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1057 disks[spare_number].devname, 1058 (long int) disks[spare_number].numBlocks, 1059 disks[spare_number].blockSize, 1060 (long int) disks[spare_number].numBlocks * 1061 disks[spare_number].blockSize / 1024 / 1024); 1062 } 1063 1064 1065 /* check sizes and block sizes on the spare disk */ 1066 bs = 1 << raidPtr->logBytesPerSector; 1067 if (disks[spare_number].blockSize != bs) { 1068 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1069 ret = EINVAL; 1070 goto fail; 1071 } 1072 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1073 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1074 disks[spare_number].devname, 1075 disks[spare_number].blockSize, 1076 (long int) raidPtr->sectorsPerDisk); 1077 ret = EINVAL; 1078 goto fail; 1079 } else { 1080 if (disks[spare_number].numBlocks > 1081 raidPtr->sectorsPerDisk) { 1082 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1083 (long int) raidPtr->sectorsPerDisk); 1084 1085 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1086 } 1087 } 1088 1089 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1090 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1091 0, raidPtr->numCol + spare_number, 1092 raidPtr->qType, 1093 raidPtr->sectorsPerDisk, 1094 raidPtr->Disks[0][raidPtr->numCol + 1095 spare_number].dev, 1096 raidPtr->maxOutstanding, 1097 &raidPtr->shutdownList, 1098 raidPtr->cleanupList); 1099 1100 1101 raidPtr->numSpare++; 1102 RF_UNLOCK_MUTEX(raidPtr->mutex); 1103 return (0); 1104 1105 fail: 1106 RF_UNLOCK_MUTEX(raidPtr->mutex); 1107 return(ret); 1108 } 1109 1110 int 1111 rf_remove_hot_spare(raidPtr,sparePtr) 1112 RF_Raid_t *raidPtr; 1113 RF_SingleComponent_t *sparePtr; 1114 { 1115 int spare_number; 1116 1117 1118 if (raidPtr->numSpare==0) { 1119 printf("No spares to remove!\n"); 1120 return(EINVAL); 1121 } 1122 1123 spare_number = sparePtr->column; 1124 1125 return(EINVAL); /* XXX not implemented yet */ 1126 #if 0 1127 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1128 return(EINVAL); 1129 } 1130 1131 /* verify that this spare isn't in use... */ 1132 1133 1134 1135 1136 /* it's gone.. */ 1137 1138 raidPtr->numSpare--; 1139 1140 return(0); 1141 #endif 1142 } 1143 1144 1145 int 1146 rf_delete_component(raidPtr,component) 1147 RF_Raid_t *raidPtr; 1148 RF_SingleComponent_t *component; 1149 { 1150 RF_RaidDisk_t *disks; 1151 1152 if ((component->row < 0) || 1153 (component->row >= raidPtr->numRow) || 1154 (component->column < 0) || 1155 (component->column >= raidPtr->numCol)) { 1156 return(EINVAL); 1157 } 1158 1159 disks = &raidPtr->Disks[component->row][component->column]; 1160 1161 /* 1. This component must be marked as 'failed' */ 1162 1163 return(EINVAL); /* Not implemented yet. */ 1164 } 1165 1166 int 1167 rf_incorporate_hot_spare(raidPtr,component) 1168 RF_Raid_t *raidPtr; 1169 RF_SingleComponent_t *component; 1170 { 1171 1172 /* Issues here include how to 'move' this in if there is IO 1173 taking place (e.g. component queues and such) */ 1174 1175 return(EINVAL); /* Not implemented yet. */ 1176 } 1177