1 /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include "rf_types.h" 70 #include "rf_raid.h" 71 #include "rf_alloclist.h" 72 #include "rf_utils.h" 73 #include "rf_configure.h" 74 #include "rf_general.h" 75 #include "rf_options.h" 76 #include "rf_kintf.h" 77 #include "rf_netbsd.h" 78 79 #include <sys/types.h> 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/proc.h> 83 #include <sys/ioctl.h> 84 #include <sys/fcntl.h> 85 #include <sys/vnode.h> 86 87 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 88 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 89 RF_ComponentLabel_t *); 90 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 91 RF_ComponentLabel_t *, int, int ); 92 93 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 94 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 95 96 /************************************************************************** 97 * 98 * initialize the disks comprising the array 99 * 100 * We want the spare disks to have regular row,col numbers so that we can 101 * easily substitue a spare for a failed disk. But, the driver code assumes 102 * throughout that the array contains numRow by numCol _non-spare_ disks, so 103 * it's not clear how to fit in the spares. This is an unfortunate holdover 104 * from raidSim. The quick and dirty fix is to make row zero bigger than the 105 * rest, and put all the spares in it. This probably needs to get changed 106 * eventually. 107 * 108 **************************************************************************/ 109 110 int 111 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 112 RF_ShutdownList_t **listp; 113 RF_Raid_t *raidPtr; 114 RF_Config_t *cfgPtr; 115 { 116 RF_RaidDisk_t **disks; 117 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 118 RF_RowCol_t r, c; 119 int bs, ret; 120 unsigned i, count, foundone = 0, numFailuresThisRow; 121 int force; 122 123 force = cfgPtr->force; 124 125 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 126 if (ret) 127 goto fail; 128 129 disks = raidPtr->Disks; 130 131 for (r = 0; r < raidPtr->numRow; r++) { 132 numFailuresThisRow = 0; 133 for (c = 0; c < raidPtr->numCol; c++) { 134 ret = rf_ConfigureDisk(raidPtr, 135 &cfgPtr->devnames[r][c][0], 136 &disks[r][c], r, c); 137 138 if (ret) 139 goto fail; 140 141 if (disks[r][c].status == rf_ds_optimal) { 142 raidread_component_label( 143 raidPtr->raid_cinfo[r][c].ci_dev, 144 raidPtr->raid_cinfo[r][c].ci_vp, 145 &raidPtr->raid_cinfo[r][c].ci_label); 146 } 147 148 if (disks[r][c].status != rf_ds_optimal) { 149 numFailuresThisRow++; 150 } else { 151 if (disks[r][c].numBlocks < min_numblks) 152 min_numblks = disks[r][c].numBlocks; 153 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 154 r, c, disks[r][c].devname, 155 (long int) disks[r][c].numBlocks, 156 disks[r][c].blockSize, 157 (long int) disks[r][c].numBlocks * 158 disks[r][c].blockSize / 1024 / 1024); 159 } 160 } 161 /* XXX fix for n-fault tolerant */ 162 /* XXX this should probably check to see how many failures 163 we can handle for this configuration! */ 164 if (numFailuresThisRow > 0) 165 raidPtr->status[r] = rf_rs_degraded; 166 } 167 168 /* all disks must be the same size & have the same block size, bs must 169 * be a power of 2 */ 170 bs = 0; 171 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 172 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 173 if (disks[r][c].status == rf_ds_optimal) { 174 bs = disks[r][c].blockSize; 175 foundone = 1; 176 } 177 } 178 } 179 if (!foundone) { 180 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 181 ret = EINVAL; 182 goto fail; 183 } 184 for (count = 0, i = 1; i; i <<= 1) 185 if (bs & i) 186 count++; 187 if (count != 1) { 188 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 189 ret = EINVAL; 190 goto fail; 191 } 192 193 if (rf_CheckLabels( raidPtr, cfgPtr )) { 194 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 195 if (force != 0) { 196 printf("raid%d: Fatal errors being ignored.\n", 197 raidPtr->raidid); 198 } else { 199 ret = EINVAL; 200 goto fail; 201 } 202 } 203 204 for (r = 0; r < raidPtr->numRow; r++) { 205 for (c = 0; c < raidPtr->numCol; c++) { 206 if (disks[r][c].status == rf_ds_optimal) { 207 if (disks[r][c].blockSize != bs) { 208 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 209 ret = EINVAL; 210 goto fail; 211 } 212 if (disks[r][c].numBlocks != min_numblks) { 213 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 214 r, c, (int) min_numblks); 215 disks[r][c].numBlocks = min_numblks; 216 } 217 } 218 } 219 } 220 221 raidPtr->sectorsPerDisk = min_numblks; 222 raidPtr->logBytesPerSector = ffs(bs) - 1; 223 raidPtr->bytesPerSector = bs; 224 raidPtr->sectorMask = bs - 1; 225 return (0); 226 227 fail: 228 229 rf_UnconfigureVnodes( raidPtr ); 230 231 return (ret); 232 } 233 234 235 /**************************************************************************** 236 * set up the data structures describing the spare disks in the array 237 * recall from the above comment that the spare disk descriptors are stored 238 * in row zero, which is specially expanded to hold them. 239 ****************************************************************************/ 240 int 241 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 242 RF_ShutdownList_t ** listp; 243 RF_Raid_t * raidPtr; 244 RF_Config_t * cfgPtr; 245 { 246 int i, ret; 247 unsigned int bs; 248 RF_RaidDisk_t *disks; 249 int num_spares_done; 250 251 num_spares_done = 0; 252 253 /* The space for the spares should have already been allocated by 254 * ConfigureDisks() */ 255 256 disks = &raidPtr->Disks[0][raidPtr->numCol]; 257 for (i = 0; i < raidPtr->numSpare; i++) { 258 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 259 &disks[i], 0, raidPtr->numCol + i); 260 if (ret) 261 goto fail; 262 if (disks[i].status != rf_ds_optimal) { 263 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 264 &cfgPtr->spare_names[i][0]); 265 } else { 266 disks[i].status = rf_ds_spare; /* change status to 267 * spare */ 268 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 269 disks[i].devname, 270 (long int) disks[i].numBlocks, disks[i].blockSize, 271 (long int) disks[i].numBlocks * 272 disks[i].blockSize / 1024 / 1024); 273 } 274 num_spares_done++; 275 } 276 277 /* check sizes and block sizes on spare disks */ 278 bs = 1 << raidPtr->logBytesPerSector; 279 for (i = 0; i < raidPtr->numSpare; i++) { 280 if (disks[i].blockSize != bs) { 281 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 282 ret = EINVAL; 283 goto fail; 284 } 285 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 286 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 287 disks[i].devname, disks[i].blockSize, 288 (long int) raidPtr->sectorsPerDisk); 289 ret = EINVAL; 290 goto fail; 291 } else 292 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 293 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 294 295 disks[i].numBlocks = raidPtr->sectorsPerDisk; 296 } 297 } 298 299 return (0); 300 301 fail: 302 303 /* Release the hold on the main components. We've failed to allocate 304 * a spare, and since we're failing, we need to free things.. 305 306 XXX failing to allocate a spare is *not* that big of a deal... 307 We *can* survive without it, if need be, esp. if we get hot 308 adding working. 309 310 If we don't fail out here, then we need a way to remove this spare... 311 that should be easier to do here than if we are "live"... 312 313 */ 314 315 rf_UnconfigureVnodes( raidPtr ); 316 317 return (ret); 318 } 319 320 static int 321 rf_AllocDiskStructures(raidPtr, cfgPtr) 322 RF_Raid_t *raidPtr; 323 RF_Config_t *cfgPtr; 324 { 325 RF_RaidDisk_t **disks; 326 int ret; 327 int r; 328 329 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 330 (RF_RaidDisk_t **), raidPtr->cleanupList); 331 if (disks == NULL) { 332 ret = ENOMEM; 333 goto fail; 334 } 335 raidPtr->Disks = disks; 336 /* get space for the device-specific stuff... */ 337 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 338 sizeof(struct raidcinfo *), (struct raidcinfo **), 339 raidPtr->cleanupList); 340 if (raidPtr->raid_cinfo == NULL) { 341 ret = ENOMEM; 342 goto fail; 343 } 344 345 for (r = 0; r < raidPtr->numRow; r++) { 346 /* We allocate RF_MAXSPARE on the first row so that we 347 have room to do hot-swapping of spares */ 348 RF_CallocAndAdd(disks[r], raidPtr->numCol 349 + ((r == 0) ? RF_MAXSPARE : 0), 350 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 351 raidPtr->cleanupList); 352 if (disks[r] == NULL) { 353 ret = ENOMEM; 354 goto fail; 355 } 356 /* get more space for device specific stuff.. */ 357 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 358 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), 359 sizeof(struct raidcinfo), (struct raidcinfo *), 360 raidPtr->cleanupList); 361 if (raidPtr->raid_cinfo[r] == NULL) { 362 ret = ENOMEM; 363 goto fail; 364 } 365 } 366 return(0); 367 fail: 368 rf_UnconfigureVnodes( raidPtr ); 369 370 return(ret); 371 } 372 373 374 /* configure a single disk during auto-configuration at boot */ 375 int 376 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 377 RF_Raid_t *raidPtr; 378 RF_Config_t *cfgPtr; 379 RF_AutoConfig_t *auto_config; 380 { 381 RF_RaidDisk_t **disks; 382 RF_RaidDisk_t *diskPtr; 383 RF_RowCol_t r, c; 384 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 385 int bs, ret; 386 int numFailuresThisRow; 387 int force; 388 RF_AutoConfig_t *ac; 389 int parity_good; 390 int mod_counter; 391 int mod_counter_found; 392 393 #if DEBUG 394 printf("Starting autoconfiguration of RAID set...\n"); 395 #endif 396 force = cfgPtr->force; 397 398 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 399 if (ret) 400 goto fail; 401 402 disks = raidPtr->Disks; 403 404 /* assume the parity will be fine.. */ 405 parity_good = RF_RAID_CLEAN; 406 407 /* Check for mod_counters that are too low */ 408 mod_counter_found = 0; 409 ac = auto_config; 410 while(ac!=NULL) { 411 if (mod_counter_found==0) { 412 mod_counter = ac->clabel->mod_counter; 413 mod_counter_found = 1; 414 } else { 415 if (ac->clabel->mod_counter > mod_counter) { 416 mod_counter = ac->clabel->mod_counter; 417 } 418 } 419 ac->flag = 0; /* clear the general purpose flag */ 420 ac = ac->next; 421 } 422 423 for (r = 0; r < raidPtr->numRow; r++) { 424 numFailuresThisRow = 0; 425 for (c = 0; c < raidPtr->numCol; c++) { 426 diskPtr = &disks[r][c]; 427 428 /* find this row/col in the autoconfig */ 429 #if DEBUG 430 printf("Looking for %d,%d in autoconfig\n",r,c); 431 #endif 432 ac = auto_config; 433 while(ac!=NULL) { 434 if (ac->clabel==NULL) { 435 /* big-time bad news. */ 436 goto fail; 437 } 438 if ((ac->clabel->row == r) && 439 (ac->clabel->column == c) && 440 (ac->clabel->mod_counter == mod_counter)) { 441 /* it's this one... */ 442 /* flag it as 'used', so we don't 443 free it later. */ 444 ac->flag = 1; 445 #if DEBUG 446 printf("Found: %s at %d,%d\n", 447 ac->devname,r,c); 448 #endif 449 450 break; 451 } 452 ac=ac->next; 453 } 454 455 if (ac==NULL) { 456 /* we didn't find an exact match with a 457 correct mod_counter above... can we 458 find one with an incorrect mod_counter 459 to use instead? (this one, if we find 460 it, will be marked as failed once the 461 set configures) 462 */ 463 464 ac = auto_config; 465 while(ac!=NULL) { 466 if (ac->clabel==NULL) { 467 /* big-time bad news. */ 468 goto fail; 469 } 470 if ((ac->clabel->row == r) && 471 (ac->clabel->column == c)) { 472 /* it's this one... 473 flag it as 'used', so we 474 don't free it later. */ 475 ac->flag = 1; 476 #if DEBUG 477 printf("Found(low mod_counter): %s at %d,%d\n", 478 ac->devname,r,c); 479 #endif 480 481 break; 482 } 483 ac=ac->next; 484 } 485 } 486 487 488 489 if (ac!=NULL) { 490 /* Found it. Configure it.. */ 491 diskPtr->blockSize = ac->clabel->blockSize; 492 diskPtr->numBlocks = ac->clabel->numBlocks; 493 /* Note: rf_protectedSectors is already 494 factored into numBlocks here */ 495 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 496 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 497 498 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 499 ac->clabel, sizeof(*ac->clabel)); 500 sprintf(diskPtr->devname, "/dev/%s", 501 ac->devname); 502 503 /* note the fact that this component was 504 autoconfigured. You'll need this info 505 later. Trust me :) */ 506 diskPtr->auto_configured = 1; 507 diskPtr->dev = ac->dev; 508 509 /* 510 * we allow the user to specify that 511 * only a fraction of the disks should 512 * be used this is just for debug: it 513 * speeds up the parity scan 514 */ 515 516 diskPtr->numBlocks = diskPtr->numBlocks * 517 rf_sizePercentage / 100; 518 519 /* XXX these will get set multiple times, 520 but since we're autoconfiguring, they'd 521 better be always the same each time! 522 If not, this is the least of your worries */ 523 524 bs = diskPtr->blockSize; 525 min_numblks = diskPtr->numBlocks; 526 527 /* this gets done multiple times, but that's 528 fine -- the serial number will be the same 529 for all components, guaranteed */ 530 raidPtr->serial_number = 531 ac->clabel->serial_number; 532 /* check the last time the label 533 was modified */ 534 if (ac->clabel->mod_counter != 535 mod_counter) { 536 /* Even though we've filled in all 537 of the above, we don't trust 538 this component since it's 539 modification counter is not 540 in sync with the rest, and we really 541 consider it to be failed. */ 542 disks[r][c].status = rf_ds_failed; 543 numFailuresThisRow++; 544 } else { 545 if (ac->clabel->clean != 546 RF_RAID_CLEAN) { 547 parity_good = RF_RAID_DIRTY; 548 } 549 } 550 } else { 551 /* Didn't find it at all!! 552 Component must really be dead */ 553 disks[r][c].status = rf_ds_failed; 554 sprintf(disks[r][c].devname,"component%d", 555 r * raidPtr->numCol + c); 556 numFailuresThisRow++; 557 } 558 } 559 /* XXX fix for n-fault tolerant */ 560 /* XXX this should probably check to see how many failures 561 we can handle for this configuration! */ 562 if (numFailuresThisRow > 0) 563 raidPtr->status[r] = rf_rs_degraded; 564 } 565 566 /* close the device for the ones that didn't get used */ 567 568 ac = auto_config; 569 while(ac!=NULL) { 570 if (ac->flag == 0) { 571 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 572 vput(ac->vp); 573 ac->vp = NULL; 574 #if DEBUG 575 printf("Released %s from auto-config set.\n", 576 ac->devname); 577 #endif 578 } 579 ac = ac->next; 580 } 581 582 raidPtr->mod_counter = mod_counter; 583 584 /* note the state of the parity, if any */ 585 raidPtr->parity_good = parity_good; 586 raidPtr->sectorsPerDisk = min_numblks; 587 raidPtr->logBytesPerSector = ffs(bs) - 1; 588 raidPtr->bytesPerSector = bs; 589 raidPtr->sectorMask = bs - 1; 590 return (0); 591 592 fail: 593 594 rf_UnconfigureVnodes( raidPtr ); 595 596 return (ret); 597 598 } 599 600 /* configure a single disk in the array */ 601 int 602 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 603 RF_Raid_t *raidPtr; 604 char *buf; 605 RF_RaidDisk_t *diskPtr; 606 RF_RowCol_t row; 607 RF_RowCol_t col; 608 { 609 char *p; 610 int retcode; 611 612 struct partinfo dpart; 613 struct vnode *vp; 614 struct vattr va; 615 struct proc *proc; 616 int error; 617 618 retcode = 0; 619 p = rf_find_non_white(buf); 620 if (p[strlen(p) - 1] == '\n') { 621 /* strip off the newline */ 622 p[strlen(p) - 1] = '\0'; 623 } 624 (void) strcpy(diskPtr->devname, p); 625 626 proc = raidPtr->engine_thread; 627 628 /* Let's start by claiming the component is fine and well... */ 629 diskPtr->status = rf_ds_optimal; 630 631 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 632 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 633 634 error = raidlookup(diskPtr->devname, proc, &vp); 635 if (error) { 636 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 637 if (error == ENXIO) { 638 /* the component isn't there... must be dead :-( */ 639 diskPtr->status = rf_ds_failed; 640 } else { 641 return (error); 642 } 643 } 644 if (diskPtr->status == rf_ds_optimal) { 645 646 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 647 return (error); 648 } 649 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 650 FREAD, proc->p_ucred, proc); 651 if (error) { 652 return (error); 653 } 654 655 diskPtr->blockSize = dpart.disklab->d_secsize; 656 657 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 658 diskPtr->partitionSize = dpart.part->p_size; 659 660 raidPtr->raid_cinfo[row][col].ci_vp = vp; 661 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 662 663 /* This component was not automatically configured */ 664 diskPtr->auto_configured = 0; 665 diskPtr->dev = va.va_rdev; 666 667 /* we allow the user to specify that only a fraction of the 668 * disks should be used this is just for debug: it speeds up 669 * the parity scan */ 670 diskPtr->numBlocks = diskPtr->numBlocks * 671 rf_sizePercentage / 100; 672 } 673 return (0); 674 } 675 676 static void 677 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 678 RF_Raid_t *raidPtr; 679 int row; 680 int column; 681 char *dev_name; 682 RF_ComponentLabel_t *ci_label; 683 { 684 685 printf("raid%d: Component %s being configured at row: %d col: %d\n", 686 raidPtr->raidid, dev_name, row, column ); 687 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 688 ci_label->row, ci_label->column, 689 ci_label->num_rows, ci_label->num_columns); 690 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 691 ci_label->version, ci_label->serial_number, 692 ci_label->mod_counter); 693 printf(" Clean: %s Status: %d\n", 694 ci_label->clean ? "Yes" : "No", ci_label->status ); 695 } 696 697 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 698 serial_number, mod_counter ) 699 RF_Raid_t *raidPtr; 700 int row; 701 int column; 702 char *dev_name; 703 RF_ComponentLabel_t *ci_label; 704 int serial_number; 705 int mod_counter; 706 { 707 int fatal_error = 0; 708 709 if (serial_number != ci_label->serial_number) { 710 printf("%s has a different serial number: %d %d\n", 711 dev_name, serial_number, ci_label->serial_number); 712 fatal_error = 1; 713 } 714 if (mod_counter != ci_label->mod_counter) { 715 printf("%s has a different modfication count: %d %d\n", 716 dev_name, mod_counter, ci_label->mod_counter); 717 } 718 719 if (row != ci_label->row) { 720 printf("Row out of alignment for: %s\n", dev_name); 721 fatal_error = 1; 722 } 723 if (column != ci_label->column) { 724 printf("Column out of alignment for: %s\n", dev_name); 725 fatal_error = 1; 726 } 727 if (raidPtr->numRow != ci_label->num_rows) { 728 printf("Number of rows do not match for: %s\n", dev_name); 729 fatal_error = 1; 730 } 731 if (raidPtr->numCol != ci_label->num_columns) { 732 printf("Number of columns do not match for: %s\n", dev_name); 733 fatal_error = 1; 734 } 735 if (ci_label->clean == 0) { 736 /* it's not clean, but that's not fatal */ 737 printf("%s is not clean!\n", dev_name); 738 } 739 return(fatal_error); 740 } 741 742 743 /* 744 745 rf_CheckLabels() - check all the component labels for consistency. 746 Return an error if there is anything major amiss. 747 748 */ 749 750 int 751 rf_CheckLabels( raidPtr, cfgPtr ) 752 RF_Raid_t *raidPtr; 753 RF_Config_t *cfgPtr; 754 { 755 int r,c; 756 char *dev_name; 757 RF_ComponentLabel_t *ci_label; 758 int serial_number = 0; 759 int mod_number = 0; 760 int fatal_error = 0; 761 int mod_values[4]; 762 int mod_count[4]; 763 int ser_values[4]; 764 int ser_count[4]; 765 int num_ser; 766 int num_mod; 767 int i; 768 int found; 769 int hosed_row; 770 int hosed_column; 771 int too_fatal; 772 int parity_good; 773 int force; 774 775 hosed_row = -1; 776 hosed_column = -1; 777 too_fatal = 0; 778 force = cfgPtr->force; 779 780 /* 781 We're going to try to be a little intelligent here. If one 782 component's label is bogus, and we can identify that it's the 783 *only* one that's gone, we'll mark it as "failed" and allow 784 the configuration to proceed. This will be the *only* case 785 that we'll proceed if there would be (otherwise) fatal errors. 786 787 Basically we simply keep a count of how many components had 788 what serial number. If all but one agree, we simply mark 789 the disagreeing component as being failed, and allow 790 things to come up "normally". 791 792 We do this first for serial numbers, and then for "mod_counter". 793 794 */ 795 796 num_ser = 0; 797 num_mod = 0; 798 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 799 for (c = 0; c < raidPtr->numCol; c++) { 800 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 801 found=0; 802 for(i=0;i<num_ser;i++) { 803 if (ser_values[i] == ci_label->serial_number) { 804 ser_count[i]++; 805 found=1; 806 break; 807 } 808 } 809 if (!found) { 810 ser_values[num_ser] = ci_label->serial_number; 811 ser_count[num_ser] = 1; 812 num_ser++; 813 if (num_ser>2) { 814 fatal_error = 1; 815 break; 816 } 817 } 818 found=0; 819 for(i=0;i<num_mod;i++) { 820 if (mod_values[i] == ci_label->mod_counter) { 821 mod_count[i]++; 822 found=1; 823 break; 824 } 825 } 826 if (!found) { 827 mod_values[num_mod] = ci_label->mod_counter; 828 mod_count[num_mod] = 1; 829 num_mod++; 830 if (num_mod>2) { 831 fatal_error = 1; 832 break; 833 } 834 } 835 } 836 } 837 #if DEBUG 838 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 839 for(i=0;i<num_ser;i++) { 840 printf("%d %d\n", ser_values[i], ser_count[i]); 841 } 842 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 843 for(i=0;i<num_mod;i++) { 844 printf("%d %d\n", mod_values[i], mod_count[i]); 845 } 846 #endif 847 serial_number = ser_values[0]; 848 if (num_ser == 2) { 849 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 850 /* Locate the maverick component */ 851 if (ser_count[1] > ser_count[0]) { 852 serial_number = ser_values[1]; 853 } 854 for (r = 0; r < raidPtr->numRow; r++) { 855 for (c = 0; c < raidPtr->numCol; c++) { 856 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 857 if (serial_number != 858 ci_label->serial_number) { 859 hosed_row = r; 860 hosed_column = c; 861 break; 862 } 863 } 864 } 865 printf("Hosed component: %s\n", 866 &cfgPtr->devnames[hosed_row][hosed_column][0]); 867 if (!force) { 868 /* we'll fail this component, as if there are 869 other major errors, we arn't forcing things 870 and we'll abort the config anyways */ 871 raidPtr->Disks[hosed_row][hosed_column].status 872 = rf_ds_failed; 873 raidPtr->numFailures++; 874 raidPtr->status[hosed_row] = rf_rs_degraded; 875 } 876 } else { 877 too_fatal = 1; 878 } 879 if (cfgPtr->parityConfig == '0') { 880 /* We've identified two different serial numbers. 881 RAID 0 can't cope with that, so we'll punt */ 882 too_fatal = 1; 883 } 884 885 } 886 887 /* record the serial number for later. If we bail later, setting 888 this doesn't matter, otherwise we've got the best guess at the 889 correct serial number */ 890 raidPtr->serial_number = serial_number; 891 892 mod_number = mod_values[0]; 893 if (num_mod == 2) { 894 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 895 /* Locate the maverick component */ 896 if (mod_count[1] > mod_count[0]) { 897 mod_number = mod_values[1]; 898 } else if (mod_count[1] < mod_count[0]) { 899 mod_number = mod_values[0]; 900 } else { 901 /* counts of different modification values 902 are the same. Assume greater value is 903 the correct one, all other things 904 considered */ 905 if (mod_values[0] > mod_values[1]) { 906 mod_number = mod_values[0]; 907 } else { 908 mod_number = mod_values[1]; 909 } 910 911 } 912 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 913 for (c = 0; c < raidPtr->numCol; c++) { 914 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 915 if (mod_number != 916 ci_label->mod_counter) { 917 if ( ( hosed_row == r ) && 918 ( hosed_column == c )) { 919 /* same one. Can 920 deal with it. */ 921 } else { 922 hosed_row = r; 923 hosed_column = c; 924 if (num_ser != 1) { 925 too_fatal = 1; 926 break; 927 } 928 } 929 } 930 } 931 } 932 printf("Hosed component: %s\n", 933 &cfgPtr->devnames[hosed_row][hosed_column][0]); 934 if (!force) { 935 /* we'll fail this component, as if there are 936 other major errors, we arn't forcing things 937 and we'll abort the config anyways */ 938 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 939 raidPtr->Disks[hosed_row][hosed_column].status 940 = rf_ds_failed; 941 raidPtr->numFailures++; 942 raidPtr->status[hosed_row] = rf_rs_degraded; 943 } 944 } 945 } else { 946 too_fatal = 1; 947 } 948 if (cfgPtr->parityConfig == '0') { 949 /* We've identified two different mod counters. 950 RAID 0 can't cope with that, so we'll punt */ 951 too_fatal = 1; 952 } 953 } 954 955 raidPtr->mod_counter = mod_number; 956 957 if (too_fatal) { 958 /* we've had both a serial number mismatch, and a mod_counter 959 mismatch -- and they involved two different components!! 960 Bail -- make things fail so that the user must force 961 the issue... */ 962 hosed_row = -1; 963 hosed_column = -1; 964 } 965 966 if (num_ser > 2) { 967 printf("raid%d: Too many different serial numbers!\n", 968 raidPtr->raidid); 969 } 970 971 if (num_mod > 2) { 972 printf("raid%d: Too many different mod counters!\n", 973 raidPtr->raidid); 974 } 975 976 /* we start by assuming the parity will be good, and flee from 977 that notion at the slightest sign of trouble */ 978 979 parity_good = RF_RAID_CLEAN; 980 for (r = 0; r < raidPtr->numRow; r++) { 981 for (c = 0; c < raidPtr->numCol; c++) { 982 dev_name = &cfgPtr->devnames[r][c][0]; 983 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 984 985 if ((r == hosed_row) && (c == hosed_column)) { 986 printf("raid%d: Ignoring %s\n", 987 raidPtr->raidid, dev_name); 988 } else { 989 rf_print_label_status( raidPtr, r, c, 990 dev_name, ci_label ); 991 if (rf_check_label_vitals( raidPtr, r, c, 992 dev_name, ci_label, 993 serial_number, 994 mod_number )) { 995 fatal_error = 1; 996 } 997 if (ci_label->clean != RF_RAID_CLEAN) { 998 parity_good = RF_RAID_DIRTY; 999 } 1000 } 1001 } 1002 } 1003 if (fatal_error) { 1004 parity_good = RF_RAID_DIRTY; 1005 } 1006 1007 /* we note the state of the parity */ 1008 raidPtr->parity_good = parity_good; 1009 1010 return(fatal_error); 1011 } 1012 1013 int 1014 rf_add_hot_spare(raidPtr, sparePtr) 1015 RF_Raid_t *raidPtr; 1016 RF_SingleComponent_t *sparePtr; 1017 { 1018 RF_RaidDisk_t *disks; 1019 RF_DiskQueue_t *spareQueues; 1020 int ret; 1021 unsigned int bs; 1022 int spare_number; 1023 1024 #if 0 1025 printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare); 1026 printf("Num col: %d\n",raidPtr->numCol); 1027 #endif 1028 if (raidPtr->numSpare >= RF_MAXSPARE) { 1029 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 1030 return(EINVAL); 1031 } 1032 1033 RF_LOCK_MUTEX(raidPtr->mutex); 1034 1035 /* the beginning of the spares... */ 1036 disks = &raidPtr->Disks[0][raidPtr->numCol]; 1037 1038 spare_number = raidPtr->numSpare; 1039 1040 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 1041 &disks[spare_number], 0, 1042 raidPtr->numCol + spare_number); 1043 1044 if (ret) 1045 goto fail; 1046 if (disks[spare_number].status != rf_ds_optimal) { 1047 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1048 sparePtr->component_name); 1049 ret=EINVAL; 1050 goto fail; 1051 } else { 1052 disks[spare_number].status = rf_ds_spare; 1053 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1054 disks[spare_number].devname, 1055 (long int) disks[spare_number].numBlocks, 1056 disks[spare_number].blockSize, 1057 (long int) disks[spare_number].numBlocks * 1058 disks[spare_number].blockSize / 1024 / 1024); 1059 } 1060 1061 1062 /* check sizes and block sizes on the spare disk */ 1063 bs = 1 << raidPtr->logBytesPerSector; 1064 if (disks[spare_number].blockSize != bs) { 1065 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1066 ret = EINVAL; 1067 goto fail; 1068 } 1069 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1070 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1071 disks[spare_number].devname, 1072 disks[spare_number].blockSize, 1073 (long int) raidPtr->sectorsPerDisk); 1074 ret = EINVAL; 1075 goto fail; 1076 } else { 1077 if (disks[spare_number].numBlocks > 1078 raidPtr->sectorsPerDisk) { 1079 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1080 (long int) raidPtr->sectorsPerDisk); 1081 1082 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1083 } 1084 } 1085 1086 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1087 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1088 0, raidPtr->numCol + spare_number, 1089 raidPtr->qType, 1090 raidPtr->sectorsPerDisk, 1091 raidPtr->Disks[0][raidPtr->numCol + 1092 spare_number].dev, 1093 raidPtr->maxOutstanding, 1094 &raidPtr->shutdownList, 1095 raidPtr->cleanupList); 1096 1097 1098 raidPtr->numSpare++; 1099 RF_UNLOCK_MUTEX(raidPtr->mutex); 1100 return (0); 1101 1102 fail: 1103 RF_UNLOCK_MUTEX(raidPtr->mutex); 1104 return(ret); 1105 } 1106 1107 int 1108 rf_remove_hot_spare(raidPtr,sparePtr) 1109 RF_Raid_t *raidPtr; 1110 RF_SingleComponent_t *sparePtr; 1111 { 1112 int spare_number; 1113 1114 1115 if (raidPtr->numSpare==0) { 1116 printf("No spares to remove!\n"); 1117 return(EINVAL); 1118 } 1119 1120 spare_number = sparePtr->column; 1121 1122 return(EINVAL); /* XXX not implemented yet */ 1123 #if 0 1124 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1125 return(EINVAL); 1126 } 1127 1128 /* verify that this spare isn't in use... */ 1129 1130 1131 1132 1133 /* it's gone.. */ 1134 1135 raidPtr->numSpare--; 1136 1137 return(0); 1138 #endif 1139 } 1140 1141 1142 int 1143 rf_delete_component(raidPtr,component) 1144 RF_Raid_t *raidPtr; 1145 RF_SingleComponent_t *component; 1146 { 1147 RF_RaidDisk_t *disks; 1148 1149 if ((component->row < 0) || 1150 (component->row >= raidPtr->numRow) || 1151 (component->column < 0) || 1152 (component->column >= raidPtr->numCol)) { 1153 return(EINVAL); 1154 } 1155 1156 disks = &raidPtr->Disks[component->row][component->column]; 1157 1158 /* 1. This component must be marked as 'failed' */ 1159 1160 return(EINVAL); /* Not implemented yet. */ 1161 } 1162 1163 int 1164 rf_incorporate_hot_spare(raidPtr,component) 1165 RF_Raid_t *raidPtr; 1166 RF_SingleComponent_t *component; 1167 { 1168 1169 /* Issues here include how to 'move' this in if there is IO 1170 taking place (e.g. component queues and such) */ 1171 1172 return(EINVAL); /* Not implemented yet. */ 1173 } 1174