1 /* $NetBSD: rf_disks.c,v 1.37 2001/11/13 07:11:13 lukem Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /*************************************************************** 66 * rf_disks.c -- code to perform operations on the actual disks 67 ***************************************************************/ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.37 2001/11/13 07:11:13 lukem Exp $"); 71 72 #include <dev/raidframe/raidframevar.h> 73 74 #include "rf_raid.h" 75 #include "rf_alloclist.h" 76 #include "rf_utils.h" 77 #include "rf_general.h" 78 #include "rf_options.h" 79 #include "rf_kintf.h" 80 #include "rf_netbsd.h" 81 82 #include <sys/types.h> 83 #include <sys/param.h> 84 #include <sys/systm.h> 85 #include <sys/proc.h> 86 #include <sys/ioctl.h> 87 #include <sys/fcntl.h> 88 #include <sys/vnode.h> 89 90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *); 91 static void rf_print_label_status( RF_Raid_t *, int, int, char *, 92 RF_ComponentLabel_t *); 93 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *, 94 RF_ComponentLabel_t *, int, int ); 95 96 #define DPRINTF6(a,b,c,d,e,f) if (rf_diskDebug) printf(a,b,c,d,e,f) 97 #define DPRINTF7(a,b,c,d,e,f,g) if (rf_diskDebug) printf(a,b,c,d,e,f,g) 98 99 /************************************************************************** 100 * 101 * initialize the disks comprising the array 102 * 103 * We want the spare disks to have regular row,col numbers so that we can 104 * easily substitue a spare for a failed disk. But, the driver code assumes 105 * throughout that the array contains numRow by numCol _non-spare_ disks, so 106 * it's not clear how to fit in the spares. This is an unfortunate holdover 107 * from raidSim. The quick and dirty fix is to make row zero bigger than the 108 * rest, and put all the spares in it. This probably needs to get changed 109 * eventually. 110 * 111 **************************************************************************/ 112 113 int 114 rf_ConfigureDisks( listp, raidPtr, cfgPtr ) 115 RF_ShutdownList_t **listp; 116 RF_Raid_t *raidPtr; 117 RF_Config_t *cfgPtr; 118 { 119 RF_RaidDisk_t **disks; 120 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 121 RF_RowCol_t r, c; 122 int bs, ret; 123 unsigned i, count, foundone = 0, numFailuresThisRow; 124 int force; 125 126 force = cfgPtr->force; 127 128 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 129 if (ret) 130 goto fail; 131 132 disks = raidPtr->Disks; 133 134 for (r = 0; r < raidPtr->numRow; r++) { 135 numFailuresThisRow = 0; 136 for (c = 0; c < raidPtr->numCol; c++) { 137 ret = rf_ConfigureDisk(raidPtr, 138 &cfgPtr->devnames[r][c][0], 139 &disks[r][c], r, c); 140 141 if (ret) 142 goto fail; 143 144 if (disks[r][c].status == rf_ds_optimal) { 145 raidread_component_label( 146 raidPtr->raid_cinfo[r][c].ci_dev, 147 raidPtr->raid_cinfo[r][c].ci_vp, 148 &raidPtr->raid_cinfo[r][c].ci_label); 149 } 150 151 if (disks[r][c].status != rf_ds_optimal) { 152 numFailuresThisRow++; 153 } else { 154 if (disks[r][c].numBlocks < min_numblks) 155 min_numblks = disks[r][c].numBlocks; 156 DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", 157 r, c, disks[r][c].devname, 158 (long int) disks[r][c].numBlocks, 159 disks[r][c].blockSize, 160 (long int) disks[r][c].numBlocks * 161 disks[r][c].blockSize / 1024 / 1024); 162 } 163 } 164 /* XXX fix for n-fault tolerant */ 165 /* XXX this should probably check to see how many failures 166 we can handle for this configuration! */ 167 if (numFailuresThisRow > 0) 168 raidPtr->status[r] = rf_rs_degraded; 169 } 170 171 /* all disks must be the same size & have the same block size, bs must 172 * be a power of 2 */ 173 bs = 0; 174 for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) { 175 for (c = 0; !foundone && c < raidPtr->numCol; c++) { 176 if (disks[r][c].status == rf_ds_optimal) { 177 bs = disks[r][c].blockSize; 178 foundone = 1; 179 } 180 } 181 } 182 if (!foundone) { 183 RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n"); 184 ret = EINVAL; 185 goto fail; 186 } 187 for (count = 0, i = 1; i; i <<= 1) 188 if (bs & i) 189 count++; 190 if (count != 1) { 191 RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs); 192 ret = EINVAL; 193 goto fail; 194 } 195 196 if (rf_CheckLabels( raidPtr, cfgPtr )) { 197 printf("raid%d: There were fatal errors\n", raidPtr->raidid); 198 if (force != 0) { 199 printf("raid%d: Fatal errors being ignored.\n", 200 raidPtr->raidid); 201 } else { 202 ret = EINVAL; 203 goto fail; 204 } 205 } 206 207 for (r = 0; r < raidPtr->numRow; r++) { 208 for (c = 0; c < raidPtr->numCol; c++) { 209 if (disks[r][c].status == rf_ds_optimal) { 210 if (disks[r][c].blockSize != bs) { 211 RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c); 212 ret = EINVAL; 213 goto fail; 214 } 215 if (disks[r][c].numBlocks != min_numblks) { 216 RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n", 217 r, c, (int) min_numblks); 218 disks[r][c].numBlocks = min_numblks; 219 } 220 } 221 } 222 } 223 224 raidPtr->sectorsPerDisk = min_numblks; 225 raidPtr->logBytesPerSector = ffs(bs) - 1; 226 raidPtr->bytesPerSector = bs; 227 raidPtr->sectorMask = bs - 1; 228 return (0); 229 230 fail: 231 232 rf_UnconfigureVnodes( raidPtr ); 233 234 return (ret); 235 } 236 237 238 /**************************************************************************** 239 * set up the data structures describing the spare disks in the array 240 * recall from the above comment that the spare disk descriptors are stored 241 * in row zero, which is specially expanded to hold them. 242 ****************************************************************************/ 243 int 244 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr ) 245 RF_ShutdownList_t ** listp; 246 RF_Raid_t * raidPtr; 247 RF_Config_t * cfgPtr; 248 { 249 int i, ret; 250 unsigned int bs; 251 RF_RaidDisk_t *disks; 252 int num_spares_done; 253 254 num_spares_done = 0; 255 256 /* The space for the spares should have already been allocated by 257 * ConfigureDisks() */ 258 259 disks = &raidPtr->Disks[0][raidPtr->numCol]; 260 for (i = 0; i < raidPtr->numSpare; i++) { 261 ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0], 262 &disks[i], 0, raidPtr->numCol + i); 263 if (ret) 264 goto fail; 265 if (disks[i].status != rf_ds_optimal) { 266 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 267 &cfgPtr->spare_names[i][0]); 268 } else { 269 disks[i].status = rf_ds_spare; /* change status to 270 * spare */ 271 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i, 272 disks[i].devname, 273 (long int) disks[i].numBlocks, disks[i].blockSize, 274 (long int) disks[i].numBlocks * 275 disks[i].blockSize / 1024 / 1024); 276 } 277 num_spares_done++; 278 } 279 280 /* check sizes and block sizes on spare disks */ 281 bs = 1 << raidPtr->logBytesPerSector; 282 for (i = 0; i < raidPtr->numSpare; i++) { 283 if (disks[i].blockSize != bs) { 284 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs); 285 ret = EINVAL; 286 goto fail; 287 } 288 if (disks[i].numBlocks < raidPtr->sectorsPerDisk) { 289 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 290 disks[i].devname, disks[i].blockSize, 291 (long int) raidPtr->sectorsPerDisk); 292 ret = EINVAL; 293 goto fail; 294 } else 295 if (disks[i].numBlocks > raidPtr->sectorsPerDisk) { 296 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk); 297 298 disks[i].numBlocks = raidPtr->sectorsPerDisk; 299 } 300 } 301 302 return (0); 303 304 fail: 305 306 /* Release the hold on the main components. We've failed to allocate 307 * a spare, and since we're failing, we need to free things.. 308 309 XXX failing to allocate a spare is *not* that big of a deal... 310 We *can* survive without it, if need be, esp. if we get hot 311 adding working. 312 313 If we don't fail out here, then we need a way to remove this spare... 314 that should be easier to do here than if we are "live"... 315 316 */ 317 318 rf_UnconfigureVnodes( raidPtr ); 319 320 return (ret); 321 } 322 323 static int 324 rf_AllocDiskStructures(raidPtr, cfgPtr) 325 RF_Raid_t *raidPtr; 326 RF_Config_t *cfgPtr; 327 { 328 RF_RaidDisk_t **disks; 329 int ret; 330 int r; 331 332 RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *), 333 (RF_RaidDisk_t **), raidPtr->cleanupList); 334 if (disks == NULL) { 335 ret = ENOMEM; 336 goto fail; 337 } 338 raidPtr->Disks = disks; 339 /* get space for the device-specific stuff... */ 340 RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow, 341 sizeof(struct raidcinfo *), (struct raidcinfo **), 342 raidPtr->cleanupList); 343 if (raidPtr->raid_cinfo == NULL) { 344 ret = ENOMEM; 345 goto fail; 346 } 347 348 for (r = 0; r < raidPtr->numRow; r++) { 349 /* We allocate RF_MAXSPARE on the first row so that we 350 have room to do hot-swapping of spares */ 351 RF_CallocAndAdd(disks[r], raidPtr->numCol 352 + ((r == 0) ? RF_MAXSPARE : 0), 353 sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *), 354 raidPtr->cleanupList); 355 if (disks[r] == NULL) { 356 ret = ENOMEM; 357 goto fail; 358 } 359 /* get more space for device specific stuff.. */ 360 RF_CallocAndAdd(raidPtr->raid_cinfo[r], 361 raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0), 362 sizeof(struct raidcinfo), (struct raidcinfo *), 363 raidPtr->cleanupList); 364 if (raidPtr->raid_cinfo[r] == NULL) { 365 ret = ENOMEM; 366 goto fail; 367 } 368 } 369 return(0); 370 fail: 371 rf_UnconfigureVnodes( raidPtr ); 372 373 return(ret); 374 } 375 376 377 /* configure a single disk during auto-configuration at boot */ 378 int 379 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config) 380 RF_Raid_t *raidPtr; 381 RF_Config_t *cfgPtr; 382 RF_AutoConfig_t *auto_config; 383 { 384 RF_RaidDisk_t **disks; 385 RF_RaidDisk_t *diskPtr; 386 RF_RowCol_t r, c; 387 RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL; 388 int bs, ret; 389 int numFailuresThisRow; 390 int force; 391 RF_AutoConfig_t *ac; 392 int parity_good; 393 int mod_counter; 394 int mod_counter_found; 395 396 #if DEBUG 397 printf("Starting autoconfiguration of RAID set...\n"); 398 #endif 399 force = cfgPtr->force; 400 401 ret = rf_AllocDiskStructures(raidPtr, cfgPtr); 402 if (ret) 403 goto fail; 404 405 disks = raidPtr->Disks; 406 407 /* assume the parity will be fine.. */ 408 parity_good = RF_RAID_CLEAN; 409 410 /* Check for mod_counters that are too low */ 411 mod_counter_found = 0; 412 mod_counter = 0; 413 ac = auto_config; 414 while(ac!=NULL) { 415 if (mod_counter_found==0) { 416 mod_counter = ac->clabel->mod_counter; 417 mod_counter_found = 1; 418 } else { 419 if (ac->clabel->mod_counter > mod_counter) { 420 mod_counter = ac->clabel->mod_counter; 421 } 422 } 423 ac->flag = 0; /* clear the general purpose flag */ 424 ac = ac->next; 425 } 426 427 bs = 0; 428 for (r = 0; r < raidPtr->numRow; r++) { 429 numFailuresThisRow = 0; 430 for (c = 0; c < raidPtr->numCol; c++) { 431 diskPtr = &disks[r][c]; 432 433 /* find this row/col in the autoconfig */ 434 #if DEBUG 435 printf("Looking for %d,%d in autoconfig\n",r,c); 436 #endif 437 ac = auto_config; 438 while(ac!=NULL) { 439 if (ac->clabel==NULL) { 440 /* big-time bad news. */ 441 goto fail; 442 } 443 if ((ac->clabel->row == r) && 444 (ac->clabel->column == c) && 445 (ac->clabel->mod_counter == mod_counter)) { 446 /* it's this one... */ 447 /* flag it as 'used', so we don't 448 free it later. */ 449 ac->flag = 1; 450 #if DEBUG 451 printf("Found: %s at %d,%d\n", 452 ac->devname,r,c); 453 #endif 454 455 break; 456 } 457 ac=ac->next; 458 } 459 460 if (ac==NULL) { 461 /* we didn't find an exact match with a 462 correct mod_counter above... can we 463 find one with an incorrect mod_counter 464 to use instead? (this one, if we find 465 it, will be marked as failed once the 466 set configures) 467 */ 468 469 ac = auto_config; 470 while(ac!=NULL) { 471 if (ac->clabel==NULL) { 472 /* big-time bad news. */ 473 goto fail; 474 } 475 if ((ac->clabel->row == r) && 476 (ac->clabel->column == c)) { 477 /* it's this one... 478 flag it as 'used', so we 479 don't free it later. */ 480 ac->flag = 1; 481 #if DEBUG 482 printf("Found(low mod_counter): %s at %d,%d\n", 483 ac->devname,r,c); 484 #endif 485 486 break; 487 } 488 ac=ac->next; 489 } 490 } 491 492 493 494 if (ac!=NULL) { 495 /* Found it. Configure it.. */ 496 diskPtr->blockSize = ac->clabel->blockSize; 497 diskPtr->numBlocks = ac->clabel->numBlocks; 498 /* Note: rf_protectedSectors is already 499 factored into numBlocks here */ 500 raidPtr->raid_cinfo[r][c].ci_vp = ac->vp; 501 raidPtr->raid_cinfo[r][c].ci_dev = ac->dev; 502 503 memcpy(&raidPtr->raid_cinfo[r][c].ci_label, 504 ac->clabel, sizeof(*ac->clabel)); 505 sprintf(diskPtr->devname, "/dev/%s", 506 ac->devname); 507 508 /* note the fact that this component was 509 autoconfigured. You'll need this info 510 later. Trust me :) */ 511 diskPtr->auto_configured = 1; 512 diskPtr->dev = ac->dev; 513 514 /* 515 * we allow the user to specify that 516 * only a fraction of the disks should 517 * be used this is just for debug: it 518 * speeds up the parity scan 519 */ 520 521 diskPtr->numBlocks = diskPtr->numBlocks * 522 rf_sizePercentage / 100; 523 524 /* XXX these will get set multiple times, 525 but since we're autoconfiguring, they'd 526 better be always the same each time! 527 If not, this is the least of your worries */ 528 529 bs = diskPtr->blockSize; 530 min_numblks = diskPtr->numBlocks; 531 532 /* this gets done multiple times, but that's 533 fine -- the serial number will be the same 534 for all components, guaranteed */ 535 raidPtr->serial_number = 536 ac->clabel->serial_number; 537 /* check the last time the label 538 was modified */ 539 if (ac->clabel->mod_counter != 540 mod_counter) { 541 /* Even though we've filled in all 542 of the above, we don't trust 543 this component since it's 544 modification counter is not 545 in sync with the rest, and we really 546 consider it to be failed. */ 547 disks[r][c].status = rf_ds_failed; 548 numFailuresThisRow++; 549 } else { 550 if (ac->clabel->clean != 551 RF_RAID_CLEAN) { 552 parity_good = RF_RAID_DIRTY; 553 } 554 } 555 } else { 556 /* Didn't find it at all!! 557 Component must really be dead */ 558 disks[r][c].status = rf_ds_failed; 559 sprintf(disks[r][c].devname,"component%d", 560 r * raidPtr->numCol + c); 561 numFailuresThisRow++; 562 } 563 } 564 /* XXX fix for n-fault tolerant */ 565 /* XXX this should probably check to see how many failures 566 we can handle for this configuration! */ 567 if (numFailuresThisRow > 0) 568 raidPtr->status[r] = rf_rs_degraded; 569 } 570 571 /* close the device for the ones that didn't get used */ 572 573 ac = auto_config; 574 while(ac!=NULL) { 575 if (ac->flag == 0) { 576 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 577 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0); 578 vput(ac->vp); 579 ac->vp = NULL; 580 #if DEBUG 581 printf("Released %s from auto-config set.\n", 582 ac->devname); 583 #endif 584 } 585 ac = ac->next; 586 } 587 588 raidPtr->mod_counter = mod_counter; 589 590 /* note the state of the parity, if any */ 591 raidPtr->parity_good = parity_good; 592 raidPtr->sectorsPerDisk = min_numblks; 593 raidPtr->logBytesPerSector = ffs(bs) - 1; 594 raidPtr->bytesPerSector = bs; 595 raidPtr->sectorMask = bs - 1; 596 return (0); 597 598 fail: 599 600 rf_UnconfigureVnodes( raidPtr ); 601 602 return (ret); 603 604 } 605 606 /* configure a single disk in the array */ 607 int 608 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col) 609 RF_Raid_t *raidPtr; 610 char *buf; 611 RF_RaidDisk_t *diskPtr; 612 RF_RowCol_t row; 613 RF_RowCol_t col; 614 { 615 char *p; 616 int retcode; 617 618 struct partinfo dpart; 619 struct vnode *vp; 620 struct vattr va; 621 struct proc *proc; 622 int error; 623 624 retcode = 0; 625 p = rf_find_non_white(buf); 626 if (p[strlen(p) - 1] == '\n') { 627 /* strip off the newline */ 628 p[strlen(p) - 1] = '\0'; 629 } 630 (void) strcpy(diskPtr->devname, p); 631 632 proc = raidPtr->engine_thread; 633 634 /* Let's start by claiming the component is fine and well... */ 635 diskPtr->status = rf_ds_optimal; 636 637 raidPtr->raid_cinfo[row][col].ci_vp = NULL; 638 raidPtr->raid_cinfo[row][col].ci_dev = NULL; 639 640 error = raidlookup(diskPtr->devname, proc, &vp); 641 if (error) { 642 printf("raidlookup on device: %s failed!\n", diskPtr->devname); 643 if (error == ENXIO) { 644 /* the component isn't there... must be dead :-( */ 645 diskPtr->status = rf_ds_failed; 646 } else { 647 return (error); 648 } 649 } 650 if (diskPtr->status == rf_ds_optimal) { 651 652 if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) { 653 return (error); 654 } 655 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, 656 FREAD, proc->p_ucred, proc); 657 if (error) { 658 return (error); 659 } 660 661 diskPtr->blockSize = dpart.disklab->d_secsize; 662 663 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 664 diskPtr->partitionSize = dpart.part->p_size; 665 666 raidPtr->raid_cinfo[row][col].ci_vp = vp; 667 raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev; 668 669 /* This component was not automatically configured */ 670 diskPtr->auto_configured = 0; 671 diskPtr->dev = va.va_rdev; 672 673 /* we allow the user to specify that only a fraction of the 674 * disks should be used this is just for debug: it speeds up 675 * the parity scan */ 676 diskPtr->numBlocks = diskPtr->numBlocks * 677 rf_sizePercentage / 100; 678 } 679 return (0); 680 } 681 682 static void 683 rf_print_label_status( raidPtr, row, column, dev_name, ci_label ) 684 RF_Raid_t *raidPtr; 685 int row; 686 int column; 687 char *dev_name; 688 RF_ComponentLabel_t *ci_label; 689 { 690 691 printf("raid%d: Component %s being configured at row: %d col: %d\n", 692 raidPtr->raidid, dev_name, row, column ); 693 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 694 ci_label->row, ci_label->column, 695 ci_label->num_rows, ci_label->num_columns); 696 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 697 ci_label->version, ci_label->serial_number, 698 ci_label->mod_counter); 699 printf(" Clean: %s Status: %d\n", 700 ci_label->clean ? "Yes" : "No", ci_label->status ); 701 } 702 703 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label, 704 serial_number, mod_counter ) 705 RF_Raid_t *raidPtr; 706 int row; 707 int column; 708 char *dev_name; 709 RF_ComponentLabel_t *ci_label; 710 int serial_number; 711 int mod_counter; 712 { 713 int fatal_error = 0; 714 715 if (serial_number != ci_label->serial_number) { 716 printf("%s has a different serial number: %d %d\n", 717 dev_name, serial_number, ci_label->serial_number); 718 fatal_error = 1; 719 } 720 if (mod_counter != ci_label->mod_counter) { 721 printf("%s has a different modfication count: %d %d\n", 722 dev_name, mod_counter, ci_label->mod_counter); 723 } 724 725 if (row != ci_label->row) { 726 printf("Row out of alignment for: %s\n", dev_name); 727 fatal_error = 1; 728 } 729 if (column != ci_label->column) { 730 printf("Column out of alignment for: %s\n", dev_name); 731 fatal_error = 1; 732 } 733 if (raidPtr->numRow != ci_label->num_rows) { 734 printf("Number of rows do not match for: %s\n", dev_name); 735 fatal_error = 1; 736 } 737 if (raidPtr->numCol != ci_label->num_columns) { 738 printf("Number of columns do not match for: %s\n", dev_name); 739 fatal_error = 1; 740 } 741 if (ci_label->clean == 0) { 742 /* it's not clean, but that's not fatal */ 743 printf("%s is not clean!\n", dev_name); 744 } 745 return(fatal_error); 746 } 747 748 749 /* 750 751 rf_CheckLabels() - check all the component labels for consistency. 752 Return an error if there is anything major amiss. 753 754 */ 755 756 int 757 rf_CheckLabels( raidPtr, cfgPtr ) 758 RF_Raid_t *raidPtr; 759 RF_Config_t *cfgPtr; 760 { 761 int r,c; 762 char *dev_name; 763 RF_ComponentLabel_t *ci_label; 764 int serial_number = 0; 765 int mod_number = 0; 766 int fatal_error = 0; 767 int mod_values[4]; 768 int mod_count[4]; 769 int ser_values[4]; 770 int ser_count[4]; 771 int num_ser; 772 int num_mod; 773 int i; 774 int found; 775 int hosed_row; 776 int hosed_column; 777 int too_fatal; 778 int parity_good; 779 int force; 780 781 hosed_row = -1; 782 hosed_column = -1; 783 too_fatal = 0; 784 force = cfgPtr->force; 785 786 /* 787 We're going to try to be a little intelligent here. If one 788 component's label is bogus, and we can identify that it's the 789 *only* one that's gone, we'll mark it as "failed" and allow 790 the configuration to proceed. This will be the *only* case 791 that we'll proceed if there would be (otherwise) fatal errors. 792 793 Basically we simply keep a count of how many components had 794 what serial number. If all but one agree, we simply mark 795 the disagreeing component as being failed, and allow 796 things to come up "normally". 797 798 We do this first for serial numbers, and then for "mod_counter". 799 800 */ 801 802 num_ser = 0; 803 num_mod = 0; 804 for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) { 805 for (c = 0; c < raidPtr->numCol; c++) { 806 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 807 found=0; 808 for(i=0;i<num_ser;i++) { 809 if (ser_values[i] == ci_label->serial_number) { 810 ser_count[i]++; 811 found=1; 812 break; 813 } 814 } 815 if (!found) { 816 ser_values[num_ser] = ci_label->serial_number; 817 ser_count[num_ser] = 1; 818 num_ser++; 819 if (num_ser>2) { 820 fatal_error = 1; 821 break; 822 } 823 } 824 found=0; 825 for(i=0;i<num_mod;i++) { 826 if (mod_values[i] == ci_label->mod_counter) { 827 mod_count[i]++; 828 found=1; 829 break; 830 } 831 } 832 if (!found) { 833 mod_values[num_mod] = ci_label->mod_counter; 834 mod_count[num_mod] = 1; 835 num_mod++; 836 if (num_mod>2) { 837 fatal_error = 1; 838 break; 839 } 840 } 841 } 842 } 843 #if DEBUG 844 printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid); 845 for(i=0;i<num_ser;i++) { 846 printf("%d %d\n", ser_values[i], ser_count[i]); 847 } 848 printf("raid%d: Summary of mod counters:\n", raidPtr->raidid); 849 for(i=0;i<num_mod;i++) { 850 printf("%d %d\n", mod_values[i], mod_count[i]); 851 } 852 #endif 853 serial_number = ser_values[0]; 854 if (num_ser == 2) { 855 if ((ser_count[0] == 1) || (ser_count[1] == 1)) { 856 /* Locate the maverick component */ 857 if (ser_count[1] > ser_count[0]) { 858 serial_number = ser_values[1]; 859 } 860 for (r = 0; r < raidPtr->numRow; r++) { 861 for (c = 0; c < raidPtr->numCol; c++) { 862 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 863 if (serial_number != 864 ci_label->serial_number) { 865 hosed_row = r; 866 hosed_column = c; 867 break; 868 } 869 } 870 } 871 printf("Hosed component: %s\n", 872 &cfgPtr->devnames[hosed_row][hosed_column][0]); 873 if (!force) { 874 /* we'll fail this component, as if there are 875 other major errors, we arn't forcing things 876 and we'll abort the config anyways */ 877 raidPtr->Disks[hosed_row][hosed_column].status 878 = rf_ds_failed; 879 raidPtr->numFailures++; 880 raidPtr->status[hosed_row] = rf_rs_degraded; 881 } 882 } else { 883 too_fatal = 1; 884 } 885 if (cfgPtr->parityConfig == '0') { 886 /* We've identified two different serial numbers. 887 RAID 0 can't cope with that, so we'll punt */ 888 too_fatal = 1; 889 } 890 891 } 892 893 /* record the serial number for later. If we bail later, setting 894 this doesn't matter, otherwise we've got the best guess at the 895 correct serial number */ 896 raidPtr->serial_number = serial_number; 897 898 mod_number = mod_values[0]; 899 if (num_mod == 2) { 900 if ((mod_count[0] == 1) || (mod_count[1] == 1)) { 901 /* Locate the maverick component */ 902 if (mod_count[1] > mod_count[0]) { 903 mod_number = mod_values[1]; 904 } else if (mod_count[1] < mod_count[0]) { 905 mod_number = mod_values[0]; 906 } else { 907 /* counts of different modification values 908 are the same. Assume greater value is 909 the correct one, all other things 910 considered */ 911 if (mod_values[0] > mod_values[1]) { 912 mod_number = mod_values[0]; 913 } else { 914 mod_number = mod_values[1]; 915 } 916 917 } 918 for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) { 919 for (c = 0; c < raidPtr->numCol; c++) { 920 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 921 if (mod_number != 922 ci_label->mod_counter) { 923 if ( ( hosed_row == r ) && 924 ( hosed_column == c )) { 925 /* same one. Can 926 deal with it. */ 927 } else { 928 hosed_row = r; 929 hosed_column = c; 930 if (num_ser != 1) { 931 too_fatal = 1; 932 break; 933 } 934 } 935 } 936 } 937 } 938 printf("Hosed component: %s\n", 939 &cfgPtr->devnames[hosed_row][hosed_column][0]); 940 if (!force) { 941 /* we'll fail this component, as if there are 942 other major errors, we arn't forcing things 943 and we'll abort the config anyways */ 944 if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) { 945 raidPtr->Disks[hosed_row][hosed_column].status 946 = rf_ds_failed; 947 raidPtr->numFailures++; 948 raidPtr->status[hosed_row] = rf_rs_degraded; 949 } 950 } 951 } else { 952 too_fatal = 1; 953 } 954 if (cfgPtr->parityConfig == '0') { 955 /* We've identified two different mod counters. 956 RAID 0 can't cope with that, so we'll punt */ 957 too_fatal = 1; 958 } 959 } 960 961 raidPtr->mod_counter = mod_number; 962 963 if (too_fatal) { 964 /* we've had both a serial number mismatch, and a mod_counter 965 mismatch -- and they involved two different components!! 966 Bail -- make things fail so that the user must force 967 the issue... */ 968 hosed_row = -1; 969 hosed_column = -1; 970 } 971 972 if (num_ser > 2) { 973 printf("raid%d: Too many different serial numbers!\n", 974 raidPtr->raidid); 975 } 976 977 if (num_mod > 2) { 978 printf("raid%d: Too many different mod counters!\n", 979 raidPtr->raidid); 980 } 981 982 /* we start by assuming the parity will be good, and flee from 983 that notion at the slightest sign of trouble */ 984 985 parity_good = RF_RAID_CLEAN; 986 for (r = 0; r < raidPtr->numRow; r++) { 987 for (c = 0; c < raidPtr->numCol; c++) { 988 dev_name = &cfgPtr->devnames[r][c][0]; 989 ci_label = &raidPtr->raid_cinfo[r][c].ci_label; 990 991 if ((r == hosed_row) && (c == hosed_column)) { 992 printf("raid%d: Ignoring %s\n", 993 raidPtr->raidid, dev_name); 994 } else { 995 rf_print_label_status( raidPtr, r, c, 996 dev_name, ci_label ); 997 if (rf_check_label_vitals( raidPtr, r, c, 998 dev_name, ci_label, 999 serial_number, 1000 mod_number )) { 1001 fatal_error = 1; 1002 } 1003 if (ci_label->clean != RF_RAID_CLEAN) { 1004 parity_good = RF_RAID_DIRTY; 1005 } 1006 } 1007 } 1008 } 1009 if (fatal_error) { 1010 parity_good = RF_RAID_DIRTY; 1011 } 1012 1013 /* we note the state of the parity */ 1014 raidPtr->parity_good = parity_good; 1015 1016 return(fatal_error); 1017 } 1018 1019 int 1020 rf_add_hot_spare(raidPtr, sparePtr) 1021 RF_Raid_t *raidPtr; 1022 RF_SingleComponent_t *sparePtr; 1023 { 1024 RF_RaidDisk_t *disks; 1025 RF_DiskQueue_t *spareQueues; 1026 int ret; 1027 unsigned int bs; 1028 int spare_number; 1029 1030 if (raidPtr->numSpare >= RF_MAXSPARE) { 1031 RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare); 1032 return(EINVAL); 1033 } 1034 1035 RF_LOCK_MUTEX(raidPtr->mutex); 1036 1037 /* the beginning of the spares... */ 1038 disks = &raidPtr->Disks[0][raidPtr->numCol]; 1039 1040 spare_number = raidPtr->numSpare; 1041 1042 ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name, 1043 &disks[spare_number], 0, 1044 raidPtr->numCol + spare_number); 1045 1046 if (ret) 1047 goto fail; 1048 if (disks[spare_number].status != rf_ds_optimal) { 1049 RF_ERRORMSG1("Warning: spare disk %s failed TUR\n", 1050 sparePtr->component_name); 1051 ret=EINVAL; 1052 goto fail; 1053 } else { 1054 disks[spare_number].status = rf_ds_spare; 1055 DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number, 1056 disks[spare_number].devname, 1057 (long int) disks[spare_number].numBlocks, 1058 disks[spare_number].blockSize, 1059 (long int) disks[spare_number].numBlocks * 1060 disks[spare_number].blockSize / 1024 / 1024); 1061 } 1062 1063 1064 /* check sizes and block sizes on the spare disk */ 1065 bs = 1 << raidPtr->logBytesPerSector; 1066 if (disks[spare_number].blockSize != bs) { 1067 RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs); 1068 ret = EINVAL; 1069 goto fail; 1070 } 1071 if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) { 1072 RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n", 1073 disks[spare_number].devname, 1074 disks[spare_number].blockSize, 1075 (long int) raidPtr->sectorsPerDisk); 1076 ret = EINVAL; 1077 goto fail; 1078 } else { 1079 if (disks[spare_number].numBlocks > 1080 raidPtr->sectorsPerDisk) { 1081 RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname, 1082 (long int) raidPtr->sectorsPerDisk); 1083 1084 disks[spare_number].numBlocks = raidPtr->sectorsPerDisk; 1085 } 1086 } 1087 1088 spareQueues = &raidPtr->Queues[0][raidPtr->numCol]; 1089 ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number], 1090 0, raidPtr->numCol + spare_number, 1091 raidPtr->qType, 1092 raidPtr->sectorsPerDisk, 1093 raidPtr->Disks[0][raidPtr->numCol + 1094 spare_number].dev, 1095 raidPtr->maxOutstanding, 1096 &raidPtr->shutdownList, 1097 raidPtr->cleanupList); 1098 1099 1100 raidPtr->numSpare++; 1101 RF_UNLOCK_MUTEX(raidPtr->mutex); 1102 return (0); 1103 1104 fail: 1105 RF_UNLOCK_MUTEX(raidPtr->mutex); 1106 return(ret); 1107 } 1108 1109 int 1110 rf_remove_hot_spare(raidPtr,sparePtr) 1111 RF_Raid_t *raidPtr; 1112 RF_SingleComponent_t *sparePtr; 1113 { 1114 int spare_number; 1115 1116 1117 if (raidPtr->numSpare==0) { 1118 printf("No spares to remove!\n"); 1119 return(EINVAL); 1120 } 1121 1122 spare_number = sparePtr->column; 1123 1124 return(EINVAL); /* XXX not implemented yet */ 1125 #if 0 1126 if (spare_number < 0 || spare_number > raidPtr->numSpare) { 1127 return(EINVAL); 1128 } 1129 1130 /* verify that this spare isn't in use... */ 1131 1132 1133 1134 1135 /* it's gone.. */ 1136 1137 raidPtr->numSpare--; 1138 1139 return(0); 1140 #endif 1141 } 1142 1143 1144 int 1145 rf_delete_component(raidPtr,component) 1146 RF_Raid_t *raidPtr; 1147 RF_SingleComponent_t *component; 1148 { 1149 RF_RaidDisk_t *disks; 1150 1151 if ((component->row < 0) || 1152 (component->row >= raidPtr->numRow) || 1153 (component->column < 0) || 1154 (component->column >= raidPtr->numCol)) { 1155 return(EINVAL); 1156 } 1157 1158 disks = &raidPtr->Disks[component->row][component->column]; 1159 1160 /* 1. This component must be marked as 'failed' */ 1161 1162 return(EINVAL); /* Not implemented yet. */ 1163 } 1164 1165 int 1166 rf_incorporate_hot_spare(raidPtr,component) 1167 RF_Raid_t *raidPtr; 1168 RF_SingleComponent_t *component; 1169 { 1170 1171 /* Issues here include how to 'move' this in if there is IO 1172 taking place (e.g. component queues and such) */ 1173 1174 return(EINVAL); /* Not implemented yet. */ 1175 } 1176