1 /* $NetBSD: rf_driver.c,v 1.132 2015/12/26 00:58:45 pgoyette Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1995 Carnegie-Mellon University. 33 * All rights reserved. 34 * 35 * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II, 36 * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka 37 * 38 * Permission to use, copy, modify and distribute this software and 39 * its documentation is hereby granted, provided that both the copyright 40 * notice and this permission notice appear in all copies of the 41 * software, derivative works or modified versions, and any portions 42 * thereof, and that both notices appear in supporting documentation. 43 * 44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 46 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 47 * 48 * Carnegie Mellon requests users of this software to return to 49 * 50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 51 * School of Computer Science 52 * Carnegie Mellon University 53 * Pittsburgh PA 15213-3890 54 * 55 * any improvements or extensions that they make and grant Carnegie the 56 * rights to redistribute these changes. 57 */ 58 59 /****************************************************************************** 60 * 61 * rf_driver.c -- main setup, teardown, and access routines for the RAID driver 62 * 63 * all routines are prefixed with rf_ (raidframe), to avoid conficts. 64 * 65 ******************************************************************************/ 66 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.132 2015/12/26 00:58:45 pgoyette Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_raid_diagnostic.h" 73 #endif 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/ioctl.h> 78 #include <sys/fcntl.h> 79 #include <sys/vnode.h> 80 81 82 #include "rf_archs.h" 83 #include "rf_threadstuff.h" 84 85 #include <sys/errno.h> 86 87 #include "rf_raid.h" 88 #include "rf_dag.h" 89 #include "rf_aselect.h" 90 #include "rf_diskqueue.h" 91 #include "rf_parityscan.h" 92 #include "rf_alloclist.h" 93 #include "rf_dagutils.h" 94 #include "rf_utils.h" 95 #include "rf_etimer.h" 96 #include "rf_acctrace.h" 97 #include "rf_general.h" 98 #include "rf_desc.h" 99 #include "rf_states.h" 100 #include "rf_decluster.h" 101 #include "rf_map.h" 102 #include "rf_revent.h" 103 #include "rf_callback.h" 104 #include "rf_engine.h" 105 #include "rf_mcpair.h" 106 #include "rf_nwayxor.h" 107 #include "rf_copyback.h" 108 #include "rf_driver.h" 109 #include "rf_options.h" 110 #include "rf_shutdown.h" 111 #include "rf_kintf.h" 112 #include "rf_paritymap.h" 113 114 #include <sys/buf.h> 115 116 #ifndef RF_ACCESS_DEBUG 117 #define RF_ACCESS_DEBUG 0 118 #endif 119 120 /* rad == RF_RaidAccessDesc_t */ 121 #define RF_MAX_FREE_RAD 128 122 #define RF_MIN_FREE_RAD 32 123 124 /* debug variables */ 125 char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ 126 127 /* main configuration routines */ 128 static int raidframe_booted = 0; 129 130 static void rf_ConfigureDebug(RF_Config_t * cfgPtr); 131 static void set_debug_option(char *name, long val); 132 static void rf_UnconfigureArray(void); 133 static void rf_ShutdownRDFreeList(void *); 134 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); 135 136 rf_declare_mutex2(rf_printf_mutex); /* debug only: avoids interleaved 137 * printfs by different stripes */ 138 139 #define SIGNAL_QUIESCENT_COND(_raid_) \ 140 rf_broadcast_cond2((_raid_)->access_suspend_cv) 141 #define WAIT_FOR_QUIESCENCE(_raid_) \ 142 rf_wait_cond2((_raid_)->access_suspend_cv, \ 143 (_raid_)->access_suspend_mutex) 144 145 static int configureCount = 0; /* number of active configurations */ 146 static int isconfigged = 0; /* is basic raidframe (non per-array) 147 * stuff configured */ 148 static rf_declare_mutex2(configureMutex); /* used to lock the configuration 149 * stuff */ 150 static RF_ShutdownList_t *globalShutdown; /* non array-specific 151 * stuff */ 152 153 static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp); 154 static int rf_AllocEmergBuffers(RF_Raid_t *); 155 static void rf_FreeEmergBuffers(RF_Raid_t *); 156 static void rf_destroy_mutex_cond(RF_Raid_t *); 157 static void rf_alloc_mutex_cond(RF_Raid_t *); 158 159 /* called at system boot time */ 160 int 161 rf_BootRaidframe(bool boot) 162 { 163 164 if (boot) { 165 if (raidframe_booted) 166 return (EBUSY); 167 raidframe_booted = 1; 168 rf_init_mutex2(configureMutex, IPL_NONE); 169 configureCount = 0; 170 isconfigged = 0; 171 globalShutdown = NULL; 172 } else { 173 rf_destroy_mutex2(configureMutex); 174 raidframe_booted = 0; 175 } 176 return (0); 177 } 178 179 /* 180 * Called whenever an array is shutdown 181 */ 182 static void 183 rf_UnconfigureArray(void) 184 { 185 186 rf_lock_mutex2(configureMutex); 187 if (--configureCount == 0) { /* if no active configurations, shut 188 * everything down */ 189 rf_destroy_mutex2(rf_printf_mutex); 190 isconfigged = 0; 191 rf_ShutdownList(&globalShutdown); 192 193 /* 194 * We must wait until now, because the AllocList module 195 * uses the DebugMem module. 196 */ 197 #if RF_DEBUG_MEM 198 if (rf_memDebug) 199 rf_print_unfreed(); 200 #endif 201 } 202 rf_unlock_mutex2(configureMutex); 203 } 204 205 /* 206 * Called to shut down an array. 207 */ 208 int 209 rf_Shutdown(RF_Raid_t *raidPtr) 210 { 211 212 if (!raidPtr->valid) { 213 RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); 214 return (EINVAL); 215 } 216 /* 217 * wait for outstanding IOs to land 218 * As described in rf_raid.h, we use the rad_freelist lock 219 * to protect the per-array info about outstanding descs 220 * since we need to do freelist locking anyway, and this 221 * cuts down on the amount of serialization we've got going 222 * on. 223 */ 224 rf_lock_mutex2(raidPtr->rad_lock); 225 if (raidPtr->waitShutdown) { 226 rf_unlock_mutex2(raidPtr->rad_lock); 227 return (EBUSY); 228 } 229 raidPtr->waitShutdown = 1; 230 while (raidPtr->nAccOutstanding) { 231 rf_wait_cond2(raidPtr->outstandingCond, raidPtr->rad_lock); 232 } 233 rf_unlock_mutex2(raidPtr->rad_lock); 234 235 /* Wait for any parity re-writes to stop... */ 236 while (raidPtr->parity_rewrite_in_progress) { 237 printf("raid%d: Waiting for parity re-write to exit...\n", 238 raidPtr->raidid); 239 tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, 240 "rfprwshutdown", 0); 241 } 242 243 /* Wait for any reconstruction to stop... */ 244 rf_lock_mutex2(raidPtr->mutex); 245 while (raidPtr->reconInProgress) { 246 printf("raid%d: Waiting for reconstruction to stop...\n", 247 raidPtr->raidid); 248 rf_wait_cond2(raidPtr->waitForReconCond, raidPtr->mutex); 249 } 250 rf_unlock_mutex2(raidPtr->mutex); 251 252 raidPtr->valid = 0; 253 254 if (raidPtr->parity_map != NULL) 255 rf_paritymap_detach(raidPtr); 256 257 rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); 258 259 rf_UnconfigureVnodes(raidPtr); 260 261 rf_FreeEmergBuffers(raidPtr); 262 263 rf_ShutdownList(&raidPtr->shutdownList); 264 265 rf_destroy_mutex_cond(raidPtr); 266 267 rf_UnconfigureArray(); 268 269 return (0); 270 } 271 272 273 #define DO_INIT_CONFIGURE(f) { \ 274 rc = f (&globalShutdown); \ 275 if (rc) { \ 276 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 277 rf_ShutdownList(&globalShutdown); \ 278 configureCount--; \ 279 rf_unlock_mutex2(configureMutex); \ 280 rf_destroy_mutex2(rf_printf_mutex); \ 281 return(rc); \ 282 } \ 283 } 284 285 #define DO_RAID_FAIL() { \ 286 rf_UnconfigureVnodes(raidPtr); \ 287 rf_FreeEmergBuffers(raidPtr); \ 288 rf_ShutdownList(&raidPtr->shutdownList); \ 289 rf_UnconfigureArray(); \ 290 rf_destroy_mutex_cond(raidPtr); \ 291 } 292 293 #define DO_RAID_INIT_CONFIGURE(f) { \ 294 rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ 295 if (rc) { \ 296 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 297 DO_RAID_FAIL(); \ 298 return(rc); \ 299 } \ 300 } 301 302 int 303 rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac) 304 { 305 RF_RowCol_t col; 306 int rc; 307 308 rf_lock_mutex2(configureMutex); 309 configureCount++; 310 if (isconfigged == 0) { 311 rf_init_mutex2(rf_printf_mutex, IPL_VM); 312 313 /* initialize globals */ 314 315 DO_INIT_CONFIGURE(rf_ConfigureAllocList); 316 317 /* 318 * Yes, this does make debugging general to the whole 319 * system instead of being array specific. Bummer, drag. 320 */ 321 rf_ConfigureDebug(cfgPtr); 322 DO_INIT_CONFIGURE(rf_ConfigureDebugMem); 323 #if RF_ACC_TRACE > 0 324 DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); 325 #endif 326 DO_INIT_CONFIGURE(rf_ConfigureMapModule); 327 DO_INIT_CONFIGURE(rf_ConfigureReconEvent); 328 DO_INIT_CONFIGURE(rf_ConfigureCallback); 329 DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); 330 DO_INIT_CONFIGURE(rf_ConfigureNWayXor); 331 DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); 332 DO_INIT_CONFIGURE(rf_ConfigureMCPair); 333 DO_INIT_CONFIGURE(rf_ConfigureDAGs); 334 DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); 335 DO_INIT_CONFIGURE(rf_ConfigureReconstruction); 336 DO_INIT_CONFIGURE(rf_ConfigureCopyback); 337 DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); 338 DO_INIT_CONFIGURE(rf_ConfigurePSStatus); 339 isconfigged = 1; 340 } 341 rf_unlock_mutex2(configureMutex); 342 343 rf_alloc_mutex_cond(raidPtr); 344 345 /* set up the cleanup list. Do this after ConfigureDebug so that 346 * value of memDebug will be set */ 347 348 rf_MakeAllocList(raidPtr->cleanupList); 349 if (raidPtr->cleanupList == NULL) { 350 DO_RAID_FAIL(); 351 return (ENOMEM); 352 } 353 rf_ShutdownCreate(&raidPtr->shutdownList, 354 (void (*) (void *)) rf_FreeAllocList, 355 raidPtr->cleanupList); 356 357 raidPtr->numCol = cfgPtr->numCol; 358 raidPtr->numSpare = cfgPtr->numSpare; 359 360 raidPtr->status = rf_rs_optimal; 361 raidPtr->reconControl = NULL; 362 363 DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); 364 DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); 365 366 raidPtr->nAccOutstanding = 0; 367 raidPtr->waitShutdown = 0; 368 369 if (ac!=NULL) { 370 /* We have an AutoConfig structure.. Don't do the 371 normal disk configuration... call the auto config 372 stuff */ 373 rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); 374 } else { 375 DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); 376 DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); 377 } 378 /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev 379 * no. is set */ 380 DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); 381 382 DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); 383 384 /* Initialize per-RAID PSS bits */ 385 rf_InitPSStatus(raidPtr); 386 387 #if RF_INCLUDE_CHAINDECLUSTER > 0 388 for (col = 0; col < raidPtr->numCol; col++) { 389 /* 390 * XXX better distribution 391 */ 392 raidPtr->hist_diskreq[col] = 0; 393 } 394 #endif 395 raidPtr->numNewFailures = 0; 396 raidPtr->copyback_in_progress = 0; 397 raidPtr->parity_rewrite_in_progress = 0; 398 raidPtr->adding_hot_spare = 0; 399 raidPtr->recon_in_progress = 0; 400 401 raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; 402 403 /* autoconfigure and root_partition will actually get filled in 404 after the config is done */ 405 raidPtr->autoconfigure = 0; 406 raidPtr->root_partition = 0; 407 raidPtr->last_unit = raidPtr->raidid; 408 raidPtr->config_order = 0; 409 410 if (rf_keepAccTotals) { 411 raidPtr->keep_acc_totals = 1; 412 } 413 414 /* Allocate a bunch of buffers to be used in low-memory conditions */ 415 raidPtr->iobuf = NULL; 416 417 rc = rf_AllocEmergBuffers(raidPtr); 418 if (rc) { 419 printf("raid%d: Unable to allocate emergency buffers.\n", 420 raidPtr->raidid); 421 DO_RAID_FAIL(); 422 return(rc); 423 } 424 425 /* Set up parity map stuff, if applicable. */ 426 #ifndef RF_NO_PARITY_MAP 427 rf_paritymap_attach(raidPtr, cfgPtr->force); 428 #endif 429 430 raidPtr->valid = 1; 431 432 printf("raid%d: %s\n", raidPtr->raidid, 433 raidPtr->Layout.map->configName); 434 printf("raid%d: Components:", raidPtr->raidid); 435 436 for (col = 0; col < raidPtr->numCol; col++) { 437 printf(" %s", raidPtr->Disks[col].devname); 438 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) { 439 printf("[**FAILED**]"); 440 } 441 } 442 printf("\n"); 443 printf("raid%d: Total Sectors: %" PRIu64 " (%" PRIu64 " MB)\n", 444 raidPtr->raidid, 445 raidPtr->totalSectors, 446 (raidPtr->totalSectors / 1024 * 447 (1 << raidPtr->logBytesPerSector) / 1024)); 448 449 return (0); 450 } 451 452 453 /* 454 455 Routines to allocate and free the "emergency buffers" for a given 456 RAID set. These emergency buffers will be used when the kernel runs 457 out of kernel memory. 458 459 */ 460 461 static int 462 rf_AllocEmergBuffers(RF_Raid_t *raidPtr) 463 { 464 void *tmpbuf; 465 RF_VoidPointerListElem_t *vple; 466 int i; 467 468 /* XXX next line needs tuning... */ 469 raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol; 470 #if DEBUG 471 printf("raid%d: allocating %d buffers of %d bytes.\n", 472 raidPtr->raidid, 473 raidPtr->numEmergencyBuffers, 474 (int)(raidPtr->Layout.sectorsPerStripeUnit << 475 raidPtr->logBytesPerSector)); 476 #endif 477 for (i = 0; i < raidPtr->numEmergencyBuffers; i++) { 478 tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit << 479 raidPtr->logBytesPerSector, 480 M_RAIDFRAME, M_WAITOK); 481 if (tmpbuf) { 482 vple = rf_AllocVPListElem(); 483 vple->p= tmpbuf; 484 vple->next = raidPtr->iobuf; 485 raidPtr->iobuf = vple; 486 raidPtr->iobuf_count++; 487 } else { 488 printf("raid%d: failed to allocate emergency buffer!\n", 489 raidPtr->raidid); 490 return 1; 491 } 492 } 493 494 /* XXX next line needs tuning too... */ 495 raidPtr->numEmergencyStripeBuffers = 10; 496 for (i = 0; i < raidPtr->numEmergencyStripeBuffers; i++) { 497 tmpbuf = malloc( raidPtr->numCol * (raidPtr->Layout.sectorsPerStripeUnit << 498 raidPtr->logBytesPerSector), 499 M_RAIDFRAME, M_WAITOK); 500 if (tmpbuf) { 501 vple = rf_AllocVPListElem(); 502 vple->p= tmpbuf; 503 vple->next = raidPtr->stripebuf; 504 raidPtr->stripebuf = vple; 505 raidPtr->stripebuf_count++; 506 } else { 507 printf("raid%d: failed to allocate emergency stripe buffer!\n", 508 raidPtr->raidid); 509 return 1; 510 } 511 } 512 513 return (0); 514 } 515 516 static void 517 rf_FreeEmergBuffers(RF_Raid_t *raidPtr) 518 { 519 RF_VoidPointerListElem_t *tmp; 520 521 /* Free the emergency IO buffers */ 522 while (raidPtr->iobuf != NULL) { 523 tmp = raidPtr->iobuf; 524 raidPtr->iobuf = raidPtr->iobuf->next; 525 free(tmp->p, M_RAIDFRAME); 526 rf_FreeVPListElem(tmp); 527 } 528 529 /* Free the emergency stripe buffers */ 530 while (raidPtr->stripebuf != NULL) { 531 tmp = raidPtr->stripebuf; 532 raidPtr->stripebuf = raidPtr->stripebuf->next; 533 free(tmp->p, M_RAIDFRAME); 534 rf_FreeVPListElem(tmp); 535 } 536 } 537 538 539 static void 540 rf_ShutdownRDFreeList(void *ignored) 541 { 542 pool_destroy(&rf_pools.rad); 543 } 544 545 static int 546 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp) 547 { 548 549 rf_pool_init(&rf_pools.rad, sizeof(RF_RaidAccessDesc_t), 550 "rf_rad_pl", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD); 551 rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); 552 return (0); 553 } 554 555 RF_RaidAccessDesc_t * 556 rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type, 557 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 558 void *bufPtr, void *bp, RF_RaidAccessFlags_t flags, 559 const RF_AccessState_t *states) 560 { 561 RF_RaidAccessDesc_t *desc; 562 563 desc = pool_get(&rf_pools.rad, PR_WAITOK); 564 565 rf_lock_mutex2(raidPtr->rad_lock); 566 if (raidPtr->waitShutdown) { 567 /* 568 * Actually, we're shutting the array down. Free the desc 569 * and return NULL. 570 */ 571 572 rf_unlock_mutex2(raidPtr->rad_lock); 573 pool_put(&rf_pools.rad, desc); 574 return (NULL); 575 } 576 raidPtr->nAccOutstanding++; 577 578 rf_unlock_mutex2(raidPtr->rad_lock); 579 580 desc->raidPtr = (void *) raidPtr; 581 desc->type = type; 582 desc->raidAddress = raidAddress; 583 desc->numBlocks = numBlocks; 584 desc->bufPtr = bufPtr; 585 desc->bp = bp; 586 desc->flags = flags; 587 desc->states = states; 588 desc->state = 0; 589 desc->dagList = NULL; 590 591 desc->status = 0; 592 desc->numRetries = 0; 593 #if RF_ACC_TRACE > 0 594 memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t)); 595 #endif 596 desc->callbackFunc = NULL; 597 desc->callbackArg = NULL; 598 desc->next = NULL; 599 desc->iobufs = NULL; 600 desc->stripebufs = NULL; 601 602 return (desc); 603 } 604 605 void 606 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc) 607 { 608 RF_Raid_t *raidPtr = desc->raidPtr; 609 RF_DagList_t *dagList, *temp; 610 RF_VoidPointerListElem_t *tmp; 611 612 RF_ASSERT(desc); 613 614 /* Cleanup the dagList(s) */ 615 dagList = desc->dagList; 616 while(dagList != NULL) { 617 temp = dagList; 618 dagList = dagList->next; 619 rf_FreeDAGList(temp); 620 } 621 622 while (desc->iobufs) { 623 tmp = desc->iobufs; 624 desc->iobufs = desc->iobufs->next; 625 rf_FreeIOBuffer(raidPtr, tmp); 626 } 627 628 while (desc->stripebufs) { 629 tmp = desc->stripebufs; 630 desc->stripebufs = desc->stripebufs->next; 631 rf_FreeStripeBuffer(raidPtr, tmp); 632 } 633 634 pool_put(&rf_pools.rad, desc); 635 rf_lock_mutex2(raidPtr->rad_lock); 636 raidPtr->nAccOutstanding--; 637 if (raidPtr->waitShutdown) { 638 rf_signal_cond2(raidPtr->outstandingCond); 639 } 640 rf_unlock_mutex2(raidPtr->rad_lock); 641 } 642 /********************************************************************* 643 * Main routine for performing an access. 644 * Accesses are retried until a DAG can not be selected. This occurs 645 * when either the DAG library is incomplete or there are too many 646 * failures in a parity group. 647 * 648 * type should be read or write async_flag should be RF_TRUE or 649 * RF_FALSE bp_in is a buf pointer. void *to facilitate ignoring it 650 * outside the kernel 651 ********************************************************************/ 652 int 653 rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, 654 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 655 void *bufPtr, struct buf *bp, RF_RaidAccessFlags_t flags) 656 { 657 RF_RaidAccessDesc_t *desc; 658 void *lbufPtr = bufPtr; 659 660 raidAddress += rf_raidSectorOffset; 661 662 #if RF_ACCESS_DEBUG 663 if (rf_accessDebug) { 664 665 printf("logBytes is: %d %d %d\n", raidPtr->raidid, 666 raidPtr->logBytesPerSector, 667 (int) rf_RaidAddressToByte(raidPtr, numBlocks)); 668 printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, 669 (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, 670 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), 671 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), 672 (int) numBlocks, 673 (int) rf_RaidAddressToByte(raidPtr, numBlocks), 674 (long) bufPtr); 675 } 676 #endif 677 678 desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, 679 numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states); 680 681 if (desc == NULL) { 682 return (ENOMEM); 683 } 684 #if RF_ACC_TRACE > 0 685 RF_ETIMER_START(desc->tracerec.tot_timer); 686 #endif 687 desc->async_flag = async_flag; 688 689 if (raidPtr->parity_map != NULL && 690 type == RF_IO_TYPE_WRITE) 691 rf_paritymap_begin(raidPtr->parity_map, raidAddress, 692 numBlocks); 693 694 rf_ContinueRaidAccess(desc); 695 696 return (0); 697 } 698 #if 0 699 /* force the array into reconfigured mode without doing reconstruction */ 700 int 701 rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col) 702 { 703 if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { 704 printf("Can't set reconfigured mode in dedicated-spare array\n"); 705 RF_PANIC(); 706 } 707 rf_lock_mutex2(raidPtr->mutex); 708 raidPtr->numFailures++; 709 raidPtr->Disks[col].status = rf_ds_dist_spared; 710 raidPtr->status = rf_rs_reconfigured; 711 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 712 /* install spare table only if declustering + distributed sparing 713 * architecture. */ 714 if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) 715 rf_InstallSpareTable(raidPtr, col); 716 rf_unlock_mutex2(raidPtr->mutex); 717 return (0); 718 } 719 #endif 720 721 int 722 rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon) 723 { 724 725 /* need to suspend IO's here -- if there are DAGs in flight 726 and we pull the rug out from under ci_vp, Bad Things 727 can happen. */ 728 729 rf_SuspendNewRequestsAndWait(raidPtr); 730 731 rf_lock_mutex2(raidPtr->mutex); 732 if (raidPtr->Disks[fcol].status != rf_ds_failed) { 733 /* must be failing something that is valid, or else it's 734 already marked as failed (in which case we don't 735 want to mark it failed again!) */ 736 raidPtr->numFailures++; 737 raidPtr->Disks[fcol].status = rf_ds_failed; 738 raidPtr->status = rf_rs_degraded; 739 } 740 rf_unlock_mutex2(raidPtr->mutex); 741 742 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 743 744 /* Close the component, so that it's not "locked" if someone 745 else want's to use it! */ 746 747 rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp, 748 raidPtr->Disks[fcol].auto_configured); 749 750 rf_lock_mutex2(raidPtr->mutex); 751 raidPtr->raid_cinfo[fcol].ci_vp = NULL; 752 753 /* Need to mark the component as not being auto_configured 754 (in case it was previously). */ 755 756 raidPtr->Disks[fcol].auto_configured = 0; 757 rf_unlock_mutex2(raidPtr->mutex); 758 /* now we can allow IO to continue -- we'll be suspending it 759 again in rf_ReconstructFailedDisk() if we have to.. */ 760 761 rf_ResumeNewRequests(raidPtr); 762 763 if (initRecon) 764 rf_ReconstructFailedDisk(raidPtr, fcol); 765 return (0); 766 } 767 /* releases a thread that is waiting for the array to become quiesced. 768 * access_suspend_mutex should be locked upon calling this 769 */ 770 void 771 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr) 772 { 773 #if RF_DEBUG_QUIESCE 774 if (rf_quiesceDebug) { 775 printf("raid%d: Signalling quiescence lock\n", 776 raidPtr->raidid); 777 } 778 #endif 779 raidPtr->access_suspend_release = 1; 780 781 if (raidPtr->waiting_for_quiescence) { 782 SIGNAL_QUIESCENT_COND(raidPtr); 783 } 784 } 785 /* suspends all new requests to the array. No effect on accesses that are in flight. */ 786 int 787 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr) 788 { 789 #if RF_DEBUG_QUIESCE 790 if (rf_quiesceDebug) 791 printf("raid%d: Suspending new reqs\n", raidPtr->raidid); 792 #endif 793 rf_lock_mutex2(raidPtr->access_suspend_mutex); 794 raidPtr->accesses_suspended++; 795 raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; 796 797 if (raidPtr->waiting_for_quiescence) { 798 raidPtr->access_suspend_release = 0; 799 while (!raidPtr->access_suspend_release) { 800 #if RF_DEBUG_QUIESCE 801 printf("raid%d: Suspending: Waiting for Quiescence\n", 802 raidPtr->raidid); 803 #endif 804 WAIT_FOR_QUIESCENCE(raidPtr); 805 raidPtr->waiting_for_quiescence = 0; 806 } 807 } 808 #if RF_DEBUG_QUIESCE 809 printf("raid%d: Quiescence reached..\n", raidPtr->raidid); 810 #endif 811 812 rf_unlock_mutex2(raidPtr->access_suspend_mutex); 813 return (raidPtr->waiting_for_quiescence); 814 } 815 /* wake up everyone waiting for quiescence to be released */ 816 void 817 rf_ResumeNewRequests(RF_Raid_t *raidPtr) 818 { 819 RF_CallbackDesc_t *t, *cb; 820 821 #if RF_DEBUG_QUIESCE 822 if (rf_quiesceDebug) 823 printf("raid%d: Resuming new requests\n", raidPtr->raidid); 824 #endif 825 826 rf_lock_mutex2(raidPtr->access_suspend_mutex); 827 raidPtr->accesses_suspended--; 828 if (raidPtr->accesses_suspended == 0) 829 cb = raidPtr->quiesce_wait_list; 830 else 831 cb = NULL; 832 raidPtr->quiesce_wait_list = NULL; 833 rf_unlock_mutex2(raidPtr->access_suspend_mutex); 834 835 while (cb) { 836 t = cb; 837 cb = cb->next; 838 (t->callbackFunc) (t->callbackArg); 839 rf_FreeCallbackDesc(t); 840 } 841 } 842 /***************************************************************************************** 843 * 844 * debug routines 845 * 846 ****************************************************************************************/ 847 848 static void 849 set_debug_option(char *name, long val) 850 { 851 RF_DebugName_t *p; 852 853 for (p = rf_debugNames; p->name; p++) { 854 if (!strcmp(p->name, name)) { 855 *(p->ptr) = val; 856 printf("[Set debug variable %s to %ld]\n", name, val); 857 return; 858 } 859 } 860 RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); 861 } 862 863 864 /* would like to use sscanf here, but apparently not available in kernel */ 865 /*ARGSUSED*/ 866 static void 867 rf_ConfigureDebug(RF_Config_t *cfgPtr) 868 { 869 char *val_p, *name_p, *white_p; 870 long val; 871 int i; 872 873 rf_ResetDebugOptions(); 874 for (i = 0; i < RF_MAXDBGV && cfgPtr->debugVars[i][0]; i++) { 875 name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); 876 white_p = rf_find_white(name_p); /* skip to start of 2nd 877 * word */ 878 val_p = rf_find_non_white(white_p); 879 if (*val_p == '0' && *(val_p + 1) == 'x') 880 val = rf_htoi(val_p + 2); 881 else 882 val = rf_atoi(val_p); 883 *white_p = '\0'; 884 set_debug_option(name_p, val); 885 } 886 } 887 888 void 889 rf_print_panic_message(int line, const char *file) 890 { 891 snprintf(rf_panicbuf, sizeof(rf_panicbuf), 892 "raidframe error at line %d file %s", line, file); 893 } 894 895 #ifdef RAID_DIAGNOSTIC 896 void 897 rf_print_assert_panic_message(int line, const char *file, const char *condition) 898 { 899 snprintf(rf_panicbuf, sizeof(rf_panicbuf), 900 "raidframe error at line %d file %s (failed asserting %s)\n", 901 line, file, condition); 902 } 903 #endif 904 905 void 906 rf_print_unable_to_init_mutex(const char *file, int line, int rc) 907 { 908 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", 909 file, line, rc); 910 } 911 912 void 913 rf_print_unable_to_add_shutdown(const char *file, int line, int rc) 914 { 915 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", 916 file, line, rc); 917 } 918 919 static void 920 rf_alloc_mutex_cond(RF_Raid_t *raidPtr) 921 { 922 923 rf_init_mutex2(raidPtr->mutex, IPL_VM); 924 925 rf_init_cond2(raidPtr->outstandingCond, "rfocond"); 926 rf_init_mutex2(raidPtr->rad_lock, IPL_VM); 927 928 rf_init_mutex2(raidPtr->access_suspend_mutex, IPL_VM); 929 rf_init_cond2(raidPtr->access_suspend_cv, "rfquiesce"); 930 931 rf_init_cond2(raidPtr->waitForReconCond, "rfrcnw"); 932 933 rf_init_cond2(raidPtr->adding_hot_spare_cv, "raidhs"); 934 } 935 936 static void 937 rf_destroy_mutex_cond(RF_Raid_t *raidPtr) 938 { 939 940 rf_destroy_cond2(raidPtr->waitForReconCond); 941 rf_destroy_cond2(raidPtr->adding_hot_spare_cv); 942 943 rf_destroy_mutex2(raidPtr->access_suspend_mutex); 944 rf_destroy_cond2(raidPtr->access_suspend_cv); 945 946 rf_destroy_cond2(raidPtr->outstandingCond); 947 rf_destroy_mutex2(raidPtr->rad_lock); 948 949 rf_destroy_mutex2(raidPtr->mutex); 950 } 951