1 /* $NetBSD: rf_driver.c,v 1.84 2004/01/15 20:27:27 oster Exp $ */ 2 /*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1995 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II, 43 * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka 44 * 45 * Permission to use, copy, modify and distribute this software and 46 * its documentation is hereby granted, provided that both the copyright 47 * notice and this permission notice appear in all copies of the 48 * software, derivative works or modified versions, and any portions 49 * thereof, and that both notices appear in supporting documentation. 50 * 51 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 52 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 53 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 54 * 55 * Carnegie Mellon requests users of this software to return to 56 * 57 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 58 * School of Computer Science 59 * Carnegie Mellon University 60 * Pittsburgh PA 15213-3890 61 * 62 * any improvements or extensions that they make and grant Carnegie the 63 * rights to redistribute these changes. 64 */ 65 66 /****************************************************************************** 67 * 68 * rf_driver.c -- main setup, teardown, and access routines for the RAID driver 69 * 70 * all routines are prefixed with rf_ (raidframe), to avoid conficts. 71 * 72 ******************************************************************************/ 73 74 75 #include <sys/cdefs.h> 76 __KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.84 2004/01/15 20:27:27 oster Exp $"); 77 78 #include "opt_raid_diagnostic.h" 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/ioctl.h> 83 #include <sys/fcntl.h> 84 #include <sys/vnode.h> 85 86 87 #include "rf_archs.h" 88 #include "rf_threadstuff.h" 89 90 #include <sys/errno.h> 91 92 #include "rf_raid.h" 93 #include "rf_dag.h" 94 #include "rf_aselect.h" 95 #include "rf_diskqueue.h" 96 #include "rf_parityscan.h" 97 #include "rf_alloclist.h" 98 #include "rf_dagutils.h" 99 #include "rf_utils.h" 100 #include "rf_etimer.h" 101 #include "rf_acctrace.h" 102 #include "rf_general.h" 103 #include "rf_desc.h" 104 #include "rf_states.h" 105 #include "rf_decluster.h" 106 #include "rf_map.h" 107 #include "rf_revent.h" 108 #include "rf_callback.h" 109 #include "rf_engine.h" 110 #include "rf_mcpair.h" 111 #include "rf_nwayxor.h" 112 #include "rf_copyback.h" 113 #include "rf_driver.h" 114 #include "rf_options.h" 115 #include "rf_shutdown.h" 116 #include "rf_kintf.h" 117 118 #include <sys/buf.h> 119 120 #ifndef RF_ACCESS_DEBUG 121 #define RF_ACCESS_DEBUG 0 122 #endif 123 124 /* rad == RF_RaidAccessDesc_t */ 125 RF_DECLARE_MUTEX(rf_rad_pool_lock) 126 static struct pool rf_rad_pool; 127 #define RF_MAX_FREE_RAD 128 128 #define RF_RAD_INC 16 129 #define RF_RAD_INITIAL 32 130 131 /* debug variables */ 132 char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ 133 134 /* main configuration routines */ 135 static int raidframe_booted = 0; 136 137 static void rf_ConfigureDebug(RF_Config_t * cfgPtr); 138 static void set_debug_option(char *name, long val); 139 static void rf_UnconfigureArray(void); 140 static void rf_ShutdownRDFreeList(void *); 141 static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); 142 143 RF_DECLARE_MUTEX(rf_printf_mutex) /* debug only: avoids interleaved 144 * printfs by different stripes */ 145 146 #define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended)) 147 #define WAIT_FOR_QUIESCENCE(_raid_) \ 148 ltsleep(&((_raid_)->accesses_suspended), PRIBIO, \ 149 "raidframe quiesce", 0, &((_raid_)->access_suspend_mutex)) 150 151 static int configureCount = 0; /* number of active configurations */ 152 static int isconfigged = 0; /* is basic raidframe (non per-array) 153 * stuff configged */ 154 RF_DECLARE_LKMGR_STATIC_MUTEX(configureMutex) /* used to lock the configuration 155 * stuff */ 156 static RF_ShutdownList_t *globalShutdown; /* non array-specific 157 * stuff */ 158 159 static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp); 160 161 /* called at system boot time */ 162 int 163 rf_BootRaidframe() 164 { 165 166 if (raidframe_booted) 167 return (EBUSY); 168 raidframe_booted = 1; 169 lockinit(&configureMutex, PRIBIO, "RAIDframe lock", 0, 0); 170 configureCount = 0; 171 isconfigged = 0; 172 globalShutdown = NULL; 173 return (0); 174 } 175 176 /* 177 * Called whenever an array is shutdown 178 */ 179 static void 180 rf_UnconfigureArray() 181 { 182 int rc; 183 184 RF_LOCK_LKMGR_MUTEX(configureMutex); 185 if (--configureCount == 0) { /* if no active configurations, shut 186 * everything down */ 187 isconfigged = 0; 188 189 rc = rf_ShutdownList(&globalShutdown); 190 if (rc) { 191 RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown, rc=%d\n", rc); 192 } 193 194 /* 195 * We must wait until now, because the AllocList module 196 * uses the DebugMem module. 197 */ 198 #if RF_DEBUG_MEM 199 if (rf_memDebug) 200 rf_print_unfreed(); 201 #endif 202 } 203 RF_UNLOCK_LKMGR_MUTEX(configureMutex); 204 } 205 206 /* 207 * Called to shut down an array. 208 */ 209 int 210 rf_Shutdown(RF_Raid_t *raidPtr) 211 { 212 213 if (!raidPtr->valid) { 214 RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); 215 return (EINVAL); 216 } 217 /* 218 * wait for outstanding IOs to land 219 * As described in rf_raid.h, we use the rad_freelist lock 220 * to protect the per-array info about outstanding descs 221 * since we need to do freelist locking anyway, and this 222 * cuts down on the amount of serialization we've got going 223 * on. 224 */ 225 RF_LOCK_MUTEX(rf_rad_pool_lock); 226 if (raidPtr->waitShutdown) { 227 RF_UNLOCK_MUTEX(rf_rad_pool_lock); 228 return (EBUSY); 229 } 230 raidPtr->waitShutdown = 1; 231 while (raidPtr->nAccOutstanding) { 232 RF_WAIT_COND(raidPtr->outstandingCond, rf_rad_pool_lock); 233 } 234 RF_UNLOCK_MUTEX(rf_rad_pool_lock); 235 236 /* Wait for any parity re-writes to stop... */ 237 while (raidPtr->parity_rewrite_in_progress) { 238 printf("Waiting for parity re-write to exit...\n"); 239 tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, 240 "rfprwshutdown", 0); 241 } 242 243 raidPtr->valid = 0; 244 245 rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); 246 247 rf_UnconfigureVnodes(raidPtr); 248 249 rf_ShutdownList(&raidPtr->shutdownList); 250 251 rf_UnconfigureArray(); 252 253 return (0); 254 } 255 256 257 #define DO_INIT_CONFIGURE(f) { \ 258 rc = f (&globalShutdown); \ 259 if (rc) { \ 260 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 261 rf_ShutdownList(&globalShutdown); \ 262 configureCount--; \ 263 RF_UNLOCK_LKMGR_MUTEX(configureMutex); \ 264 return(rc); \ 265 } \ 266 } 267 268 #define DO_RAID_FAIL() { \ 269 rf_UnconfigureVnodes(raidPtr); \ 270 rf_ShutdownList(&raidPtr->shutdownList); \ 271 rf_UnconfigureArray(); \ 272 } 273 274 #define DO_RAID_INIT_CONFIGURE(f) { \ 275 rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ 276 if (rc) { \ 277 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 278 DO_RAID_FAIL(); \ 279 return(rc); \ 280 } \ 281 } 282 283 #define DO_RAID_MUTEX(_m_) { \ 284 rf_mutex_init((_m_)); \ 285 } 286 287 int 288 rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac) 289 { 290 RF_RowCol_t col; 291 int rc; 292 293 RF_LOCK_LKMGR_MUTEX(configureMutex); 294 configureCount++; 295 if (isconfigged == 0) { 296 rf_mutex_init(&rf_printf_mutex); 297 298 /* initialize globals */ 299 300 DO_INIT_CONFIGURE(rf_ConfigureAllocList); 301 302 /* 303 * Yes, this does make debugging general to the whole 304 * system instead of being array specific. Bummer, drag. 305 */ 306 rf_ConfigureDebug(cfgPtr); 307 DO_INIT_CONFIGURE(rf_ConfigureDebugMem); 308 DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); 309 DO_INIT_CONFIGURE(rf_ConfigureMapModule); 310 DO_INIT_CONFIGURE(rf_ConfigureReconEvent); 311 DO_INIT_CONFIGURE(rf_ConfigureCallback); 312 DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); 313 DO_INIT_CONFIGURE(rf_ConfigureNWayXor); 314 DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); 315 DO_INIT_CONFIGURE(rf_ConfigureMCPair); 316 DO_INIT_CONFIGURE(rf_ConfigureDAGs); 317 DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); 318 DO_INIT_CONFIGURE(rf_ConfigureReconstruction); 319 DO_INIT_CONFIGURE(rf_ConfigureCopyback); 320 DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); 321 isconfigged = 1; 322 } 323 RF_UNLOCK_LKMGR_MUTEX(configureMutex); 324 325 DO_RAID_MUTEX(&raidPtr->mutex); 326 /* set up the cleanup list. Do this after ConfigureDebug so that 327 * value of memDebug will be set */ 328 329 rf_MakeAllocList(raidPtr->cleanupList); 330 if (raidPtr->cleanupList == NULL) { 331 DO_RAID_FAIL(); 332 return (ENOMEM); 333 } 334 rc = rf_ShutdownCreate(&raidPtr->shutdownList, 335 (void (*) (void *)) rf_FreeAllocList, 336 raidPtr->cleanupList); 337 if (rc) { 338 rf_print_unable_to_add_shutdown(__FILE__, __LINE__, rc); 339 DO_RAID_FAIL(); 340 return (rc); 341 } 342 raidPtr->numCol = cfgPtr->numCol; 343 raidPtr->numSpare = cfgPtr->numSpare; 344 345 raidPtr->status = rf_rs_optimal; 346 raidPtr->reconControl = NULL; 347 348 TAILQ_INIT(&(raidPtr->iodone)); 349 simple_lock_init(&(raidPtr->iodone_lock)); 350 351 DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); 352 DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); 353 354 raidPtr->outstandingCond = 0; 355 356 raidPtr->nAccOutstanding = 0; 357 raidPtr->waitShutdown = 0; 358 359 DO_RAID_MUTEX(&raidPtr->access_suspend_mutex); 360 361 raidPtr->waitForReconCond = 0; 362 363 if (ac!=NULL) { 364 /* We have an AutoConfig structure.. Don't do the 365 normal disk configuration... call the auto config 366 stuff */ 367 rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); 368 } else { 369 DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); 370 DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); 371 } 372 /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev 373 * no. is set */ 374 DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); 375 376 DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); 377 378 DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus); 379 380 #if RF_INCLUDE_CHAINDECLUSTER > 0 381 for (col = 0; col < raidPtr->numCol; col++) { 382 /* 383 * XXX better distribution 384 */ 385 raidPtr->hist_diskreq[col] = 0; 386 } 387 #endif 388 raidPtr->numNewFailures = 0; 389 raidPtr->copyback_in_progress = 0; 390 raidPtr->parity_rewrite_in_progress = 0; 391 raidPtr->adding_hot_spare = 0; 392 raidPtr->recon_in_progress = 0; 393 raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; 394 395 /* autoconfigure and root_partition will actually get filled in 396 after the config is done */ 397 raidPtr->autoconfigure = 0; 398 raidPtr->root_partition = 0; 399 raidPtr->last_unit = raidPtr->raidid; 400 raidPtr->config_order = 0; 401 402 if (rf_keepAccTotals) { 403 raidPtr->keep_acc_totals = 1; 404 } 405 rf_StartUserStats(raidPtr); 406 407 raidPtr->valid = 1; 408 409 printf("raid%d: %s\n", raidPtr->raidid, 410 raidPtr->Layout.map->configName); 411 printf("raid%d: Components:", raidPtr->raidid); 412 413 for (col = 0; col < raidPtr->numCol; col++) { 414 printf(" %s", raidPtr->Disks[col].devname); 415 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) { 416 printf("[**FAILED**]"); 417 } 418 } 419 printf("\n"); 420 printf("raid%d: Total Sectors: %lu (%lu MB)\n", 421 raidPtr->raidid, 422 (unsigned long) raidPtr->totalSectors, 423 (unsigned long) (raidPtr->totalSectors / 1024 * 424 (1 << raidPtr->logBytesPerSector) / 1024)); 425 426 return (0); 427 } 428 429 static void 430 rf_ShutdownRDFreeList(void *ignored) 431 { 432 pool_destroy(&rf_rad_pool); 433 } 434 435 static int 436 rf_ConfigureRDFreeList(RF_ShutdownList_t **listp) 437 { 438 int rc; 439 440 pool_init(&rf_rad_pool, sizeof(RF_RaidAccessDesc_t), 0, 0, 0, 441 "rf_rad_pl", NULL); 442 pool_sethiwat(&rf_rad_pool, RF_MAX_FREE_RAD); 443 pool_prime(&rf_rad_pool, RF_RAD_INITIAL); 444 rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); 445 if (rc) { 446 rf_print_unable_to_add_shutdown(__FILE__, __LINE__, rc); 447 rf_ShutdownRDFreeList(NULL); 448 return (rc); 449 } 450 simple_lock_init(&rf_rad_pool_lock); 451 return (0); 452 } 453 454 RF_RaidAccessDesc_t * 455 rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type, 456 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 457 caddr_t bufPtr, void *bp, RF_RaidAccessFlags_t flags, 458 RF_AccessState_t *states) 459 { 460 RF_RaidAccessDesc_t *desc; 461 462 desc = pool_get(&rf_rad_pool, PR_WAITOK); 463 simple_lock_init(&desc->mutex); 464 465 RF_LOCK_MUTEX(rf_rad_pool_lock); 466 if (raidPtr->waitShutdown) { 467 /* 468 * Actually, we're shutting the array down. Free the desc 469 * and return NULL. 470 */ 471 472 RF_UNLOCK_MUTEX(rf_rad_pool_lock); 473 pool_put(&rf_rad_pool, desc); 474 return (NULL); 475 } 476 raidPtr->nAccOutstanding++; 477 478 RF_UNLOCK_MUTEX(rf_rad_pool_lock); 479 480 desc->raidPtr = (void *) raidPtr; 481 desc->type = type; 482 desc->raidAddress = raidAddress; 483 desc->numBlocks = numBlocks; 484 desc->bufPtr = bufPtr; 485 desc->bp = bp; 486 desc->paramDAG = NULL; 487 desc->paramASM = NULL; 488 desc->flags = flags; 489 desc->states = states; 490 desc->state = 0; 491 492 desc->status = 0; 493 memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t)); 494 desc->callbackFunc = NULL; 495 desc->callbackArg = NULL; 496 desc->next = NULL; 497 desc->cleanupList = NULL; 498 rf_MakeAllocList(desc->cleanupList); 499 return (desc); 500 } 501 502 void 503 rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc) 504 { 505 RF_Raid_t *raidPtr = desc->raidPtr; 506 507 RF_ASSERT(desc); 508 509 rf_FreeAllocList(desc->cleanupList); 510 pool_put(&rf_rad_pool, desc); 511 RF_LOCK_MUTEX(rf_rad_pool_lock); 512 raidPtr->nAccOutstanding--; 513 if (raidPtr->waitShutdown) { 514 RF_SIGNAL_COND(raidPtr->outstandingCond); 515 } 516 RF_UNLOCK_MUTEX(rf_rad_pool_lock); 517 } 518 /********************************************************************* 519 * Main routine for performing an access. 520 * Accesses are retried until a DAG can not be selected. This occurs 521 * when either the DAG library is incomplete or there are too many 522 * failures in a parity group. 523 * 524 * type should be read or write async_flag should be RF_TRUE or 525 * RF_FALSE bp_in is a buf pointer. void * to facilitate ignoring it 526 * outside the kernel 527 ********************************************************************/ 528 int 529 rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, 530 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 531 caddr_t bufPtr, void *bp_in, RF_RaidAccessFlags_t flags) 532 { 533 RF_RaidAccessDesc_t *desc; 534 caddr_t lbufPtr = bufPtr; 535 struct buf *bp = (struct buf *) bp_in; 536 537 raidAddress += rf_raidSectorOffset; 538 539 #if RF_ACCESS_DEBUG 540 if (rf_accessDebug) { 541 542 printf("logBytes is: %d %d %d\n", raidPtr->raidid, 543 raidPtr->logBytesPerSector, 544 (int) rf_RaidAddressToByte(raidPtr, numBlocks)); 545 printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, 546 (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, 547 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), 548 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), 549 (int) numBlocks, 550 (int) rf_RaidAddressToByte(raidPtr, numBlocks), 551 (long) bufPtr); 552 } 553 #endif 554 if (raidAddress + numBlocks > raidPtr->totalSectors) { 555 556 printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu\n", 557 (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors); 558 559 560 bp->b_flags |= B_ERROR; 561 bp->b_resid = bp->b_bcount; 562 bp->b_error = ENOSPC; 563 biodone(bp); 564 return (ENOSPC); 565 } 566 desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, 567 numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states); 568 569 if (desc == NULL) { 570 return (ENOMEM); 571 } 572 RF_ETIMER_START(desc->tracerec.tot_timer); 573 574 desc->async_flag = async_flag; 575 576 rf_ContinueRaidAccess(desc); 577 578 return (0); 579 } 580 #if 0 581 /* force the array into reconfigured mode without doing reconstruction */ 582 int 583 rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col) 584 { 585 if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { 586 printf("Can't set reconfigured mode in dedicated-spare array\n"); 587 RF_PANIC(); 588 } 589 RF_LOCK_MUTEX(raidPtr->mutex); 590 raidPtr->numFailures++; 591 raidPtr->Disks[col].status = rf_ds_dist_spared; 592 raidPtr->status = rf_rs_reconfigured; 593 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 594 /* install spare table only if declustering + distributed sparing 595 * architecture. */ 596 if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) 597 rf_InstallSpareTable(raidPtr, col); 598 RF_UNLOCK_MUTEX(raidPtr->mutex); 599 return (0); 600 } 601 #endif 602 603 int 604 rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon) 605 { 606 RF_LOCK_MUTEX(raidPtr->mutex); 607 if (raidPtr->Disks[fcol].status != rf_ds_failed) { 608 /* must be failing something that is valid, or else it's 609 already marked as failed (in which case we don't 610 want to mark it failed again!) */ 611 raidPtr->numFailures++; 612 raidPtr->Disks[fcol].status = rf_ds_failed; 613 raidPtr->status = rf_rs_degraded; 614 } 615 RF_UNLOCK_MUTEX(raidPtr->mutex); 616 617 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 618 619 /* Close the component, so that it's not "locked" if someone 620 else want's to use it! */ 621 622 rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp, 623 raidPtr->Disks[fcol].auto_configured); 624 625 RF_LOCK_MUTEX(raidPtr->mutex); 626 raidPtr->raid_cinfo[fcol].ci_vp = NULL; 627 628 /* Need to mark the component as not being auto_configured 629 (in case it was previously). */ 630 631 raidPtr->Disks[fcol].auto_configured = 0; 632 RF_UNLOCK_MUTEX(raidPtr->mutex); 633 634 if (initRecon) 635 rf_ReconstructFailedDisk(raidPtr, fcol); 636 return (0); 637 } 638 /* releases a thread that is waiting for the array to become quiesced. 639 * access_suspend_mutex should be locked upon calling this 640 */ 641 void 642 rf_SignalQuiescenceLock(RF_Raid_t *raidPtr) 643 { 644 #if RF_DEBUG_QUIESCE 645 if (rf_quiesceDebug) { 646 printf("raid%d: Signalling quiescence lock\n", 647 raidPtr->raidid); 648 } 649 #endif 650 raidPtr->access_suspend_release = 1; 651 652 if (raidPtr->waiting_for_quiescence) { 653 SIGNAL_QUIESCENT_COND(raidPtr); 654 } 655 } 656 /* suspends all new requests to the array. No effect on accesses that are in flight. */ 657 int 658 rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr) 659 { 660 #if RF_DEBUG_QUIESCE 661 if (rf_quiesceDebug) 662 printf("raid%d: Suspending new reqs\n", raidPtr->raidid); 663 #endif 664 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); 665 raidPtr->accesses_suspended++; 666 raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; 667 668 if (raidPtr->waiting_for_quiescence) { 669 raidPtr->access_suspend_release = 0; 670 while (!raidPtr->access_suspend_release) { 671 printf("raid%d: Suspending: Waiting for Quiescence\n", 672 raidPtr->raidid); 673 WAIT_FOR_QUIESCENCE(raidPtr); 674 raidPtr->waiting_for_quiescence = 0; 675 } 676 } 677 printf("raid%d: Quiescence reached..\n", raidPtr->raidid); 678 679 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); 680 return (raidPtr->waiting_for_quiescence); 681 } 682 /* wake up everyone waiting for quiescence to be released */ 683 void 684 rf_ResumeNewRequests(RF_Raid_t *raidPtr) 685 { 686 RF_CallbackDesc_t *t, *cb; 687 688 #if RF_DEBUG_QUIESCE 689 if (rf_quiesceDebug) 690 printf("Resuming new reqs\n"); 691 #endif 692 693 RF_LOCK_MUTEX(raidPtr->access_suspend_mutex); 694 raidPtr->accesses_suspended--; 695 if (raidPtr->accesses_suspended == 0) 696 cb = raidPtr->quiesce_wait_list; 697 else 698 cb = NULL; 699 raidPtr->quiesce_wait_list = NULL; 700 RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex); 701 702 while (cb) { 703 t = cb; 704 cb = cb->next; 705 (t->callbackFunc) (t->callbackArg); 706 rf_FreeCallbackDesc(t); 707 } 708 } 709 /***************************************************************************************** 710 * 711 * debug routines 712 * 713 ****************************************************************************************/ 714 715 static void 716 set_debug_option(char *name, long val) 717 { 718 RF_DebugName_t *p; 719 720 for (p = rf_debugNames; p->name; p++) { 721 if (!strcmp(p->name, name)) { 722 *(p->ptr) = val; 723 printf("[Set debug variable %s to %ld]\n", name, val); 724 return; 725 } 726 } 727 RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); 728 } 729 730 731 /* would like to use sscanf here, but apparently not available in kernel */ 732 /*ARGSUSED*/ 733 static void 734 rf_ConfigureDebug(RF_Config_t *cfgPtr) 735 { 736 char *val_p, *name_p, *white_p; 737 long val; 738 int i; 739 740 rf_ResetDebugOptions(); 741 for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) { 742 name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); 743 white_p = rf_find_white(name_p); /* skip to start of 2nd 744 * word */ 745 val_p = rf_find_non_white(white_p); 746 if (*val_p == '0' && *(val_p + 1) == 'x') 747 val = rf_htoi(val_p + 2); 748 else 749 val = rf_atoi(val_p); 750 *white_p = '\0'; 751 set_debug_option(name_p, val); 752 } 753 } 754 /* performance monitoring stuff */ 755 756 #define TIMEVAL_TO_US(t) (((long) t.tv_sec) * 1000000L + (long) t.tv_usec) 757 758 #if !defined(_KERNEL) && !defined(SIMULATE) 759 760 /* 761 * Throughput stats currently only used in user-level RAIDframe 762 */ 763 764 static int 765 rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 766 RF_Config_t *cfgPtr) 767 { 768 int rc; 769 770 /* these used by user-level raidframe only */ 771 rf_mutex_init(&raidPtr->throughputstats.mutex); 772 raidPtr->throughputstats.sum_io_us = 0; 773 raidPtr->throughputstats.num_ios = 0; 774 raidPtr->throughputstats.num_out_ios = 0; 775 return (0); 776 } 777 778 void 779 rf_StartThroughputStats(RF_Raid_t *raidPtr) 780 { 781 RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); 782 raidPtr->throughputstats.num_ios++; 783 raidPtr->throughputstats.num_out_ios++; 784 if (raidPtr->throughputstats.num_out_ios == 1) 785 RF_GETTIME(raidPtr->throughputstats.start); 786 RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); 787 } 788 789 static void 790 rf_StopThroughputStats(RF_Raid_t *raidPtr) 791 { 792 struct timeval diff; 793 794 RF_LOCK_MUTEX(raidPtr->throughputstats.mutex); 795 raidPtr->throughputstats.num_out_ios--; 796 if (raidPtr->throughputstats.num_out_ios == 0) { 797 RF_GETTIME(raidPtr->throughputstats.stop); 798 RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start, &raidPtr->throughputstats.stop, &diff); 799 raidPtr->throughputstats.sum_io_us += TIMEVAL_TO_US(diff); 800 } 801 RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex); 802 } 803 804 static void 805 rf_PrintThroughputStats(RF_Raid_t *raidPtr) 806 { 807 RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0); 808 if (raidPtr->throughputstats.sum_io_us != 0) { 809 printf("[Througphut: %8.2f IOs/second]\n", raidPtr->throughputstats.num_ios 810 / (raidPtr->throughputstats.sum_io_us / 1000000.0)); 811 } 812 } 813 #endif /* !KERNEL && !SIMULATE */ 814 815 void 816 rf_StartUserStats(RF_Raid_t *raidPtr) 817 { 818 RF_GETTIME(raidPtr->userstats.start); 819 raidPtr->userstats.sum_io_us = 0; 820 raidPtr->userstats.num_ios = 0; 821 raidPtr->userstats.num_sect_moved = 0; 822 } 823 824 void 825 rf_StopUserStats(RF_Raid_t *raidPtr) 826 { 827 RF_GETTIME(raidPtr->userstats.stop); 828 } 829 830 /* rt: resp time in us 831 numsect: number of sectors for this access */ 832 void 833 rf_UpdateUserStats(RF_Raid_t *raidPtr, int rt, int numsect) 834 { 835 raidPtr->userstats.sum_io_us += rt; 836 raidPtr->userstats.num_ios++; 837 raidPtr->userstats.num_sect_moved += numsect; 838 } 839 840 void 841 rf_PrintUserStats(RF_Raid_t *raidPtr) 842 { 843 long elapsed_us, mbs, mbs_frac; 844 struct timeval diff; 845 846 RF_TIMEVAL_DIFF(&raidPtr->userstats.start, 847 &raidPtr->userstats.stop, &diff); 848 elapsed_us = TIMEVAL_TO_US(diff); 849 850 /* 2000 sectors per megabyte, 10000000 microseconds per second */ 851 if (elapsed_us) 852 mbs = (raidPtr->userstats.num_sect_moved / 2000) / 853 (elapsed_us / 1000000); 854 else 855 mbs = 0; 856 857 /* this computes only the first digit of the fractional mb/s moved */ 858 if (elapsed_us) { 859 mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) / 860 (elapsed_us / 1000000)) - (mbs * 10); 861 } else { 862 mbs_frac = 0; 863 } 864 865 printf("raid%d: Number of I/Os: %ld\n", 866 raidPtr->raidid, raidPtr->userstats.num_ios); 867 printf("raid%d: Elapsed time (us): %ld\n", 868 raidPtr->raidid, elapsed_us); 869 printf("raid%d: User I/Os per second: %ld\n", 870 raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.num_ios, 871 (elapsed_us / 1000000))); 872 printf("raid%d: Average user response time: %ld us\n", 873 raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.sum_io_us, 874 raidPtr->userstats.num_ios)); 875 printf("raid%d: Total sectors moved: %ld\n", 876 raidPtr->raidid, raidPtr->userstats.num_sect_moved); 877 printf("raid%d: Average access size (sect): %ld\n", 878 raidPtr->raidid, RF_DB0_CHECK(raidPtr->userstats.num_sect_moved, 879 raidPtr->userstats.num_ios)); 880 printf("raid%d: Achieved data rate: %ld.%ld MB/sec\n", 881 raidPtr->raidid, mbs, mbs_frac); 882 } 883 884 885 void 886 rf_print_panic_message(int line, char *file) 887 { 888 sprintf(rf_panicbuf,"raidframe error at line %d file %s", 889 line, file); 890 } 891 892 #ifdef RAID_DIAGNOSTIC 893 void 894 rf_print_assert_panic_message(int line, char *file, char *condition) 895 { 896 sprintf(rf_panicbuf, 897 "raidframe error at line %d file %s (failed asserting %s)\n", 898 line, file, condition); 899 } 900 #endif 901 902 void 903 rf_print_unable_to_init_mutex(char *file, int line, int rc) 904 { 905 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", 906 file, line, rc); 907 } 908 909 void 910 rf_print_unable_to_add_shutdown(char *file, int line, int rc) 911 { 912 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", 913 file, line, rc); 914 } 915