1 /* $NetBSD: rf_map.c,v 1.21 2002/09/24 00:12:55 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /************************************************************************** 30 * 31 * map.c -- main code for mapping RAID addresses to physical disk addresses 32 * 33 **************************************************************************/ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: rf_map.c,v 1.21 2002/09/24 00:12:55 oster Exp $"); 37 38 #include <dev/raidframe/raidframevar.h> 39 40 #include "rf_threadstuff.h" 41 #include "rf_raid.h" 42 #include "rf_general.h" 43 #include "rf_map.h" 44 #include "rf_freelist.h" 45 #include "rf_shutdown.h" 46 47 static void rf_FreePDAList(RF_PhysDiskAddr_t * start, RF_PhysDiskAddr_t * end, int count); 48 static void 49 rf_FreeASMList(RF_AccessStripeMap_t * start, RF_AccessStripeMap_t * end, 50 int count); 51 52 /*************************************************************************** 53 * 54 * MapAccess -- main 1st order mapping routine. Maps an access in the 55 * RAID address space to the corresponding set of physical disk 56 * addresses. The result is returned as a list of AccessStripeMap 57 * structures, one per stripe accessed. Each ASM structure contains a 58 * pointer to a list of PhysDiskAddr structures, which describe the 59 * physical locations touched by the user access. Note that this routine 60 * returns only static mapping information, i.e. the list of physical 61 * addresses returned does not necessarily identify the set of physical 62 * locations that will actually be read or written. The routine also 63 * maps the parity. The physical disk location returned always indicates 64 * the entire parity unit, even when only a subset of it is being 65 * accessed. This is because an access that is not stripe unit aligned 66 * but that spans a stripe unit boundary may require access two distinct 67 * portions of the parity unit, and we can't yet tell which portion(s) 68 * we'll actually need. We leave it up to the algorithm selection code 69 * to decide what subset of the parity unit to access. Note that 70 * addresses in the RAID address space must always be maintained as 71 * longs, instead of ints. 72 * 73 * This routine returns NULL if numBlocks is 0 74 * 75 ***************************************************************************/ 76 77 RF_AccessStripeMapHeader_t * 78 rf_MapAccess(raidPtr, raidAddress, numBlocks, buffer, remap) 79 RF_Raid_t *raidPtr; 80 RF_RaidAddr_t raidAddress; /* starting address in RAID address 81 * space */ 82 RF_SectorCount_t numBlocks; /* number of blocks in RAID address 83 * space to access */ 84 caddr_t buffer; /* buffer to supply/receive data */ 85 int remap; /* 1 => remap addresses to spare space */ 86 { 87 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 88 RF_AccessStripeMapHeader_t *asm_hdr = NULL; 89 RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL; 90 int faultsTolerated = layoutPtr->map->faultsTolerated; 91 RF_RaidAddr_t startAddress = raidAddress; /* we'll change 92 * raidAddress along the 93 * way */ 94 RF_RaidAddr_t endAddress = raidAddress + numBlocks; 95 RF_RaidDisk_t **disks = raidPtr->Disks; 96 97 RF_PhysDiskAddr_t *pda_p, *pda_q; 98 RF_StripeCount_t numStripes = 0; 99 RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress, nextStripeUnitAddress; 100 RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr; 101 RF_StripeCount_t totStripes; 102 RF_StripeNum_t stripeID, lastSID, SUID, lastSUID; 103 RF_AccessStripeMap_t *asmList, *t_asm; 104 RF_PhysDiskAddr_t *pdaList, *t_pda; 105 106 /* allocate all the ASMs and PDAs up front */ 107 lastRaidAddr = raidAddress + numBlocks - 1; 108 stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress); 109 lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr); 110 totStripes = lastSID - stripeID + 1; 111 SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress); 112 lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr); 113 114 asmList = rf_AllocASMList(totStripes); 115 pdaList = rf_AllocPDAList(lastSUID - SUID + 1 + faultsTolerated * totStripes); /* may also need pda(s) 116 * per stripe for parity */ 117 118 if (raidAddress + numBlocks > raidPtr->totalSectors) { 119 RF_ERRORMSG1("Unable to map access because offset (%d) was invalid\n", 120 (int) raidAddress); 121 return (NULL); 122 } 123 #if RF_DEBUG_MAP 124 if (rf_mapDebug) 125 rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks); 126 #endif 127 for (; raidAddress < endAddress;) { 128 /* make the next stripe structure */ 129 RF_ASSERT(asmList); 130 t_asm = asmList; 131 asmList = asmList->next; 132 memset((char *) t_asm, 0, sizeof(RF_AccessStripeMap_t)); 133 if (!asm_p) 134 asm_list = asm_p = t_asm; 135 else { 136 asm_p->next = t_asm; 137 asm_p = asm_p->next; 138 } 139 numStripes++; 140 141 /* map SUs from current location to the end of the stripe */ 142 asm_p->stripeID = /* rf_RaidAddressToStripeID(layoutPtr, 143 raidAddress) */ stripeID++; 144 stripeRealEndAddress = rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress); 145 stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress); 146 asm_p->raidAddress = raidAddress; 147 asm_p->endRaidAddress = stripeEndAddress; 148 149 /* map each stripe unit in the stripe */ 150 pda_p = NULL; 151 startAddrWithinStripe = raidAddress; /* Raid addr of start of 152 * portion of access 153 * that is within this 154 * stripe */ 155 for (; raidAddress < stripeEndAddress;) { 156 RF_ASSERT(pdaList); 157 t_pda = pdaList; 158 pdaList = pdaList->next; 159 memset((char *) t_pda, 0, sizeof(RF_PhysDiskAddr_t)); 160 if (!pda_p) 161 asm_p->physInfo = pda_p = t_pda; 162 else { 163 pda_p->next = t_pda; 164 pda_p = pda_p->next; 165 } 166 167 pda_p->type = RF_PDA_TYPE_DATA; 168 (layoutPtr->map->MapSector) (raidPtr, raidAddress, &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); 169 170 /* mark any failures we find. failedPDA is don't-care 171 * if there is more than one failure */ 172 pda_p->raidAddress = raidAddress; /* the RAID address 173 * corresponding to this 174 * physical disk address */ 175 nextStripeUnitAddress = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, raidAddress); 176 pda_p->numSector = RF_MIN(endAddress, nextStripeUnitAddress) - raidAddress; 177 RF_ASSERT(pda_p->numSector != 0); 178 rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0); 179 pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr, (raidAddress - startAddress)); 180 asm_p->totalSectorsAccessed += pda_p->numSector; 181 asm_p->numStripeUnitsAccessed++; 182 asm_p->origRow = pda_p->row; /* redundant but 183 * harmless to do this 184 * in every loop 185 * iteration */ 186 187 raidAddress = RF_MIN(endAddress, nextStripeUnitAddress); 188 } 189 190 /* Map the parity. At this stage, the startSector and 191 * numSector fields for the parity unit are always set to 192 * indicate the entire parity unit. We may modify this after 193 * mapping the data portion. */ 194 switch (faultsTolerated) { 195 case 0: 196 break; 197 case 1: /* single fault tolerant */ 198 RF_ASSERT(pdaList); 199 t_pda = pdaList; 200 pdaList = pdaList->next; 201 memset((char *) t_pda, 0, sizeof(RF_PhysDiskAddr_t)); 202 pda_p = asm_p->parityInfo = t_pda; 203 pda_p->type = RF_PDA_TYPE_PARITY; 204 (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), 205 &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); 206 pda_p->numSector = layoutPtr->sectorsPerStripeUnit; 207 /* raidAddr may be needed to find unit to redirect to */ 208 pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); 209 rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); 210 rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); 211 212 break; 213 case 2: /* two fault tolerant */ 214 RF_ASSERT(pdaList && pdaList->next); 215 t_pda = pdaList; 216 pdaList = pdaList->next; 217 memset((char *) t_pda, 0, sizeof(RF_PhysDiskAddr_t)); 218 pda_p = asm_p->parityInfo = t_pda; 219 pda_p->type = RF_PDA_TYPE_PARITY; 220 t_pda = pdaList; 221 pdaList = pdaList->next; 222 memset((char *) t_pda, 0, sizeof(RF_PhysDiskAddr_t)); 223 pda_q = asm_p->qInfo = t_pda; 224 pda_q->type = RF_PDA_TYPE_Q; 225 (layoutPtr->map->MapParity) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), 226 &(pda_p->row), &(pda_p->col), &(pda_p->startSector), remap); 227 (layoutPtr->map->MapQ) (raidPtr, rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe), 228 &(pda_q->row), &(pda_q->col), &(pda_q->startSector), remap); 229 pda_q->numSector = pda_p->numSector = layoutPtr->sectorsPerStripeUnit; 230 /* raidAddr may be needed to find unit to redirect to */ 231 pda_p->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); 232 pda_q->raidAddress = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, startAddrWithinStripe); 233 /* failure mode stuff */ 234 rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1); 235 rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1); 236 rf_ASMParityAdjust(asm_p->parityInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); 237 rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe, endAddress, layoutPtr, asm_p); 238 break; 239 } 240 } 241 RF_ASSERT(asmList == NULL && pdaList == NULL); 242 /* make the header structure */ 243 asm_hdr = rf_AllocAccessStripeMapHeader(); 244 RF_ASSERT(numStripes == totStripes); 245 asm_hdr->numStripes = numStripes; 246 asm_hdr->stripeMap = asm_list; 247 248 #if RF_DEBUG_MAP 249 if (rf_mapDebug) 250 rf_PrintAccessStripeMap(asm_hdr); 251 #endif 252 return (asm_hdr); 253 } 254 255 /*************************************************************************** 256 * This routine walks through an ASM list and marks the PDAs that have 257 * failed. It's called only when a disk failure causes an in-flight 258 * DAG to fail. The parity may consist of two components, but we want 259 * to use only one failedPDA pointer. Thus we set failedPDA to point 260 * to the first parity component, and rely on the rest of the code to 261 * do the right thing with this. 262 ***************************************************************************/ 263 264 void 265 rf_MarkFailuresInASMList(raidPtr, asm_h) 266 RF_Raid_t *raidPtr; 267 RF_AccessStripeMapHeader_t *asm_h; 268 { 269 RF_RaidDisk_t **disks = raidPtr->Disks; 270 RF_AccessStripeMap_t *asmap; 271 RF_PhysDiskAddr_t *pda; 272 273 for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) { 274 asmap->numDataFailed = asmap->numParityFailed = asmap->numQFailed = 0; 275 asmap->numFailedPDAs = 0; 276 memset((char *) asmap->failedPDAs, 0, 277 RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *)); 278 for (pda = asmap->physInfo; pda; pda = pda->next) { 279 if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) { 280 asmap->numDataFailed++; 281 asmap->failedPDAs[asmap->numFailedPDAs] = pda; 282 asmap->numFailedPDAs++; 283 } 284 } 285 pda = asmap->parityInfo; 286 if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { 287 asmap->numParityFailed++; 288 asmap->failedPDAs[asmap->numFailedPDAs] = pda; 289 asmap->numFailedPDAs++; 290 } 291 pda = asmap->qInfo; 292 if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) { 293 asmap->numQFailed++; 294 asmap->failedPDAs[asmap->numFailedPDAs] = pda; 295 asmap->numFailedPDAs++; 296 } 297 } 298 } 299 300 /*************************************************************************** 301 * 302 * routines to allocate and free list elements. All allocation 303 * routines zero the structure before returning it. 304 * 305 * FreePhysDiskAddr is static. It should never be called directly, 306 * because FreeAccessStripeMap takes care of freeing the PhysDiskAddr 307 * list. 308 * 309 ***************************************************************************/ 310 311 static RF_FreeList_t *rf_asmhdr_freelist; 312 #define RF_MAX_FREE_ASMHDR 128 313 #define RF_ASMHDR_INC 16 314 #define RF_ASMHDR_INITIAL 32 315 316 static RF_FreeList_t *rf_asm_freelist; 317 #define RF_MAX_FREE_ASM 192 318 #define RF_ASM_INC 24 319 #define RF_ASM_INITIAL 64 320 321 static RF_FreeList_t *rf_pda_freelist; 322 #define RF_MAX_FREE_PDA 192 323 #define RF_PDA_INC 24 324 #define RF_PDA_INITIAL 64 325 326 /* called at shutdown time. So far, all that is necessary is to release all the free lists */ 327 static void rf_ShutdownMapModule(void *); 328 static void 329 rf_ShutdownMapModule(ignored) 330 void *ignored; 331 { 332 RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); 333 RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); 334 RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *)); 335 } 336 337 int 338 rf_ConfigureMapModule(listp) 339 RF_ShutdownList_t **listp; 340 { 341 int rc; 342 343 RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR, 344 RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t)); 345 if (rf_asmhdr_freelist == NULL) { 346 return (ENOMEM); 347 } 348 RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM, 349 RF_ASM_INC, sizeof(RF_AccessStripeMap_t)); 350 if (rf_asm_freelist == NULL) { 351 RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); 352 return (ENOMEM); 353 } 354 RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, 355 RF_PDA_INC, sizeof(RF_PhysDiskAddr_t)); 356 if (rf_pda_freelist == NULL) { 357 RF_FREELIST_DESTROY(rf_asmhdr_freelist, next, (RF_AccessStripeMapHeader_t *)); 358 RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *)); 359 return (ENOMEM); 360 } 361 rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL); 362 if (rc) { 363 rf_print_unable_to_add_shutdown(__FILE__, __LINE__, rc); 364 rf_ShutdownMapModule(NULL); 365 return (rc); 366 } 367 RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next, 368 (RF_AccessStripeMapHeader_t *)); 369 RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next, 370 (RF_AccessStripeMap_t *)); 371 RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next, 372 (RF_PhysDiskAddr_t *)); 373 374 return (0); 375 } 376 377 RF_AccessStripeMapHeader_t * 378 rf_AllocAccessStripeMapHeader() 379 { 380 RF_AccessStripeMapHeader_t *p; 381 382 RF_FREELIST_GET(rf_asmhdr_freelist, p, next, (RF_AccessStripeMapHeader_t *)); 383 memset((char *) p, 0, sizeof(RF_AccessStripeMapHeader_t)); 384 385 return (p); 386 } 387 388 389 void 390 rf_FreeAccessStripeMapHeader(p) 391 RF_AccessStripeMapHeader_t *p; 392 { 393 RF_FREELIST_FREE(rf_asmhdr_freelist, p, next); 394 } 395 396 RF_PhysDiskAddr_t * 397 rf_AllocPhysDiskAddr() 398 { 399 RF_PhysDiskAddr_t *p; 400 401 RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *)); 402 memset((char *) p, 0, sizeof(RF_PhysDiskAddr_t)); 403 404 return (p); 405 } 406 /* allocates a list of PDAs, locking the free list only once 407 * when we have to call calloc, we do it one component at a time to simplify 408 * the process of freeing the list at program shutdown. This should not be 409 * much of a performance hit, because it should be very infrequently executed. 410 */ 411 RF_PhysDiskAddr_t * 412 rf_AllocPDAList(count) 413 int count; 414 { 415 RF_PhysDiskAddr_t *p = NULL; 416 417 RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *), count); 418 return (p); 419 } 420 421 #if RF_INCLUDE_PARITYLOGGING > 0 422 void 423 rf_FreePhysDiskAddr(p) 424 RF_PhysDiskAddr_t *p; 425 { 426 RF_FREELIST_FREE(rf_pda_freelist, p, next); 427 } 428 #endif 429 430 static void 431 rf_FreePDAList(l_start, l_end, count) 432 RF_PhysDiskAddr_t *l_start, *l_end; /* pointers to start and end 433 * of list */ 434 int count; /* number of elements in list */ 435 { 436 RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next, (RF_PhysDiskAddr_t *), count); 437 } 438 439 /* this is essentially identical to AllocPDAList. I should combine the two. 440 * when we have to call calloc, we do it one component at a time to simplify 441 * the process of freeing the list at program shutdown. This should not be 442 * much of a performance hit, because it should be very infrequently executed. 443 */ 444 RF_AccessStripeMap_t * 445 rf_AllocASMList(count) 446 int count; 447 { 448 RF_AccessStripeMap_t *p = NULL; 449 450 RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *), count); 451 return (p); 452 } 453 454 static void 455 rf_FreeASMList(l_start, l_end, count) 456 RF_AccessStripeMap_t *l_start, *l_end; 457 int count; 458 { 459 RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next, (RF_AccessStripeMap_t *), count); 460 } 461 462 void 463 rf_FreeAccessStripeMap(hdr) 464 RF_AccessStripeMapHeader_t *hdr; 465 { 466 RF_AccessStripeMap_t *p, *pt = NULL; 467 RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL; 468 int count = 0, t, asm_count = 0; 469 470 for (p = hdr->stripeMap; p; p = p->next) { 471 472 /* link the 3 pda lists into the accumulating pda list */ 473 474 if (!pdaList) 475 pdaList = p->qInfo; 476 else 477 pdaEnd->next = p->qInfo; 478 for (trailer = NULL, pdp = p->qInfo; pdp;) { 479 trailer = pdp; 480 pdp = pdp->next; 481 count++; 482 } 483 if (trailer) 484 pdaEnd = trailer; 485 486 if (!pdaList) 487 pdaList = p->parityInfo; 488 else 489 pdaEnd->next = p->parityInfo; 490 for (trailer = NULL, pdp = p->parityInfo; pdp;) { 491 trailer = pdp; 492 pdp = pdp->next; 493 count++; 494 } 495 if (trailer) 496 pdaEnd = trailer; 497 498 if (!pdaList) 499 pdaList = p->physInfo; 500 else 501 pdaEnd->next = p->physInfo; 502 for (trailer = NULL, pdp = p->physInfo; pdp;) { 503 trailer = pdp; 504 pdp = pdp->next; 505 count++; 506 } 507 if (trailer) 508 pdaEnd = trailer; 509 510 pt = p; 511 asm_count++; 512 } 513 514 /* debug only */ 515 for (t = 0, pdp = pdaList; pdp; pdp = pdp->next) 516 t++; 517 RF_ASSERT(t == count); 518 519 if (pdaList) 520 rf_FreePDAList(pdaList, pdaEnd, count); 521 rf_FreeASMList(hdr->stripeMap, pt, asm_count); 522 rf_FreeAccessStripeMapHeader(hdr); 523 } 524 /* We can't use the large write optimization if there are any failures 525 * in the stripe. In the declustered layout, there is no way to 526 * immediately determine what disks constitute a stripe, so we 527 * actually have to hunt through the stripe looking for failures. The 528 * reason we map the parity instead of just using asm->parityInfo->col 529 * is because the latter may have been already redirected to a spare 530 * drive, which would mess up the computation of the stripe offset. 531 * 532 * ASSUMES AT MOST ONE FAILURE IN THE STRIPE. */ 533 int 534 rf_CheckStripeForFailures(raidPtr, asmap) 535 RF_Raid_t *raidPtr; 536 RF_AccessStripeMap_t *asmap; 537 { 538 RF_RowCol_t trow, tcol, prow, pcol, *diskids, i; 539 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 540 RF_StripeCount_t stripeOffset; 541 int numFailures; 542 RF_RaidAddr_t sosAddr; 543 RF_SectorNum_t diskOffset, poffset; 544 RF_RowCol_t testrow; 545 546 /* quick out in the fault-free case. */ 547 RF_LOCK_MUTEX(raidPtr->mutex); 548 numFailures = raidPtr->numFailures; 549 RF_UNLOCK_MUTEX(raidPtr->mutex); 550 if (numFailures == 0) 551 return (0); 552 553 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 554 (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress, &diskids, &testrow); 555 (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress, &prow, &pcol, &poffset, 0); /* get pcol */ 556 557 /* this need not be true if we've redirected the access to a spare in 558 * another row RF_ASSERT(row == testrow); */ 559 stripeOffset = 0; 560 for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) { 561 if (diskids[i] != pcol) { 562 if (RF_DEAD_DISK(raidPtr->Disks[testrow][diskids[i]].status)) { 563 if (raidPtr->status[testrow] != rf_rs_reconstructing) 564 return (1); 565 RF_ASSERT(raidPtr->reconControl[testrow]->fcol == diskids[i]); 566 layoutPtr->map->MapSector(raidPtr, 567 sosAddr + stripeOffset * layoutPtr->sectorsPerStripeUnit, 568 &trow, &tcol, &diskOffset, 0); 569 RF_ASSERT((trow == testrow) && (tcol == diskids[i])); 570 if (!rf_CheckRUReconstructed(raidPtr->reconControl[testrow]->reconMap, diskOffset)) 571 return (1); 572 asmap->flags |= RF_ASM_REDIR_LARGE_WRITE; 573 return (0); 574 } 575 stripeOffset++; 576 } 577 } 578 return (0); 579 } 580 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD >0) 581 /* 582 return the number of failed data units in the stripe. 583 */ 584 585 int 586 rf_NumFailedDataUnitsInStripe(raidPtr, asmap) 587 RF_Raid_t *raidPtr; 588 RF_AccessStripeMap_t *asmap; 589 { 590 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 591 RF_RowCol_t trow, tcol, i; 592 RF_SectorNum_t diskOffset; 593 RF_RaidAddr_t sosAddr; 594 int numFailures; 595 596 /* quick out in the fault-free case. */ 597 RF_LOCK_MUTEX(raidPtr->mutex); 598 numFailures = raidPtr->numFailures; 599 RF_UNLOCK_MUTEX(raidPtr->mutex); 600 if (numFailures == 0) 601 return (0); 602 numFailures = 0; 603 604 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); 605 for (i = 0; i < layoutPtr->numDataCol; i++) { 606 (layoutPtr->map->MapSector) (raidPtr, sosAddr + i * layoutPtr->sectorsPerStripeUnit, 607 &trow, &tcol, &diskOffset, 0); 608 if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status)) 609 numFailures++; 610 } 611 612 return numFailures; 613 } 614 #endif 615 616 /***************************************************************************************** 617 * 618 * debug routines 619 * 620 ****************************************************************************************/ 621 #if RF_DEBUG_MAP 622 void 623 rf_PrintAccessStripeMap(asm_h) 624 RF_AccessStripeMapHeader_t *asm_h; 625 { 626 rf_PrintFullAccessStripeMap(asm_h, 0); 627 } 628 #endif 629 630 void 631 rf_PrintFullAccessStripeMap(asm_h, prbuf) 632 RF_AccessStripeMapHeader_t *asm_h; 633 int prbuf; /* flag to print buffer pointers */ 634 { 635 int i; 636 RF_AccessStripeMap_t *asmap = asm_h->stripeMap; 637 RF_PhysDiskAddr_t *p; 638 printf("%d stripes total\n", (int) asm_h->numStripes); 639 for (; asmap; asmap = asmap->next) { 640 /* printf("Num failures: %d\n",asmap->numDataFailed); */ 641 /* printf("Num sectors: 642 * %d\n",(int)asmap->totalSectorsAccessed); */ 643 printf("Stripe %d (%d sectors), failures: %d data, %d parity: ", 644 (int) asmap->stripeID, 645 (int) asmap->totalSectorsAccessed, 646 (int) asmap->numDataFailed, 647 (int) asmap->numParityFailed); 648 if (asmap->parityInfo) { 649 printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row, asmap->parityInfo->col, 650 (int) asmap->parityInfo->startSector, 651 (int) (asmap->parityInfo->startSector + 652 asmap->parityInfo->numSector - 1)); 653 if (prbuf) 654 printf(" b0x%lx", (unsigned long) asmap->parityInfo->bufPtr); 655 if (asmap->parityInfo->next) { 656 printf(", r%d c%d s%d-%d", asmap->parityInfo->next->row, 657 asmap->parityInfo->next->col, 658 (int) asmap->parityInfo->next->startSector, 659 (int) (asmap->parityInfo->next->startSector + 660 asmap->parityInfo->next->numSector - 1)); 661 if (prbuf) 662 printf(" b0x%lx", (unsigned long) asmap->parityInfo->next->bufPtr); 663 RF_ASSERT(asmap->parityInfo->next->next == NULL); 664 } 665 printf("]\n\t"); 666 } 667 for (i = 0, p = asmap->physInfo; p; p = p->next, i++) { 668 printf("SU r%d c%d s%d-%d ", p->row, p->col, (int) p->startSector, 669 (int) (p->startSector + p->numSector - 1)); 670 if (prbuf) 671 printf("b0x%lx ", (unsigned long) p->bufPtr); 672 if (i && !(i & 1)) 673 printf("\n\t"); 674 } 675 printf("\n"); 676 p = asm_h->stripeMap->failedPDAs[0]; 677 if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 1) 678 printf("[multiple failures]\n"); 679 else 680 if (asm_h->stripeMap->numDataFailed + asm_h->stripeMap->numParityFailed > 0) 681 printf("\t[Failed PDA: r%d c%d s%d-%d]\n", p->row, p->col, 682 (int) p->startSector, (int) (p->startSector + p->numSector - 1)); 683 } 684 } 685 686 #if RF_MAP_DEBUG 687 void 688 rf_PrintRaidAddressInfo(raidPtr, raidAddr, numBlocks) 689 RF_Raid_t *raidPtr; 690 RF_RaidAddr_t raidAddr; 691 RF_SectorCount_t numBlocks; 692 { 693 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 694 RF_RaidAddr_t ra, sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr); 695 696 printf("Raid addrs of SU boundaries from start of stripe to end of access:\n\t"); 697 for (ra = sosAddr; ra <= raidAddr + numBlocks; ra += layoutPtr->sectorsPerStripeUnit) { 698 printf("%d (0x%x), ", (int) ra, (int) ra); 699 } 700 printf("\n"); 701 printf("Offset into stripe unit: %d (0x%x)\n", 702 (int) (raidAddr % layoutPtr->sectorsPerStripeUnit), 703 (int) (raidAddr % layoutPtr->sectorsPerStripeUnit)); 704 } 705 #endif 706 /* 707 given a parity descriptor and the starting address within a stripe, 708 range restrict the parity descriptor to touch only the correct stuff. 709 */ 710 void 711 rf_ASMParityAdjust( 712 RF_PhysDiskAddr_t * toAdjust, 713 RF_StripeNum_t startAddrWithinStripe, 714 RF_SectorNum_t endAddress, 715 RF_RaidLayout_t * layoutPtr, 716 RF_AccessStripeMap_t * asm_p) 717 { 718 RF_PhysDiskAddr_t *new_pda; 719 720 /* when we're accessing only a portion of one stripe unit, we want the 721 * parity descriptor to identify only the chunk of parity associated 722 * with the data. When the access spans exactly one stripe unit 723 * boundary and is less than a stripe unit in size, it uses two 724 * disjoint regions of the parity unit. When an access spans more 725 * than one stripe unit boundary, it uses all of the parity unit. 726 * 727 * To better handle the case where stripe units are small, we may 728 * eventually want to change the 2nd case so that if the SU size is 729 * below some threshold, we just read/write the whole thing instead of 730 * breaking it up into two accesses. */ 731 if (asm_p->numStripeUnitsAccessed == 1) { 732 int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); 733 toAdjust->startSector += x; 734 toAdjust->raidAddress += x; 735 toAdjust->numSector = asm_p->physInfo->numSector; 736 RF_ASSERT(toAdjust->numSector != 0); 737 } else 738 if (asm_p->numStripeUnitsAccessed == 2 && asm_p->totalSectorsAccessed < layoutPtr->sectorsPerStripeUnit) { 739 int x = (startAddrWithinStripe % layoutPtr->sectorsPerStripeUnit); 740 741 /* create a second pda and copy the parity map info 742 * into it */ 743 RF_ASSERT(toAdjust->next == NULL); 744 new_pda = toAdjust->next = rf_AllocPhysDiskAddr(); 745 *new_pda = *toAdjust; /* structure assignment */ 746 new_pda->next = NULL; 747 748 /* adjust the start sector & number of blocks for the 749 * first parity pda */ 750 toAdjust->startSector += x; 751 toAdjust->raidAddress += x; 752 toAdjust->numSector = rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, startAddrWithinStripe) - startAddrWithinStripe; 753 RF_ASSERT(toAdjust->numSector != 0); 754 755 /* adjust the second pda */ 756 new_pda->numSector = endAddress - rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, endAddress); 757 /* new_pda->raidAddress = 758 * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr, 759 * toAdjust->raidAddress); */ 760 RF_ASSERT(new_pda->numSector != 0); 761 } 762 } 763 764 /* 765 Check if a disk has been spared or failed. If spared, 766 redirect the I/O. 767 If it has been failed, record it in the asm pointer. 768 Fourth arg is whether data or parity. 769 */ 770 void 771 rf_ASMCheckStatus( 772 RF_Raid_t * raidPtr, 773 RF_PhysDiskAddr_t * pda_p, 774 RF_AccessStripeMap_t * asm_p, 775 RF_RaidDisk_t ** disks, 776 int parity) 777 { 778 RF_DiskStatus_t dstatus; 779 RF_RowCol_t frow, fcol; 780 781 dstatus = disks[pda_p->row][pda_p->col].status; 782 783 if (dstatus == rf_ds_spared) { 784 /* if the disk has been spared, redirect access to the spare */ 785 frow = pda_p->row; 786 fcol = pda_p->col; 787 pda_p->row = disks[frow][fcol].spareRow; 788 pda_p->col = disks[frow][fcol].spareCol; 789 } else 790 if (dstatus == rf_ds_dist_spared) { 791 /* ditto if disk has been spared to dist spare space */ 792 #if RF_DEBUG_MAP 793 RF_RowCol_t or = pda_p->row, oc = pda_p->col; 794 RF_SectorNum_t oo = pda_p->startSector; 795 #endif 796 if (pda_p->type == RF_PDA_TYPE_DATA) 797 raidPtr->Layout.map->MapSector(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); 798 else 799 raidPtr->Layout.map->MapParity(raidPtr, pda_p->raidAddress, &pda_p->row, &pda_p->col, &pda_p->startSector, RF_REMAP); 800 801 #if RF_DEBUG_MAP 802 if (rf_mapDebug) { 803 printf("Redirected r %d c %d o %d -> r%d c %d o %d\n", or, oc, (int) oo, 804 pda_p->row, pda_p->col, (int) pda_p->startSector); 805 } 806 #endif 807 } else 808 if (RF_DEAD_DISK(dstatus)) { 809 /* if the disk is inaccessible, mark the 810 * failure */ 811 if (parity) 812 asm_p->numParityFailed++; 813 else { 814 asm_p->numDataFailed++; 815 } 816 asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p; 817 asm_p->numFailedPDAs++; 818 #if 0 819 switch (asm_p->numParityFailed + asm_p->numDataFailed) { 820 case 1: 821 asm_p->failedPDAs[0] = pda_p; 822 break; 823 case 2: 824 asm_p->failedPDAs[1] = pda_p; 825 default: 826 break; 827 } 828 #endif 829 } 830 /* the redirected access should never span a stripe unit boundary */ 831 RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress) == 832 rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress + pda_p->numSector - 1)); 833 RF_ASSERT(pda_p->col != -1); 834 } 835