1 /* $NetBSD: rf_paritylogging.c,v 1.34 2011/05/11 06:20:33 mrg Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 30 /* 31 parity logging configuration, dag selection, and mapping is implemented here 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: rf_paritylogging.c,v 1.34 2011/05/11 06:20:33 mrg Exp $"); 36 37 #include "rf_archs.h" 38 39 #if RF_INCLUDE_PARITYLOGGING > 0 40 41 #include <dev/raidframe/raidframevar.h> 42 43 #include "rf_raid.h" 44 #include "rf_dag.h" 45 #include "rf_dagutils.h" 46 #include "rf_dagfuncs.h" 47 #include "rf_dagffrd.h" 48 #include "rf_dagffwr.h" 49 #include "rf_dagdegrd.h" 50 #include "rf_dagdegwr.h" 51 #include "rf_paritylog.h" 52 #include "rf_paritylogDiskMgr.h" 53 #include "rf_paritylogging.h" 54 #include "rf_parityloggingdags.h" 55 #include "rf_general.h" 56 #include "rf_map.h" 57 #include "rf_utils.h" 58 #include "rf_shutdown.h" 59 60 typedef struct RF_ParityLoggingConfigInfo_s { 61 RF_RowCol_t **stripeIdentifier; /* filled in at config time & used by 62 * IdentifyStripe */ 63 } RF_ParityLoggingConfigInfo_t; 64 65 static void FreeRegionInfo(RF_Raid_t * raidPtr, RF_RegionId_t regionID); 66 static void rf_ShutdownParityLogging(RF_ThreadArg_t arg); 67 static void rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg); 68 static void rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg); 69 static void rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg); 70 static void rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg); 71 static void rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg); 72 73 int 74 rf_ConfigureParityLogging( 75 RF_ShutdownList_t ** listp, 76 RF_Raid_t * raidPtr, 77 RF_Config_t * cfgPtr) 78 { 79 int i, j, startdisk, rc; 80 RF_SectorCount_t totalLogCapacity, fragmentation, lastRegionCapacity; 81 RF_SectorCount_t parityBufferCapacity, maxRegionParityRange; 82 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 83 RF_ParityLoggingConfigInfo_t *info; 84 RF_ParityLog_t *l = NULL, *next; 85 void *lHeapPtr; 86 87 if (rf_numParityRegions <= 0) 88 return(EINVAL); 89 90 /* 91 * We create multiple entries on the shutdown list here, since 92 * this configuration routine is fairly complicated in and of 93 * itself, and this makes backing out of a failed configuration 94 * much simpler. 95 */ 96 97 raidPtr->numSectorsPerLog = RF_DEFAULT_NUM_SECTORS_PER_LOG; 98 99 /* create a parity logging configuration structure */ 100 RF_MallocAndAdd(info, sizeof(RF_ParityLoggingConfigInfo_t), 101 (RF_ParityLoggingConfigInfo_t *), 102 raidPtr->cleanupList); 103 if (info == NULL) 104 return (ENOMEM); 105 layoutPtr->layoutSpecificInfo = (void *) info; 106 107 /* the stripe identifier must identify the disks in each stripe, IN 108 * THE ORDER THAT THEY APPEAR IN THE STRIPE. */ 109 info->stripeIdentifier = rf_make_2d_array((raidPtr->numCol), 110 (raidPtr->numCol), 111 raidPtr->cleanupList); 112 if (info->stripeIdentifier == NULL) 113 return (ENOMEM); 114 115 startdisk = 0; 116 for (i = 0; i < (raidPtr->numCol); i++) { 117 for (j = 0; j < (raidPtr->numCol); j++) { 118 info->stripeIdentifier[i][j] = (startdisk + j) % 119 (raidPtr->numCol - 1); 120 } 121 if ((--startdisk) < 0) 122 startdisk = raidPtr->numCol - 1 - 1; 123 } 124 125 /* fill in the remaining layout parameters */ 126 layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk; 127 layoutPtr->numParityCol = 1; 128 layoutPtr->numParityLogCol = 1; 129 layoutPtr->numDataCol = raidPtr->numCol - layoutPtr->numParityCol - 130 layoutPtr->numParityLogCol; 131 layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * 132 layoutPtr->sectorsPerStripeUnit; 133 layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk; 134 raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * 135 layoutPtr->sectorsPerStripeUnit; 136 137 raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk * 138 layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 139 140 /* configure parity log parameters 141 * 142 * parameter comment/constraints 143 * ------------------------------------------- 144 * numParityRegions* all regions (except possibly last) 145 * of equal size 146 * totalInCoreLogCapacity* amount of memory in bytes available 147 * for in-core logs (default 1 MB) 148 * numSectorsPerLog# capacity of an in-core log in sectors 149 * (1 * disk track) 150 * numParityLogs total number of in-core logs, 151 * should be at least numParityRegions 152 * regionLogCapacity size of a region log (except possibly 153 * last one) in sectors 154 * totalLogCapacity total amount of log space in sectors 155 * 156 * where '*' denotes a user settable parameter. 157 * Note that logs are fixed to be the size of a disk track, 158 * value #defined in rf_paritylog.h 159 * 160 */ 161 162 totalLogCapacity = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit * layoutPtr->numParityLogCol; 163 raidPtr->regionLogCapacity = totalLogCapacity / rf_numParityRegions; 164 if (rf_parityLogDebug) 165 printf("bytes per sector %d\n", raidPtr->bytesPerSector); 166 167 /* reduce fragmentation within a disk region by adjusting the number 168 * of regions in an attempt to allow an integral number of logs to fit 169 * into a disk region */ 170 fragmentation = raidPtr->regionLogCapacity % raidPtr->numSectorsPerLog; 171 if (fragmentation > 0) 172 for (i = 1; i < (raidPtr->numSectorsPerLog / 2); i++) { 173 if (((totalLogCapacity / (rf_numParityRegions + i)) % 174 raidPtr->numSectorsPerLog) < fragmentation) { 175 rf_numParityRegions++; 176 raidPtr->regionLogCapacity = totalLogCapacity / 177 rf_numParityRegions; 178 fragmentation = raidPtr->regionLogCapacity % 179 raidPtr->numSectorsPerLog; 180 } 181 if (((totalLogCapacity / (rf_numParityRegions - i)) % 182 raidPtr->numSectorsPerLog) < fragmentation) { 183 rf_numParityRegions--; 184 raidPtr->regionLogCapacity = totalLogCapacity / 185 rf_numParityRegions; 186 fragmentation = raidPtr->regionLogCapacity % 187 raidPtr->numSectorsPerLog; 188 } 189 } 190 /* ensure integral number of regions per log */ 191 raidPtr->regionLogCapacity = (raidPtr->regionLogCapacity / 192 raidPtr->numSectorsPerLog) * 193 raidPtr->numSectorsPerLog; 194 195 raidPtr->numParityLogs = rf_totalInCoreLogCapacity / 196 (raidPtr->bytesPerSector * raidPtr->numSectorsPerLog); 197 /* to avoid deadlock, must ensure that enough logs exist for each 198 * region to have one simultaneously */ 199 if (raidPtr->numParityLogs < rf_numParityRegions) 200 raidPtr->numParityLogs = rf_numParityRegions; 201 202 /* create region information structs */ 203 printf("Allocating %d bytes for in-core parity region info\n", 204 (int) (rf_numParityRegions * sizeof(RF_RegionInfo_t))); 205 RF_Malloc(raidPtr->regionInfo, 206 (rf_numParityRegions * sizeof(RF_RegionInfo_t)), 207 (RF_RegionInfo_t *)); 208 if (raidPtr->regionInfo == NULL) 209 return (ENOMEM); 210 211 /* last region may not be full capacity */ 212 lastRegionCapacity = raidPtr->regionLogCapacity; 213 while ((rf_numParityRegions - 1) * raidPtr->regionLogCapacity + 214 lastRegionCapacity > totalLogCapacity) 215 lastRegionCapacity = lastRegionCapacity - 216 raidPtr->numSectorsPerLog; 217 218 raidPtr->regionParityRange = raidPtr->sectorsPerDisk / 219 rf_numParityRegions; 220 maxRegionParityRange = raidPtr->regionParityRange; 221 222 /* i can't remember why this line is in the code -wvcii 6/30/95 */ 223 /* if (raidPtr->sectorsPerDisk % rf_numParityRegions > 0) 224 regionParityRange++; */ 225 226 /* build pool of unused parity logs */ 227 printf("Allocating %d bytes for %d parity logs\n", 228 raidPtr->numParityLogs * raidPtr->numSectorsPerLog * 229 raidPtr->bytesPerSector, 230 raidPtr->numParityLogs); 231 RF_Malloc(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * 232 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector, 233 (void *)); 234 if (raidPtr->parityLogBufferHeap == NULL) 235 return (ENOMEM); 236 lHeapPtr = raidPtr->parityLogBufferHeap; 237 rf_init_mutex2(raidPtr->parityLogPool.mutex, IPL_VM); 238 for (i = 0; i < raidPtr->numParityLogs; i++) { 239 if (i == 0) { 240 RF_Malloc(raidPtr->parityLogPool.parityLogs, 241 sizeof(RF_ParityLog_t), (RF_ParityLog_t *)); 242 if (raidPtr->parityLogPool.parityLogs == NULL) { 243 RF_Free(raidPtr->parityLogBufferHeap, 244 raidPtr->numParityLogs * 245 raidPtr->numSectorsPerLog * 246 raidPtr->bytesPerSector); 247 return (ENOMEM); 248 } 249 l = raidPtr->parityLogPool.parityLogs; 250 } else { 251 RF_Malloc(l->next, sizeof(RF_ParityLog_t), 252 (RF_ParityLog_t *)); 253 if (l->next == NULL) { 254 RF_Free(raidPtr->parityLogBufferHeap, 255 raidPtr->numParityLogs * 256 raidPtr->numSectorsPerLog * 257 raidPtr->bytesPerSector); 258 for (l = raidPtr->parityLogPool.parityLogs; 259 l; 260 l = next) { 261 next = l->next; 262 if (l->records) 263 RF_Free(l->records, (raidPtr->numSectorsPerLog * sizeof(RF_ParityLogRecord_t))); 264 RF_Free(l, sizeof(RF_ParityLog_t)); 265 } 266 return (ENOMEM); 267 } 268 l = l->next; 269 } 270 l->bufPtr = lHeapPtr; 271 lHeapPtr = (char *)lHeapPtr + raidPtr->numSectorsPerLog * 272 raidPtr->bytesPerSector; 273 RF_Malloc(l->records, (raidPtr->numSectorsPerLog * 274 sizeof(RF_ParityLogRecord_t)), 275 (RF_ParityLogRecord_t *)); 276 if (l->records == NULL) { 277 RF_Free(raidPtr->parityLogBufferHeap, 278 raidPtr->numParityLogs * 279 raidPtr->numSectorsPerLog * 280 raidPtr->bytesPerSector); 281 for (l = raidPtr->parityLogPool.parityLogs; 282 l; 283 l = next) { 284 next = l->next; 285 if (l->records) 286 RF_Free(l->records, 287 (raidPtr->numSectorsPerLog * 288 sizeof(RF_ParityLogRecord_t))); 289 RF_Free(l, sizeof(RF_ParityLog_t)); 290 } 291 return (ENOMEM); 292 } 293 } 294 rf_ShutdownCreate(listp, rf_ShutdownParityLoggingPool, raidPtr); 295 /* build pool of region buffers */ 296 rf_init_mutex2(raidPtr->regionBufferPool.mutex, IPL_VM); 297 rf_init_cond2(raidPtr->regionBufferPool.cond, "rfrbpl"); 298 raidPtr->regionBufferPool.bufferSize = raidPtr->regionLogCapacity * 299 raidPtr->bytesPerSector; 300 printf("regionBufferPool.bufferSize %d\n", 301 raidPtr->regionBufferPool.bufferSize); 302 303 /* for now, only one region at a time may be reintegrated */ 304 raidPtr->regionBufferPool.totalBuffers = 1; 305 306 raidPtr->regionBufferPool.availableBuffers = 307 raidPtr->regionBufferPool.totalBuffers; 308 raidPtr->regionBufferPool.availBuffersIndex = 0; 309 raidPtr->regionBufferPool.emptyBuffersIndex = 0; 310 printf("Allocating %d bytes for regionBufferPool\n", 311 (int) (raidPtr->regionBufferPool.totalBuffers * 312 sizeof(void *))); 313 RF_Malloc(raidPtr->regionBufferPool.buffers, 314 raidPtr->regionBufferPool.totalBuffers * sizeof(void *), 315 (void **)); 316 if (raidPtr->regionBufferPool.buffers == NULL) { 317 return (ENOMEM); 318 } 319 for (i = 0; i < raidPtr->regionBufferPool.totalBuffers; i++) { 320 printf("Allocating %d bytes for regionBufferPool#%d\n", 321 (int) (raidPtr->regionBufferPool.bufferSize * 322 sizeof(char)), i); 323 RF_Malloc(raidPtr->regionBufferPool.buffers[i], 324 raidPtr->regionBufferPool.bufferSize * sizeof(char), 325 (void *)); 326 if (raidPtr->regionBufferPool.buffers[i] == NULL) { 327 for (j = 0; j < i; j++) { 328 RF_Free(raidPtr->regionBufferPool.buffers[i], 329 raidPtr->regionBufferPool.bufferSize * 330 sizeof(char)); 331 } 332 RF_Free(raidPtr->regionBufferPool.buffers, 333 raidPtr->regionBufferPool.totalBuffers * 334 sizeof(void *)); 335 return (ENOMEM); 336 } 337 printf("raidPtr->regionBufferPool.buffers[%d] = %lx\n", i, 338 (long) raidPtr->regionBufferPool.buffers[i]); 339 } 340 rf_ShutdownCreate(listp, 341 rf_ShutdownParityLoggingRegionBufferPool, 342 raidPtr); 343 /* build pool of parity buffers */ 344 parityBufferCapacity = maxRegionParityRange; 345 rf_init_mutex2(raidPtr->parityBufferPool.mutex, IPL_VM); 346 rf_init_cond2(raidPtr->parityBufferPool.cond, "rfpbpl"); 347 raidPtr->parityBufferPool.bufferSize = parityBufferCapacity * 348 raidPtr->bytesPerSector; 349 printf("parityBufferPool.bufferSize %d\n", 350 raidPtr->parityBufferPool.bufferSize); 351 352 /* for now, only one region at a time may be reintegrated */ 353 raidPtr->parityBufferPool.totalBuffers = 1; 354 355 raidPtr->parityBufferPool.availableBuffers = 356 raidPtr->parityBufferPool.totalBuffers; 357 raidPtr->parityBufferPool.availBuffersIndex = 0; 358 raidPtr->parityBufferPool.emptyBuffersIndex = 0; 359 printf("Allocating %d bytes for parityBufferPool of %d units\n", 360 (int) (raidPtr->parityBufferPool.totalBuffers * 361 sizeof(void *)), 362 raidPtr->parityBufferPool.totalBuffers ); 363 RF_Malloc(raidPtr->parityBufferPool.buffers, 364 raidPtr->parityBufferPool.totalBuffers * sizeof(void *), 365 (void **)); 366 if (raidPtr->parityBufferPool.buffers == NULL) { 367 return (ENOMEM); 368 } 369 for (i = 0; i < raidPtr->parityBufferPool.totalBuffers; i++) { 370 printf("Allocating %d bytes for parityBufferPool#%d\n", 371 (int) (raidPtr->parityBufferPool.bufferSize * 372 sizeof(char)),i); 373 RF_Malloc(raidPtr->parityBufferPool.buffers[i], 374 raidPtr->parityBufferPool.bufferSize * sizeof(char), 375 (void *)); 376 if (raidPtr->parityBufferPool.buffers == NULL) { 377 for (j = 0; j < i; j++) { 378 RF_Free(raidPtr->parityBufferPool.buffers[i], 379 raidPtr->regionBufferPool.bufferSize * 380 sizeof(char)); 381 } 382 RF_Free(raidPtr->parityBufferPool.buffers, 383 raidPtr->regionBufferPool.totalBuffers * 384 sizeof(void *)); 385 return (ENOMEM); 386 } 387 printf("parityBufferPool.buffers[%d] = %lx\n", i, 388 (long) raidPtr->parityBufferPool.buffers[i]); 389 } 390 rf_ShutdownCreate(listp, 391 rf_ShutdownParityLoggingParityBufferPool, 392 raidPtr); 393 /* initialize parityLogDiskQueue */ 394 rf_init_mutex2(raidPtr->parityLogDiskQueue.mutex, IPL_VM); 395 rf_init_cond2(raidPtr->parityLogDiskQueue.cond, "rfpldq"); 396 raidPtr->parityLogDiskQueue.flushQueue = NULL; 397 raidPtr->parityLogDiskQueue.reintQueue = NULL; 398 raidPtr->parityLogDiskQueue.bufHead = NULL; 399 raidPtr->parityLogDiskQueue.bufTail = NULL; 400 raidPtr->parityLogDiskQueue.reintHead = NULL; 401 raidPtr->parityLogDiskQueue.reintTail = NULL; 402 raidPtr->parityLogDiskQueue.logBlockHead = NULL; 403 raidPtr->parityLogDiskQueue.logBlockTail = NULL; 404 raidPtr->parityLogDiskQueue.reintBlockHead = NULL; 405 raidPtr->parityLogDiskQueue.reintBlockTail = NULL; 406 raidPtr->parityLogDiskQueue.freeDataList = NULL; 407 raidPtr->parityLogDiskQueue.freeCommonList = NULL; 408 409 rf_ShutdownCreate(listp, 410 rf_ShutdownParityLoggingDiskQueue, 411 raidPtr); 412 for (i = 0; i < rf_numParityRegions; i++) { 413 rf_init_mutex2(raidPtr->regionInfo[i].mutex, IPL_VM); 414 rf_init_mutex2(raidPtr->regionInfo[i].reintMutex, IPL_VM); 415 raidPtr->regionInfo[i].reintInProgress = RF_FALSE; 416 raidPtr->regionInfo[i].regionStartAddr = 417 raidPtr->regionLogCapacity * i; 418 raidPtr->regionInfo[i].parityStartAddr = 419 raidPtr->regionParityRange * i; 420 if (i < rf_numParityRegions - 1) { 421 raidPtr->regionInfo[i].capacity = 422 raidPtr->regionLogCapacity; 423 raidPtr->regionInfo[i].numSectorsParity = 424 raidPtr->regionParityRange; 425 } else { 426 raidPtr->regionInfo[i].capacity = 427 lastRegionCapacity; 428 raidPtr->regionInfo[i].numSectorsParity = 429 raidPtr->sectorsPerDisk - 430 raidPtr->regionParityRange * i; 431 if (raidPtr->regionInfo[i].numSectorsParity > 432 maxRegionParityRange) 433 maxRegionParityRange = 434 raidPtr->regionInfo[i].numSectorsParity; 435 } 436 raidPtr->regionInfo[i].diskCount = 0; 437 RF_ASSERT(raidPtr->regionInfo[i].capacity + 438 raidPtr->regionInfo[i].regionStartAddr <= 439 totalLogCapacity); 440 RF_ASSERT(raidPtr->regionInfo[i].parityStartAddr + 441 raidPtr->regionInfo[i].numSectorsParity <= 442 raidPtr->sectorsPerDisk); 443 printf("Allocating %d bytes for region %d\n", 444 (int) (raidPtr->regionInfo[i].capacity * 445 sizeof(RF_DiskMap_t)), i); 446 RF_Malloc(raidPtr->regionInfo[i].diskMap, 447 (raidPtr->regionInfo[i].capacity * 448 sizeof(RF_DiskMap_t)), 449 (RF_DiskMap_t *)); 450 if (raidPtr->regionInfo[i].diskMap == NULL) { 451 for (j = 0; j < i; j++) 452 FreeRegionInfo(raidPtr, j); 453 RF_Free(raidPtr->regionInfo, 454 (rf_numParityRegions * 455 sizeof(RF_RegionInfo_t))); 456 return (ENOMEM); 457 } 458 raidPtr->regionInfo[i].loggingEnabled = RF_FALSE; 459 raidPtr->regionInfo[i].coreLog = NULL; 460 } 461 rf_ShutdownCreate(listp, 462 rf_ShutdownParityLoggingRegionInfo, 463 raidPtr); 464 RF_ASSERT(raidPtr->parityLogDiskQueue.threadState == 0); 465 raidPtr->parityLogDiskQueue.threadState = RF_PLOG_CREATED; 466 rc = RF_CREATE_THREAD(raidPtr->pLogDiskThreadHandle, 467 rf_ParityLoggingDiskManager, raidPtr,"rf_log"); 468 if (rc) { 469 raidPtr->parityLogDiskQueue.threadState = 0; 470 RF_ERRORMSG3("Unable to create parity logging disk thread file %s line %d rc=%d\n", 471 __FILE__, __LINE__, rc); 472 return (ENOMEM); 473 } 474 /* wait for thread to start */ 475 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 476 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_RUNNING)) { 477 rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 478 raidPtr->parityLogDiskQueue.mutex); 479 } 480 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 481 482 rf_ShutdownCreate(listp, rf_ShutdownParityLogging, raidPtr); 483 if (rf_parityLogDebug) { 484 printf(" size of disk log in sectors: %d\n", 485 (int) totalLogCapacity); 486 printf(" total number of parity regions is %d\n", (int) rf_numParityRegions); 487 printf(" nominal sectors of log per parity region is %d\n", (int) raidPtr->regionLogCapacity); 488 printf(" nominal region fragmentation is %d sectors\n", (int) fragmentation); 489 printf(" total number of parity logs is %d\n", raidPtr->numParityLogs); 490 printf(" parity log size is %d sectors\n", raidPtr->numSectorsPerLog); 491 printf(" total in-core log space is %d bytes\n", (int) rf_totalInCoreLogCapacity); 492 } 493 rf_EnableParityLogging(raidPtr); 494 495 return (0); 496 } 497 498 static void 499 FreeRegionInfo( 500 RF_Raid_t * raidPtr, 501 RF_RegionId_t regionID) 502 { 503 RF_Free(raidPtr->regionInfo[regionID].diskMap, 504 (raidPtr->regionInfo[regionID].capacity * 505 sizeof(RF_DiskMap_t))); 506 if (!rf_forceParityLogReint && raidPtr->regionInfo[regionID].coreLog) { 507 rf_ReleaseParityLogs(raidPtr, 508 raidPtr->regionInfo[regionID].coreLog); 509 raidPtr->regionInfo[regionID].coreLog = NULL; 510 } else { 511 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog == NULL); 512 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == 0); 513 } 514 rf_destroy_mutex2(raidPtr->regionInfo[regionID].reintMutex); 515 rf_destroy_mutex2(raidPtr->regionInfo[regionID].mutex); 516 } 517 518 519 static void 520 FreeParityLogQueue(RF_Raid_t * raidPtr) 521 { 522 RF_ParityLog_t *l1, *l2; 523 524 l1 = raidPtr->parityLogPool.parityLogs; 525 while (l1) { 526 l2 = l1; 527 l1 = l2->next; 528 RF_Free(l2->records, (raidPtr->numSectorsPerLog * 529 sizeof(RF_ParityLogRecord_t))); 530 RF_Free(l2, sizeof(RF_ParityLog_t)); 531 } 532 rf_destroy_mutex2(raidPtr->parityLogPool.mutex); 533 } 534 535 536 static void 537 FreeRegionBufferQueue(RF_RegionBufferQueue_t * queue) 538 { 539 int i; 540 541 if (queue->availableBuffers != queue->totalBuffers) { 542 printf("Attempt to free region queue which is still in use!\n"); 543 RF_ASSERT(0); 544 } 545 for (i = 0; i < queue->totalBuffers; i++) 546 RF_Free(queue->buffers[i], queue->bufferSize); 547 RF_Free(queue->buffers, queue->totalBuffers * sizeof(void *)); 548 rf_destroy_mutex2(queue->mutex); 549 rf_destroy_cond2(queue->cond); 550 } 551 552 static void 553 rf_ShutdownParityLoggingRegionInfo(RF_ThreadArg_t arg) 554 { 555 RF_Raid_t *raidPtr; 556 RF_RegionId_t i; 557 558 raidPtr = (RF_Raid_t *) arg; 559 if (rf_parityLogDebug) { 560 printf("raid%d: ShutdownParityLoggingRegionInfo\n", 561 raidPtr->raidid); 562 } 563 /* free region information structs */ 564 for (i = 0; i < rf_numParityRegions; i++) 565 FreeRegionInfo(raidPtr, i); 566 RF_Free(raidPtr->regionInfo, (rf_numParityRegions * 567 sizeof(raidPtr->regionInfo))); 568 raidPtr->regionInfo = NULL; 569 } 570 571 static void 572 rf_ShutdownParityLoggingPool(RF_ThreadArg_t arg) 573 { 574 RF_Raid_t *raidPtr; 575 576 raidPtr = (RF_Raid_t *) arg; 577 if (rf_parityLogDebug) { 578 printf("raid%d: ShutdownParityLoggingPool\n", raidPtr->raidid); 579 } 580 /* free contents of parityLogPool */ 581 FreeParityLogQueue(raidPtr); 582 RF_Free(raidPtr->parityLogBufferHeap, raidPtr->numParityLogs * 583 raidPtr->numSectorsPerLog * raidPtr->bytesPerSector); 584 } 585 586 static void 587 rf_ShutdownParityLoggingRegionBufferPool(RF_ThreadArg_t arg) 588 { 589 RF_Raid_t *raidPtr; 590 591 raidPtr = (RF_Raid_t *) arg; 592 if (rf_parityLogDebug) { 593 printf("raid%d: ShutdownParityLoggingRegionBufferPool\n", 594 raidPtr->raidid); 595 } 596 FreeRegionBufferQueue(&raidPtr->regionBufferPool); 597 } 598 599 static void 600 rf_ShutdownParityLoggingParityBufferPool(RF_ThreadArg_t arg) 601 { 602 RF_Raid_t *raidPtr; 603 604 raidPtr = (RF_Raid_t *) arg; 605 if (rf_parityLogDebug) { 606 printf("raid%d: ShutdownParityLoggingParityBufferPool\n", 607 raidPtr->raidid); 608 } 609 FreeRegionBufferQueue(&raidPtr->parityBufferPool); 610 } 611 612 static void 613 rf_ShutdownParityLoggingDiskQueue(RF_ThreadArg_t arg) 614 { 615 RF_ParityLogData_t *d; 616 RF_CommonLogData_t *c; 617 RF_Raid_t *raidPtr; 618 619 raidPtr = (RF_Raid_t *) arg; 620 if (rf_parityLogDebug) { 621 printf("raid%d: ShutdownParityLoggingDiskQueue\n", 622 raidPtr->raidid); 623 } 624 /* free disk manager stuff */ 625 RF_ASSERT(raidPtr->parityLogDiskQueue.bufHead == NULL); 626 RF_ASSERT(raidPtr->parityLogDiskQueue.bufTail == NULL); 627 RF_ASSERT(raidPtr->parityLogDiskQueue.reintHead == NULL); 628 RF_ASSERT(raidPtr->parityLogDiskQueue.reintTail == NULL); 629 while (raidPtr->parityLogDiskQueue.freeDataList) { 630 d = raidPtr->parityLogDiskQueue.freeDataList; 631 raidPtr->parityLogDiskQueue.freeDataList = 632 raidPtr->parityLogDiskQueue.freeDataList->next; 633 RF_Free(d, sizeof(RF_ParityLogData_t)); 634 } 635 while (raidPtr->parityLogDiskQueue.freeCommonList) { 636 c = raidPtr->parityLogDiskQueue.freeCommonList; 637 raidPtr->parityLogDiskQueue.freeCommonList = c->next; 638 /* init is in rf_paritylog.c */ 639 rf_destroy_mutex2(c->mutex); 640 RF_Free(c, sizeof(RF_CommonLogData_t)); 641 } 642 643 rf_destroy_mutex2(raidPtr->parityLogDiskQueue.mutex); 644 rf_destroy_cond2(raidPtr->parityLogDiskQueue.cond); 645 } 646 647 static void 648 rf_ShutdownParityLogging(RF_ThreadArg_t arg) 649 { 650 RF_Raid_t *raidPtr; 651 652 raidPtr = (RF_Raid_t *) arg; 653 if (rf_parityLogDebug) { 654 printf("raid%d: ShutdownParityLogging\n", raidPtr->raidid); 655 } 656 /* shutdown disk thread */ 657 /* This has the desirable side-effect of forcing all regions to be 658 * reintegrated. This is necessary since all parity log maps are 659 * currently held in volatile memory. */ 660 661 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 662 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_TERMINATE; 663 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); 664 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 665 /* 666 * pLogDiskThread will now terminate when queues are cleared 667 * now wait for it to be done 668 */ 669 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); 670 while (!(raidPtr->parityLogDiskQueue.threadState & RF_PLOG_SHUTDOWN)) { 671 rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, 672 raidPtr->parityLogDiskQueue.mutex); 673 } 674 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); 675 if (rf_parityLogDebug) { 676 printf("raid%d: ShutdownParityLogging done (thread completed)\n", raidPtr->raidid); 677 } 678 } 679 680 int 681 rf_GetDefaultNumFloatingReconBuffersParityLogging(RF_Raid_t * raidPtr) 682 { 683 return (20); 684 } 685 686 RF_HeadSepLimit_t 687 rf_GetDefaultHeadSepLimitParityLogging(RF_Raid_t * raidPtr) 688 { 689 return (10); 690 } 691 /* return the region ID for a given RAID address */ 692 RF_RegionId_t 693 rf_MapRegionIDParityLogging( 694 RF_Raid_t * raidPtr, 695 RF_SectorNum_t address) 696 { 697 RF_RegionId_t regionID; 698 699 /* regionID = address / (raidPtr->regionParityRange * raidPtr->Layout.numDataCol); */ 700 regionID = address / raidPtr->regionParityRange; 701 if (regionID == rf_numParityRegions) { 702 /* last region may be larger than other regions */ 703 regionID--; 704 } 705 RF_ASSERT(address >= raidPtr->regionInfo[regionID].parityStartAddr); 706 RF_ASSERT(address < raidPtr->regionInfo[regionID].parityStartAddr + 707 raidPtr->regionInfo[regionID].numSectorsParity); 708 RF_ASSERT(regionID < rf_numParityRegions); 709 return (regionID); 710 } 711 712 713 /* given a logical RAID sector, determine physical disk address of data */ 714 void 715 rf_MapSectorParityLogging( 716 RF_Raid_t * raidPtr, 717 RF_RaidAddr_t raidSector, 718 RF_RowCol_t * col, 719 RF_SectorNum_t * diskSector, 720 int remap) 721 { 722 RF_StripeNum_t SUID = raidSector / 723 raidPtr->Layout.sectorsPerStripeUnit; 724 /* *col = (SUID % (raidPtr->numCol - 725 * raidPtr->Layout.numParityLogCol)); */ 726 *col = SUID % raidPtr->Layout.numDataCol; 727 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 728 raidPtr->Layout.sectorsPerStripeUnit + 729 (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 730 } 731 732 733 /* given a logical RAID sector, determine physical disk address of parity */ 734 void 735 rf_MapParityParityLogging( 736 RF_Raid_t * raidPtr, 737 RF_RaidAddr_t raidSector, 738 RF_RowCol_t * col, 739 RF_SectorNum_t * diskSector, 740 int remap) 741 { 742 RF_StripeNum_t SUID = raidSector / 743 raidPtr->Layout.sectorsPerStripeUnit; 744 745 /* *col = 746 * raidPtr->Layout.numDataCol-(SUID/raidPtr->Layout.numDataCol)%(raidPt 747 * r->numCol - raidPtr->Layout.numParityLogCol); */ 748 *col = raidPtr->Layout.numDataCol; 749 *diskSector = (SUID / (raidPtr->Layout.numDataCol)) * 750 raidPtr->Layout.sectorsPerStripeUnit + 751 (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 752 } 753 754 755 /* given a regionID and sector offset, determine the physical disk address of the parity log */ 756 void 757 rf_MapLogParityLogging( 758 RF_Raid_t * raidPtr, 759 RF_RegionId_t regionID, 760 RF_SectorNum_t regionOffset, 761 RF_RowCol_t * col, 762 RF_SectorNum_t * startSector) 763 { 764 *col = raidPtr->numCol - 1; 765 *startSector = raidPtr->regionInfo[regionID].regionStartAddr + regionOffset; 766 } 767 768 769 /* given a regionID, determine the physical disk address of the logged 770 parity for that region */ 771 void 772 rf_MapRegionParity( 773 RF_Raid_t * raidPtr, 774 RF_RegionId_t regionID, 775 RF_RowCol_t * col, 776 RF_SectorNum_t * startSector, 777 RF_SectorCount_t * numSector) 778 { 779 *col = raidPtr->numCol - 2; 780 *startSector = raidPtr->regionInfo[regionID].parityStartAddr; 781 *numSector = raidPtr->regionInfo[regionID].numSectorsParity; 782 } 783 784 785 /* given a logical RAID address, determine the participating disks in 786 the stripe */ 787 void 788 rf_IdentifyStripeParityLogging( 789 RF_Raid_t * raidPtr, 790 RF_RaidAddr_t addr, 791 RF_RowCol_t ** diskids) 792 { 793 RF_StripeNum_t stripeID = rf_RaidAddressToStripeID(&raidPtr->Layout, 794 addr); 795 RF_ParityLoggingConfigInfo_t *info = (RF_ParityLoggingConfigInfo_t *) 796 raidPtr->Layout.layoutSpecificInfo; 797 *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol]; 798 } 799 800 801 void 802 rf_MapSIDToPSIDParityLogging( 803 RF_RaidLayout_t * layoutPtr, 804 RF_StripeNum_t stripeID, 805 RF_StripeNum_t * psID, 806 RF_ReconUnitNum_t * which_ru) 807 { 808 *which_ru = 0; 809 *psID = stripeID; 810 } 811 812 813 /* select an algorithm for performing an access. Returns two pointers, 814 * one to a function that will return information about the DAG, and 815 * another to a function that will create the dag. 816 */ 817 void 818 rf_ParityLoggingDagSelect( 819 RF_Raid_t * raidPtr, 820 RF_IoType_t type, 821 RF_AccessStripeMap_t * asmp, 822 RF_VoidFuncPtr * createFunc) 823 { 824 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); 825 RF_PhysDiskAddr_t *failedPDA = NULL; 826 RF_RowCol_t fcol; 827 RF_RowStatus_t rstat; 828 int prior_recon; 829 830 RF_ASSERT(RF_IO_IS_R_OR_W(type)); 831 832 if (asmp->numDataFailed + asmp->numParityFailed > 1) { 833 RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 834 *createFunc = NULL; 835 return; 836 } else 837 if (asmp->numDataFailed + asmp->numParityFailed == 1) { 838 839 /* if under recon & already reconstructed, redirect 840 * the access to the spare drive and eliminate the 841 * failure indication */ 842 failedPDA = asmp->failedPDAs[0]; 843 fcol = failedPDA->col; 844 rstat = raidPtr->status; 845 prior_recon = (rstat == rf_rs_reconfigured) || ( 846 (rstat == rf_rs_reconstructing) ? 847 rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, failedPDA->startSector) : 0 848 ); 849 if (prior_recon) { 850 RF_RowCol_t oc = failedPDA->col; 851 RF_SectorNum_t oo = failedPDA->startSector; 852 if (layoutPtr->map->flags & 853 RF_DISTRIBUTE_SPARE) { 854 /* redirect to dist spare space */ 855 856 if (failedPDA == asmp->parityInfo) { 857 858 /* parity has failed */ 859 (layoutPtr->map->MapParity) (raidPtr, failedPDA->raidAddress, 860 &failedPDA->col, &failedPDA->startSector, RF_REMAP); 861 862 if (asmp->parityInfo->next) { /* redir 2nd component, 863 * if any */ 864 RF_PhysDiskAddr_t *p = asmp->parityInfo->next; 865 RF_SectorNum_t SUoffs = p->startSector % layoutPtr->sectorsPerStripeUnit; 866 p->col = failedPDA->col; 867 p->startSector = rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr, failedPDA->startSector) + 868 SUoffs; /* cheating: 869 * startSector is not 870 * really a RAID address */ 871 } 872 } else 873 if (asmp->parityInfo->next && failedPDA == asmp->parityInfo->next) { 874 RF_ASSERT(0); /* should not ever 875 * happen */ 876 } else { 877 878 /* data has failed */ 879 (layoutPtr->map->MapSector) (raidPtr, failedPDA->raidAddress, 880 &failedPDA->col, &failedPDA->startSector, RF_REMAP); 881 882 } 883 884 } else { 885 /* redirect to dedicated spare space */ 886 887 failedPDA->col = raidPtr->Disks[fcol].spareCol; 888 889 /* the parity may have two distinct 890 * components, both of which may need 891 * to be redirected */ 892 if (asmp->parityInfo->next) { 893 if (failedPDA == asmp->parityInfo) { 894 failedPDA->next->col = failedPDA->col; 895 } else 896 if (failedPDA == asmp->parityInfo->next) { /* paranoid: should never occur */ 897 asmp->parityInfo->col = failedPDA->col; 898 } 899 } 900 } 901 902 RF_ASSERT(failedPDA->col != -1); 903 904 if (rf_dagDebug || rf_mapDebug) { 905 printf("raid%d: Redirected type '%c' c %d o %ld -> c %d o %ld\n", 906 raidPtr->raidid, type, oc, (long) oo, failedPDA->col, (long) failedPDA->startSector); 907 } 908 asmp->numDataFailed = asmp->numParityFailed = 0; 909 } 910 } 911 if (type == RF_IO_TYPE_READ) { 912 913 if (asmp->numDataFailed == 0) 914 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; 915 else 916 *createFunc = (RF_VoidFuncPtr) rf_CreateRaidFiveDegradedReadDAG; 917 918 } else { 919 920 921 /* if mirroring, always use large writes. If the access 922 * requires two distinct parity updates, always do a small 923 * write. If the stripe contains a failure but the access 924 * does not, do a small write. The first conditional 925 * (numStripeUnitsAccessed <= numDataCol/2) uses a 926 * less-than-or-equal rather than just a less-than because 927 * when G is 3 or 4, numDataCol/2 is 1, and I want 928 * single-stripe-unit updates to use just one disk. */ 929 if ((asmp->numDataFailed + asmp->numParityFailed) == 0) { 930 if (((asmp->numStripeUnitsAccessed <= 931 (layoutPtr->numDataCol / 2)) && 932 (layoutPtr->numDataCol != 1)) || 933 (asmp->parityInfo->next != NULL) || 934 rf_CheckStripeForFailures(raidPtr, asmp)) { 935 *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingSmallWriteDAG; 936 } else 937 *createFunc = (RF_VoidFuncPtr) rf_CreateParityLoggingLargeWriteDAG; 938 } else 939 if (asmp->numParityFailed == 1) 940 *createFunc = (RF_VoidFuncPtr) rf_CreateNonRedundantWriteDAG; 941 else 942 if (asmp->numStripeUnitsAccessed != 1 && failedPDA->numSector != layoutPtr->sectorsPerStripeUnit) 943 *createFunc = NULL; 944 else 945 *createFunc = (RF_VoidFuncPtr) rf_CreateDegradedWriteDAG; 946 } 947 } 948 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 949