1 /* $NetBSD: rf_paritylogDiskMgr.c,v 1.16 2004/01/10 00:56:28 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 /* Code for flushing and reintegration operations related to parity logging. 29 * 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.16 2004/01/10 00:56:28 oster Exp $"); 34 35 #include "rf_archs.h" 36 37 #if RF_INCLUDE_PARITYLOGGING > 0 38 39 #include <dev/raidframe/raidframevar.h> 40 41 #include "rf_threadstuff.h" 42 #include "rf_mcpair.h" 43 #include "rf_raid.h" 44 #include "rf_dag.h" 45 #include "rf_dagfuncs.h" 46 #include "rf_desc.h" 47 #include "rf_layout.h" 48 #include "rf_diskqueue.h" 49 #include "rf_paritylog.h" 50 #include "rf_general.h" 51 #include "rf_etimer.h" 52 #include "rf_paritylogging.h" 53 #include "rf_engine.h" 54 #include "rf_dagutils.h" 55 #include "rf_map.h" 56 #include "rf_parityscan.h" 57 58 #include "rf_paritylogDiskMgr.h" 59 60 static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); 61 62 static caddr_t 63 AcquireReintBuffer(pool) 64 RF_RegionBufferQueue_t *pool; 65 { 66 caddr_t bufPtr = NULL; 67 68 /* Return a region buffer from the free list (pool). If the free list 69 * is empty, WAIT. BLOCKING */ 70 71 RF_LOCK_MUTEX(pool->mutex); 72 if (pool->availableBuffers > 0) { 73 bufPtr = pool->buffers[pool->availBuffersIndex]; 74 pool->availableBuffers--; 75 pool->availBuffersIndex++; 76 if (pool->availBuffersIndex == pool->totalBuffers) 77 pool->availBuffersIndex = 0; 78 RF_UNLOCK_MUTEX(pool->mutex); 79 } else { 80 RF_PANIC(); /* should never happen in correct config, 81 * single reint */ 82 RF_WAIT_COND(pool->cond, pool->mutex); 83 } 84 return (bufPtr); 85 } 86 87 static void 88 ReleaseReintBuffer( 89 RF_RegionBufferQueue_t * pool, 90 caddr_t bufPtr) 91 { 92 /* Insert a region buffer (bufPtr) into the free list (pool). 93 * NON-BLOCKING */ 94 95 RF_LOCK_MUTEX(pool->mutex); 96 pool->availableBuffers++; 97 pool->buffers[pool->emptyBuffersIndex] = bufPtr; 98 pool->emptyBuffersIndex++; 99 if (pool->emptyBuffersIndex == pool->totalBuffers) 100 pool->emptyBuffersIndex = 0; 101 RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); 102 RF_UNLOCK_MUTEX(pool->mutex); 103 RF_SIGNAL_COND(pool->cond); 104 } 105 106 107 108 static void 109 ReadRegionLog( 110 RF_RegionId_t regionID, 111 RF_MCPair_t * rrd_mcpair, 112 caddr_t regionBuffer, 113 RF_Raid_t * raidPtr, 114 RF_DagHeader_t ** rrd_dag_h, 115 RF_AllocListElem_t ** rrd_alloclist, 116 RF_PhysDiskAddr_t ** rrd_pda) 117 { 118 /* Initiate the read a region log from disk. Once initiated, return 119 * to the calling routine. 120 * 121 * NON-BLOCKING */ 122 123 RF_AccTraceEntry_t *tracerec; 124 RF_DagNode_t *rrd_rdNode; 125 126 /* create DAG to read region log from disk */ 127 rf_MakeAllocList(*rrd_alloclist); 128 *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, 129 rf_DiskReadFunc, rf_DiskReadUndoFunc, 130 "Rrl", *rrd_alloclist, 131 RF_DAG_FLAGS_NONE, 132 RF_IO_NORMAL_PRIORITY); 133 134 /* create and initialize PDA for the core log */ 135 /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t 136 * *)); */ 137 *rrd_pda = rf_AllocPDAList(1); 138 rf_MapLogParityLogging(raidPtr, regionID, 0, 139 &((*rrd_pda)->col), &((*rrd_pda)->startSector)); 140 (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; 141 142 if ((*rrd_pda)->next) { 143 (*rrd_pda)->next = NULL; 144 printf("set rrd_pda->next to NULL\n"); 145 } 146 /* initialize DAG parameters */ 147 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); 148 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); 149 (*rrd_dag_h)->tracerec = tracerec; 150 rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; 151 rrd_rdNode->params[0].p = *rrd_pda; 152 /* rrd_rdNode->params[1] = regionBuffer; */ 153 rrd_rdNode->params[2].v = 0; 154 rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 155 156 /* launch region log read dag */ 157 rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 158 (void *) rrd_mcpair); 159 } 160 161 162 163 static void 164 WriteCoreLog( 165 RF_ParityLog_t * log, 166 RF_MCPair_t * fwr_mcpair, 167 RF_Raid_t * raidPtr, 168 RF_DagHeader_t ** fwr_dag_h, 169 RF_AllocListElem_t ** fwr_alloclist, 170 RF_PhysDiskAddr_t ** fwr_pda) 171 { 172 RF_RegionId_t regionID = log->regionID; 173 RF_AccTraceEntry_t *tracerec; 174 RF_SectorNum_t regionOffset; 175 RF_DagNode_t *fwr_wrNode; 176 177 /* Initiate the write of a core log to a region log disk. Once 178 * initiated, return to the calling routine. 179 * 180 * NON-BLOCKING */ 181 182 /* create DAG to write a core log to a region log disk */ 183 rf_MakeAllocList(*fwr_alloclist); 184 *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, 185 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 186 "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); 187 188 /* create and initialize PDA for the region log */ 189 /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t 190 * *)); */ 191 *fwr_pda = rf_AllocPDAList(1); 192 regionOffset = log->diskOffset; 193 rf_MapLogParityLogging(raidPtr, regionID, regionOffset, 194 &((*fwr_pda)->col), 195 &((*fwr_pda)->startSector)); 196 (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; 197 198 /* initialize DAG parameters */ 199 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); 200 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); 201 (*fwr_dag_h)->tracerec = tracerec; 202 fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; 203 fwr_wrNode->params[0].p = *fwr_pda; 204 /* fwr_wrNode->params[1] = log->bufPtr; */ 205 fwr_wrNode->params[2].v = 0; 206 fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 207 208 /* launch the dag to write the core log to disk */ 209 rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 210 (void *) fwr_mcpair); 211 } 212 213 214 static void 215 ReadRegionParity( 216 RF_RegionId_t regionID, 217 RF_MCPair_t * prd_mcpair, 218 caddr_t parityBuffer, 219 RF_Raid_t * raidPtr, 220 RF_DagHeader_t ** prd_dag_h, 221 RF_AllocListElem_t ** prd_alloclist, 222 RF_PhysDiskAddr_t ** prd_pda) 223 { 224 /* Initiate the read region parity from disk. Once initiated, return 225 * to the calling routine. 226 * 227 * NON-BLOCKING */ 228 229 RF_AccTraceEntry_t *tracerec; 230 RF_DagNode_t *prd_rdNode; 231 232 /* create DAG to read region parity from disk */ 233 rf_MakeAllocList(*prd_alloclist); 234 *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, 235 rf_DiskReadUndoFunc, "Rrp", 236 *prd_alloclist, RF_DAG_FLAGS_NONE, 237 RF_IO_NORMAL_PRIORITY); 238 239 /* create and initialize PDA for region parity */ 240 /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t 241 * *)); */ 242 *prd_pda = rf_AllocPDAList(1); 243 rf_MapRegionParity(raidPtr, regionID, 244 &((*prd_pda)->col), &((*prd_pda)->startSector), 245 &((*prd_pda)->numSector)); 246 if (rf_parityLogDebug) 247 printf("[reading %d sectors of parity from region %d]\n", 248 (int) (*prd_pda)->numSector, regionID); 249 if ((*prd_pda)->next) { 250 (*prd_pda)->next = NULL; 251 printf("set prd_pda->next to NULL\n"); 252 } 253 /* initialize DAG parameters */ 254 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); 255 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); 256 (*prd_dag_h)->tracerec = tracerec; 257 prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; 258 prd_rdNode->params[0].p = *prd_pda; 259 prd_rdNode->params[1].p = parityBuffer; 260 prd_rdNode->params[2].v = 0; 261 prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 262 #if RF_DEBUG_VALIDATE_DAG 263 if (rf_validateDAGDebug) 264 rf_ValidateDAG(*prd_dag_h); 265 #endif 266 /* launch region parity read dag */ 267 rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 268 (void *) prd_mcpair); 269 } 270 271 static void 272 WriteRegionParity( 273 RF_RegionId_t regionID, 274 RF_MCPair_t * pwr_mcpair, 275 caddr_t parityBuffer, 276 RF_Raid_t * raidPtr, 277 RF_DagHeader_t ** pwr_dag_h, 278 RF_AllocListElem_t ** pwr_alloclist, 279 RF_PhysDiskAddr_t ** pwr_pda) 280 { 281 /* Initiate the write of region parity to disk. Once initiated, return 282 * to the calling routine. 283 * 284 * NON-BLOCKING */ 285 286 RF_AccTraceEntry_t *tracerec; 287 RF_DagNode_t *pwr_wrNode; 288 289 /* create DAG to write region log from disk */ 290 rf_MakeAllocList(*pwr_alloclist); 291 *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, 292 rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 293 "Wrp", *pwr_alloclist, 294 RF_DAG_FLAGS_NONE, 295 RF_IO_NORMAL_PRIORITY); 296 297 /* create and initialize PDA for region parity */ 298 /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t 299 * *)); */ 300 *pwr_pda = rf_AllocPDAList(1); 301 rf_MapRegionParity(raidPtr, regionID, 302 &((*pwr_pda)->col), &((*pwr_pda)->startSector), 303 &((*pwr_pda)->numSector)); 304 305 /* initialize DAG parameters */ 306 RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); 307 memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); 308 (*pwr_dag_h)->tracerec = tracerec; 309 pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; 310 pwr_wrNode->params[0].p = *pwr_pda; 311 /* pwr_wrNode->params[1] = parityBuffer; */ 312 pwr_wrNode->params[2].v = 0; 313 pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); 314 315 /* launch the dag to write region parity to disk */ 316 rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, 317 (void *) pwr_mcpair); 318 } 319 320 static void 321 FlushLogsToDisk( 322 RF_Raid_t * raidPtr, 323 RF_ParityLog_t * logList) 324 { 325 /* Flush a linked list of core logs to the log disk. Logs contain the 326 * disk location where they should be written. Logs were written in 327 * FIFO order and that order must be preserved. 328 * 329 * Recommended optimizations: 1) allow multiple flushes to occur 330 * simultaneously 2) coalesce contiguous flush operations 331 * 332 * BLOCKING */ 333 334 RF_ParityLog_t *log; 335 RF_RegionId_t regionID; 336 RF_MCPair_t *fwr_mcpair; 337 RF_DagHeader_t *fwr_dag_h; 338 RF_AllocListElem_t *fwr_alloclist; 339 RF_PhysDiskAddr_t *fwr_pda; 340 341 fwr_mcpair = rf_AllocMCPair(); 342 RF_LOCK_MUTEX(fwr_mcpair->mutex); 343 344 RF_ASSERT(logList); 345 log = logList; 346 while (log) { 347 regionID = log->regionID; 348 349 /* create and launch a DAG to write the core log */ 350 if (rf_parityLogDebug) 351 printf("[initiating write of core log for region %d]\n", regionID); 352 fwr_mcpair->flag = RF_FALSE; 353 WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, 354 &fwr_alloclist, &fwr_pda); 355 356 /* wait for the DAG to complete */ 357 while (!fwr_mcpair->flag) 358 RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); 359 if (fwr_dag_h->status != rf_enable) { 360 RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); 361 RF_ASSERT(0); 362 } 363 /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 364 rf_FreePhysDiskAddr(fwr_pda); 365 rf_FreeDAG(fwr_dag_h); 366 rf_FreeAllocList(fwr_alloclist); 367 368 log = log->next; 369 } 370 RF_UNLOCK_MUTEX(fwr_mcpair->mutex); 371 rf_FreeMCPair(fwr_mcpair); 372 rf_ReleaseParityLogs(raidPtr, logList); 373 } 374 375 static void 376 ReintegrateRegion( 377 RF_Raid_t * raidPtr, 378 RF_RegionId_t regionID, 379 RF_ParityLog_t * coreLog) 380 { 381 RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; 382 RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; 383 RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; 384 RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; 385 caddr_t parityBuffer, regionBuffer = NULL; 386 387 /* Reintegrate a region (regionID). 388 * 389 * 1. acquire region and parity buffers 390 * 2. read log from disk 391 * 3. read parity from disk 392 * 4. apply log to parity 393 * 5. apply core log to parity 394 * 6. write new parity to disk 395 * 396 * BLOCKING */ 397 398 if (rf_parityLogDebug) 399 printf("[reintegrating region %d]\n", regionID); 400 401 /* initiate read of region parity */ 402 if (rf_parityLogDebug) 403 printf("[initiating read of parity for region %d]\n",regionID); 404 parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); 405 prd_mcpair = rf_AllocMCPair(); 406 RF_LOCK_MUTEX(prd_mcpair->mutex); 407 prd_mcpair->flag = RF_FALSE; 408 ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, 409 &prd_dag_h, &prd_alloclist, &prd_pda); 410 411 /* if region log nonempty, initiate read */ 412 if (raidPtr->regionInfo[regionID].diskCount > 0) { 413 if (rf_parityLogDebug) 414 printf("[initiating read of disk log for region %d]\n", 415 regionID); 416 regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); 417 rrd_mcpair = rf_AllocMCPair(); 418 RF_LOCK_MUTEX(rrd_mcpair->mutex); 419 rrd_mcpair->flag = RF_FALSE; 420 ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, 421 &rrd_dag_h, &rrd_alloclist, &rrd_pda); 422 } 423 /* wait on read of region parity to complete */ 424 while (!prd_mcpair->flag) { 425 RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); 426 } 427 RF_UNLOCK_MUTEX(prd_mcpair->mutex); 428 if (prd_dag_h->status != rf_enable) { 429 RF_ERRORMSG("Unable to read parity from disk\n"); 430 /* add code to fail the parity disk */ 431 RF_ASSERT(0); 432 } 433 /* apply core log to parity */ 434 /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ 435 436 if (raidPtr->regionInfo[regionID].diskCount > 0) { 437 /* wait on read of region log to complete */ 438 while (!rrd_mcpair->flag) 439 RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); 440 RF_UNLOCK_MUTEX(rrd_mcpair->mutex); 441 if (rrd_dag_h->status != rf_enable) { 442 RF_ERRORMSG("Unable to read region log from disk\n"); 443 /* add code to fail the log disk */ 444 RF_ASSERT(0); 445 } 446 /* apply region log to parity */ 447 /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ 448 /* release resources associated with region log */ 449 /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ 450 rf_FreePhysDiskAddr(rrd_pda); 451 rf_FreeDAG(rrd_dag_h); 452 rf_FreeAllocList(rrd_alloclist); 453 rf_FreeMCPair(rrd_mcpair); 454 ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); 455 } 456 /* write reintegrated parity to disk */ 457 if (rf_parityLogDebug) 458 printf("[initiating write of parity for region %d]\n", 459 regionID); 460 pwr_mcpair = rf_AllocMCPair(); 461 RF_LOCK_MUTEX(pwr_mcpair->mutex); 462 pwr_mcpair->flag = RF_FALSE; 463 WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, 464 &pwr_dag_h, &pwr_alloclist, &pwr_pda); 465 while (!pwr_mcpair->flag) 466 RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); 467 RF_UNLOCK_MUTEX(pwr_mcpair->mutex); 468 if (pwr_dag_h->status != rf_enable) { 469 RF_ERRORMSG("Unable to write parity to disk\n"); 470 /* add code to fail the parity disk */ 471 RF_ASSERT(0); 472 } 473 /* release resources associated with read of old parity */ 474 /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ 475 rf_FreePhysDiskAddr(prd_pda); 476 rf_FreeDAG(prd_dag_h); 477 rf_FreeAllocList(prd_alloclist); 478 rf_FreeMCPair(prd_mcpair); 479 480 /* release resources associated with write of new parity */ 481 ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); 482 /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ 483 rf_FreePhysDiskAddr(pwr_pda); 484 rf_FreeDAG(pwr_dag_h); 485 rf_FreeAllocList(pwr_alloclist); 486 rf_FreeMCPair(pwr_mcpair); 487 488 if (rf_parityLogDebug) 489 printf("[finished reintegrating region %d]\n", regionID); 490 } 491 492 493 494 static void 495 ReintegrateLogs( 496 RF_Raid_t * raidPtr, 497 RF_ParityLog_t * logList) 498 { 499 RF_ParityLog_t *log, *freeLogList = NULL; 500 RF_ParityLogData_t *logData, *logDataList; 501 RF_RegionId_t regionID; 502 503 RF_ASSERT(logList); 504 while (logList) { 505 log = logList; 506 logList = logList->next; 507 log->next = NULL; 508 regionID = log->regionID; 509 ReintegrateRegion(raidPtr, regionID, log); 510 log->numRecords = 0; 511 512 /* remove all items which are blocked on reintegration of this 513 * region */ 514 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 515 logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, 516 &raidPtr->parityLogDiskQueue.reintBlockHead, 517 &raidPtr->parityLogDiskQueue.reintBlockTail, 518 RF_TRUE); 519 logDataList = logData; 520 while (logData) { 521 logData->next = rf_SearchAndDequeueParityLogData( 522 raidPtr, regionID, 523 &raidPtr->parityLogDiskQueue.reintBlockHead, 524 &raidPtr->parityLogDiskQueue.reintBlockTail, 525 RF_TRUE); 526 logData = logData->next; 527 } 528 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 529 530 /* process blocked log data and clear reintInProgress flag for 531 * this region */ 532 if (logDataList) 533 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); 534 else { 535 /* Enable flushing for this region. Holding both 536 * locks provides a synchronization barrier with 537 * DumpParityLogToDisk */ 538 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 539 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 540 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 541 raidPtr->regionInfo[regionID].diskCount = 0; 542 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 543 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 544 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 545 * enabled */ 546 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 547 } 548 /* if log wasn't used, attach it to the list of logs to be 549 * returned */ 550 if (log) { 551 log->next = freeLogList; 552 freeLogList = log; 553 } 554 } 555 if (freeLogList) 556 rf_ReleaseParityLogs(raidPtr, freeLogList); 557 } 558 559 int 560 rf_ShutdownLogging(RF_Raid_t * raidPtr) 561 { 562 /* shutdown parity logging 1) disable parity logging in all regions 2) 563 * reintegrate all regions */ 564 565 RF_SectorCount_t diskCount; 566 RF_RegionId_t regionID; 567 RF_ParityLog_t *log; 568 569 if (rf_parityLogDebug) 570 printf("[shutting down parity logging]\n"); 571 /* Since parity log maps are volatile, we must reintegrate all 572 * regions. */ 573 if (rf_forceParityLogReint) { 574 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 575 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 576 raidPtr->regionInfo[regionID].loggingEnabled = 577 RF_FALSE; 578 log = raidPtr->regionInfo[regionID].coreLog; 579 raidPtr->regionInfo[regionID].coreLog = NULL; 580 diskCount = raidPtr->regionInfo[regionID].diskCount; 581 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 582 if (diskCount > 0 || log != NULL) 583 ReintegrateRegion(raidPtr, regionID, log); 584 if (log != NULL) 585 rf_ReleaseParityLogs(raidPtr, log); 586 } 587 } 588 if (rf_parityLogDebug) { 589 printf("[parity logging disabled]\n"); 590 printf("[should be done!]\n"); 591 } 592 return (0); 593 } 594 595 int 596 rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) 597 { 598 RF_ParityLog_t *reintQueue, *flushQueue; 599 int workNeeded, done = RF_FALSE; 600 int s; 601 602 /* Main program for parity logging disk thread. This routine waits 603 * for work to appear in either the flush or reintegration queues and 604 * is responsible for flushing core logs to the log disk as well as 605 * reintegrating parity regions. 606 * 607 * BLOCKING */ 608 609 s = splbio(); 610 611 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 612 613 /* 614 * Inform our creator that we're running. Don't bother doing the 615 * mutex lock/unlock dance- we locked above, and we'll unlock 616 * below with nothing to do, yet. 617 */ 618 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; 619 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 620 621 /* empty the work queues */ 622 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 623 raidPtr->parityLogDiskQueue.flushQueue = NULL; 624 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 625 raidPtr->parityLogDiskQueue.reintQueue = NULL; 626 workNeeded = (flushQueue || reintQueue); 627 628 while (!done) { 629 while (workNeeded) { 630 /* First, flush all logs in the flush queue, freeing 631 * buffers Second, reintegrate all regions which are 632 * reported as full. Third, append queued log data 633 * until blocked. 634 * 635 * Note: Incoming appends (ParityLogAppend) can block on 636 * either 1. empty buffer pool 2. region under 637 * reintegration To preserve a global FIFO ordering of 638 * appends, buffers are not released to the world 639 * until those appends blocked on buffers are removed 640 * from the append queue. Similarly, regions which 641 * are reintegrated are not opened for general use 642 * until the append queue has been emptied. */ 643 644 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 645 646 /* empty flushQueue, using free'd log buffers to 647 * process bufTail */ 648 if (flushQueue) 649 FlushLogsToDisk(raidPtr, flushQueue); 650 651 /* empty reintQueue, flushing from reintTail as we go */ 652 if (reintQueue) 653 ReintegrateLogs(raidPtr, reintQueue); 654 655 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 656 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 657 raidPtr->parityLogDiskQueue.flushQueue = NULL; 658 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 659 raidPtr->parityLogDiskQueue.reintQueue = NULL; 660 workNeeded = (flushQueue || reintQueue); 661 } 662 /* no work is needed at this point */ 663 if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { 664 /* shutdown parity logging 1. disable parity logging 665 * in all regions 2. reintegrate all regions */ 666 done = RF_TRUE; /* thread disabled, no work needed */ 667 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 668 rf_ShutdownLogging(raidPtr); 669 } 670 if (!done) { 671 /* thread enabled, no work needed, so sleep */ 672 if (rf_parityLogDebug) 673 printf("[parity logging disk manager sleeping]\n"); 674 RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, 675 raidPtr->parityLogDiskQueue.mutex); 676 if (rf_parityLogDebug) 677 printf("[parity logging disk manager just woke up]\n"); 678 flushQueue = raidPtr->parityLogDiskQueue.flushQueue; 679 raidPtr->parityLogDiskQueue.flushQueue = NULL; 680 reintQueue = raidPtr->parityLogDiskQueue.reintQueue; 681 raidPtr->parityLogDiskQueue.reintQueue = NULL; 682 workNeeded = (flushQueue || reintQueue); 683 } 684 } 685 /* 686 * Announce that we're done. 687 */ 688 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 689 raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; 690 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 691 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 692 693 splx(s); 694 695 /* 696 * In the NetBSD kernel, the thread must exit; returning would 697 * cause the proc trampoline to attempt to return to userspace. 698 */ 699 kthread_exit(0); /* does not return */ 700 } 701 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 702