1 /* $NetBSD: rf_paritylog.c,v 1.6 2001/10/04 15:58:54 oster Exp $ */ 2 /* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: William V. Courtright II 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29 /* Code for manipulating in-core parity logs 30 * 31 */ 32 33 #include "rf_archs.h" 34 35 #if RF_INCLUDE_PARITYLOGGING > 0 36 37 /* 38 * Append-only log for recording parity "update" and "overwrite" records 39 */ 40 41 #include <dev/raidframe/raidframevar.h> 42 43 #include "rf_threadstuff.h" 44 #include "rf_mcpair.h" 45 #include "rf_raid.h" 46 #include "rf_dag.h" 47 #include "rf_dagfuncs.h" 48 #include "rf_desc.h" 49 #include "rf_layout.h" 50 #include "rf_diskqueue.h" 51 #include "rf_etimer.h" 52 #include "rf_paritylog.h" 53 #include "rf_general.h" 54 #include "rf_map.h" 55 #include "rf_paritylogging.h" 56 #include "rf_paritylogDiskMgr.h" 57 58 static RF_CommonLogData_t * 59 AllocParityLogCommonData(RF_Raid_t * raidPtr) 60 { 61 RF_CommonLogData_t *common = NULL; 62 int rc; 63 64 /* Return a struct for holding common parity log information from the 65 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list 66 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */ 67 68 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 69 if (raidPtr->parityLogDiskQueue.freeCommonList) { 70 common = raidPtr->parityLogDiskQueue.freeCommonList; 71 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; 72 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 73 } else { 74 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 75 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); 76 rc = rf_mutex_init(&common->mutex); 77 if (rc) { 78 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__, 79 __LINE__, rc); 80 RF_Free(common, sizeof(RF_CommonLogData_t)); 81 common = NULL; 82 } 83 } 84 common->next = NULL; 85 return (common); 86 } 87 88 static void 89 FreeParityLogCommonData(RF_CommonLogData_t * common) 90 { 91 RF_Raid_t *raidPtr; 92 93 /* Insert a single struct for holding parity log information (data) 94 * into the free list (rf_parityLogDiskQueue.freeCommonList). 95 * NON-BLOCKING */ 96 97 raidPtr = common->raidPtr; 98 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 99 common->next = raidPtr->parityLogDiskQueue.freeCommonList; 100 raidPtr->parityLogDiskQueue.freeCommonList = common; 101 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 102 } 103 104 static RF_ParityLogData_t * 105 AllocParityLogData(RF_Raid_t * raidPtr) 106 { 107 RF_ParityLogData_t *data = NULL; 108 109 /* Return a struct for holding parity log information from the free 110 * list (rf_parityLogDiskQueue.freeList). If the free list is empty, 111 * call RF_Malloc to create a new structure. NON-BLOCKING */ 112 113 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 114 if (raidPtr->parityLogDiskQueue.freeDataList) { 115 data = raidPtr->parityLogDiskQueue.freeDataList; 116 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; 117 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 118 } else { 119 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 120 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); 121 } 122 data->next = NULL; 123 data->prev = NULL; 124 return (data); 125 } 126 127 128 static void 129 FreeParityLogData(RF_ParityLogData_t * data) 130 { 131 RF_ParityLogData_t *nextItem; 132 RF_Raid_t *raidPtr; 133 134 /* Insert a linked list of structs for holding parity log information 135 * (data) into the free list (parityLogDiskQueue.freeList). 136 * NON-BLOCKING */ 137 138 raidPtr = data->common->raidPtr; 139 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 140 while (data) { 141 nextItem = data->next; 142 data->next = raidPtr->parityLogDiskQueue.freeDataList; 143 raidPtr->parityLogDiskQueue.freeDataList = data; 144 data = nextItem; 145 } 146 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 147 } 148 149 150 static void 151 EnqueueParityLogData( 152 RF_ParityLogData_t * data, 153 RF_ParityLogData_t ** head, 154 RF_ParityLogData_t ** tail) 155 { 156 RF_Raid_t *raidPtr; 157 158 /* Insert an in-core parity log (*data) into the head of a disk queue 159 * (*head, *tail). NON-BLOCKING */ 160 161 raidPtr = data->common->raidPtr; 162 if (rf_parityLogDebug) 163 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 164 RF_ASSERT(data->prev == NULL); 165 RF_ASSERT(data->next == NULL); 166 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 167 if (*head) { 168 /* insert into head of queue */ 169 RF_ASSERT((*head)->prev == NULL); 170 RF_ASSERT((*tail)->next == NULL); 171 data->next = *head; 172 (*head)->prev = data; 173 *head = data; 174 } else { 175 /* insert into empty list */ 176 RF_ASSERT(*head == NULL); 177 RF_ASSERT(*tail == NULL); 178 *head = data; 179 *tail = data; 180 } 181 RF_ASSERT((*head)->prev == NULL); 182 RF_ASSERT((*tail)->next == NULL); 183 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 184 } 185 186 static RF_ParityLogData_t * 187 DequeueParityLogData( 188 RF_Raid_t * raidPtr, 189 RF_ParityLogData_t ** head, 190 RF_ParityLogData_t ** tail, 191 int ignoreLocks) 192 { 193 RF_ParityLogData_t *data; 194 195 /* Remove and return an in-core parity log from the tail of a disk 196 * queue (*head, *tail). NON-BLOCKING */ 197 198 /* remove from tail, preserving FIFO order */ 199 if (!ignoreLocks) 200 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 201 data = *tail; 202 if (data) { 203 if (*head == *tail) { 204 /* removing last item from queue */ 205 *head = NULL; 206 *tail = NULL; 207 } else { 208 *tail = (*tail)->prev; 209 (*tail)->next = NULL; 210 RF_ASSERT((*head)->prev == NULL); 211 RF_ASSERT((*tail)->next == NULL); 212 } 213 data->next = NULL; 214 data->prev = NULL; 215 if (rf_parityLogDebug) 216 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 217 } 218 if (*head) { 219 RF_ASSERT((*head)->prev == NULL); 220 RF_ASSERT((*tail)->next == NULL); 221 } 222 if (!ignoreLocks) 223 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 224 return (data); 225 } 226 227 228 static void 229 RequeueParityLogData( 230 RF_ParityLogData_t * data, 231 RF_ParityLogData_t ** head, 232 RF_ParityLogData_t ** tail) 233 { 234 RF_Raid_t *raidPtr; 235 236 /* Insert an in-core parity log (*data) into the tail of a disk queue 237 * (*head, *tail). NON-BLOCKING */ 238 239 raidPtr = data->common->raidPtr; 240 RF_ASSERT(data); 241 if (rf_parityLogDebug) 242 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); 243 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 244 if (*tail) { 245 /* append to tail of list */ 246 data->prev = *tail; 247 data->next = NULL; 248 (*tail)->next = data; 249 *tail = data; 250 } else { 251 /* inserting into an empty list */ 252 *head = data; 253 *tail = data; 254 (*head)->prev = NULL; 255 (*tail)->next = NULL; 256 } 257 RF_ASSERT((*head)->prev == NULL); 258 RF_ASSERT((*tail)->next == NULL); 259 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 260 } 261 262 RF_ParityLogData_t * 263 rf_CreateParityLogData( 264 RF_ParityRecordType_t operation, 265 RF_PhysDiskAddr_t * pda, 266 caddr_t bufPtr, 267 RF_Raid_t * raidPtr, 268 int (*wakeFunc) (RF_DagNode_t * node, int status), 269 void *wakeArg, 270 RF_AccTraceEntry_t * tracerec, 271 RF_Etimer_t startTime) 272 { 273 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; 274 RF_CommonLogData_t *common; 275 RF_PhysDiskAddr_t *diskAddress; 276 int boundary, offset = 0; 277 278 /* Return an initialized struct of info to be logged. Build one item 279 * per physical disk address, one item per region. 280 * 281 * NON-BLOCKING */ 282 283 diskAddress = pda; 284 common = AllocParityLogCommonData(raidPtr); 285 RF_ASSERT(common); 286 287 common->operation = operation; 288 common->bufPtr = bufPtr; 289 common->raidPtr = raidPtr; 290 common->wakeFunc = wakeFunc; 291 common->wakeArg = wakeArg; 292 common->tracerec = tracerec; 293 common->startTime = startTime; 294 common->cnt = 0; 295 296 if (rf_parityLogDebug) 297 printf("[entering CreateParityLogData]\n"); 298 while (diskAddress) { 299 common->cnt++; 300 data = AllocParityLogData(raidPtr); 301 RF_ASSERT(data); 302 data->common = common; 303 data->next = NULL; 304 data->prev = NULL; 305 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); 306 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { 307 /* disk address does not cross a region boundary */ 308 data->diskAddress = *diskAddress; 309 data->bufOffset = offset; 310 offset = offset + diskAddress->numSector; 311 EnqueueParityLogData(data, &resultHead, &resultTail); 312 /* adjust disk address */ 313 diskAddress = diskAddress->next; 314 } else { 315 /* disk address crosses a region boundary */ 316 /* find address where region is crossed */ 317 boundary = 0; 318 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) 319 boundary++; 320 321 /* enter data before the boundary */ 322 data->diskAddress = *diskAddress; 323 data->diskAddress.numSector = boundary; 324 data->bufOffset = offset; 325 offset += boundary; 326 EnqueueParityLogData(data, &resultHead, &resultTail); 327 /* adjust disk address */ 328 diskAddress->startSector += boundary; 329 diskAddress->numSector -= boundary; 330 } 331 } 332 if (rf_parityLogDebug) 333 printf("[leaving CreateParityLogData]\n"); 334 return (resultHead); 335 } 336 337 338 RF_ParityLogData_t * 339 rf_SearchAndDequeueParityLogData( 340 RF_Raid_t * raidPtr, 341 int regionID, 342 RF_ParityLogData_t ** head, 343 RF_ParityLogData_t ** tail, 344 int ignoreLocks) 345 { 346 RF_ParityLogData_t *w; 347 348 /* Remove and return an in-core parity log from a specified region 349 * (regionID). If a matching log is not found, return NULL. 350 * 351 * NON-BLOCKING. */ 352 353 /* walk backward through a list, looking for an entry with a matching 354 * region ID */ 355 if (!ignoreLocks) 356 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 357 w = (*tail); 358 while (w) { 359 if (w->regionID == regionID) { 360 /* remove an element from the list */ 361 if (w == *tail) { 362 if (*head == *tail) { 363 /* removing only element in the list */ 364 *head = NULL; 365 *tail = NULL; 366 } else { 367 /* removing last item in the list */ 368 *tail = (*tail)->prev; 369 (*tail)->next = NULL; 370 RF_ASSERT((*head)->prev == NULL); 371 RF_ASSERT((*tail)->next == NULL); 372 } 373 } else { 374 if (w == *head) { 375 /* removing first item in the list */ 376 *head = (*head)->next; 377 (*head)->prev = NULL; 378 RF_ASSERT((*head)->prev == NULL); 379 RF_ASSERT((*tail)->next == NULL); 380 } else { 381 /* removing an item from the middle of 382 * the list */ 383 w->prev->next = w->next; 384 w->next->prev = w->prev; 385 RF_ASSERT((*head)->prev == NULL); 386 RF_ASSERT((*tail)->next == NULL); 387 } 388 } 389 w->prev = NULL; 390 w->next = NULL; 391 if (rf_parityLogDebug) 392 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); 393 return (w); 394 } else 395 w = w->prev; 396 } 397 if (!ignoreLocks) 398 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 399 return (NULL); 400 } 401 402 static RF_ParityLogData_t * 403 DequeueMatchingLogData( 404 RF_Raid_t * raidPtr, 405 RF_ParityLogData_t ** head, 406 RF_ParityLogData_t ** tail) 407 { 408 RF_ParityLogData_t *logDataList, *logData; 409 int regionID; 410 411 /* Remove and return an in-core parity log from the tail of a disk 412 * queue (*head, *tail). Then remove all matching (identical 413 * regionIDs) logData and return as a linked list. 414 * 415 * NON-BLOCKING */ 416 417 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); 418 if (logDataList) { 419 regionID = logDataList->regionID; 420 logData = logDataList; 421 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 422 while (logData->next) { 423 logData = logData->next; 424 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); 425 } 426 } 427 return (logDataList); 428 } 429 430 431 static RF_ParityLog_t * 432 AcquireParityLog( 433 RF_ParityLogData_t * logData, 434 int finish) 435 { 436 RF_ParityLog_t *log = NULL; 437 RF_Raid_t *raidPtr; 438 439 /* Grab a log buffer from the pool and return it. If no buffers are 440 * available, return NULL. NON-BLOCKING */ 441 raidPtr = logData->common->raidPtr; 442 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 443 if (raidPtr->parityLogPool.parityLogs) { 444 log = raidPtr->parityLogPool.parityLogs; 445 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; 446 log->regionID = logData->regionID; 447 log->numRecords = 0; 448 log->next = NULL; 449 raidPtr->logsInUse++; 450 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 451 } else { 452 /* no logs available, so place ourselves on the queue of work 453 * waiting on log buffers this is done while 454 * parityLogPool.mutex is held, to ensure synchronization with 455 * ReleaseParityLogs. */ 456 if (rf_parityLogDebug) 457 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish); 458 if (finish) 459 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 460 else 461 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 462 } 463 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 464 return (log); 465 } 466 467 void 468 rf_ReleaseParityLogs( 469 RF_Raid_t * raidPtr, 470 RF_ParityLog_t * firstLog) 471 { 472 RF_ParityLogData_t *logDataList; 473 RF_ParityLog_t *log, *lastLog; 474 int cnt; 475 476 /* Insert a linked list of parity logs (firstLog) to the free list 477 * (parityLogPool.parityLogPool) 478 * 479 * NON-BLOCKING. */ 480 481 RF_ASSERT(firstLog); 482 483 /* Before returning logs to global free list, service all requests 484 * which are blocked on logs. Holding mutexes for parityLogPool and 485 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ 486 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 487 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 488 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 489 log = firstLog; 490 if (firstLog) 491 firstLog = firstLog->next; 492 log->numRecords = 0; 493 log->next = NULL; 494 while (logDataList && log) { 495 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 496 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 497 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); 498 if (rf_parityLogDebug) 499 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID); 500 if (log == NULL) { 501 log = firstLog; 502 if (firstLog) { 503 firstLog = firstLog->next; 504 log->numRecords = 0; 505 log->next = NULL; 506 } 507 } 508 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex); 509 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 510 if (log) 511 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); 512 } 513 /* return remaining logs to pool */ 514 if (log) { 515 log->next = firstLog; 516 firstLog = log; 517 } 518 if (firstLog) { 519 lastLog = firstLog; 520 raidPtr->logsInUse--; 521 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 522 while (lastLog->next) { 523 lastLog = lastLog->next; 524 raidPtr->logsInUse--; 525 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); 526 } 527 lastLog->next = raidPtr->parityLogPool.parityLogs; 528 raidPtr->parityLogPool.parityLogs = firstLog; 529 cnt = 0; 530 log = raidPtr->parityLogPool.parityLogs; 531 while (log) { 532 cnt++; 533 log = log->next; 534 } 535 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); 536 } 537 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex); 538 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 539 } 540 541 static void 542 ReintLog( 543 RF_Raid_t * raidPtr, 544 int regionID, 545 RF_ParityLog_t * log) 546 { 547 RF_ASSERT(log); 548 549 /* Insert an in-core parity log (log) into the disk queue of 550 * reintegration work. Set the flag (reintInProgress) for the 551 * specified region (regionID) to indicate that reintegration is in 552 * progress for this region. NON-BLOCKING */ 553 554 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 555 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint 556 * complete */ 557 558 if (rf_parityLogDebug) 559 printf("[requesting reintegration of region %d]\n", log->regionID); 560 /* move record to reintegration queue */ 561 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 562 log->next = raidPtr->parityLogDiskQueue.reintQueue; 563 raidPtr->parityLogDiskQueue.reintQueue = log; 564 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 565 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 566 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 567 } 568 569 static void 570 FlushLog( 571 RF_Raid_t * raidPtr, 572 RF_ParityLog_t * log) 573 { 574 /* insert a core log (log) into a list of logs 575 * (parityLogDiskQueue.flushQueue) waiting to be written to disk. 576 * NON-BLOCKING */ 577 578 RF_ASSERT(log); 579 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 580 RF_ASSERT(log->next == NULL); 581 /* move log to flush queue */ 582 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 583 log->next = raidPtr->parityLogDiskQueue.flushQueue; 584 raidPtr->parityLogDiskQueue.flushQueue = log; 585 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 586 RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); 587 } 588 589 static int 590 DumpParityLogToDisk( 591 int finish, 592 RF_ParityLogData_t * logData) 593 { 594 int i, diskCount, regionID = logData->regionID; 595 RF_ParityLog_t *log; 596 RF_Raid_t *raidPtr; 597 598 raidPtr = logData->common->raidPtr; 599 600 /* Move a core log to disk. If the log disk is full, initiate 601 * reintegration. 602 * 603 * Return (0) if we can enqueue the dump immediately, otherwise return 604 * (1) to indicate we are blocked on reintegration and control of the 605 * thread should be relinquished. 606 * 607 * Caller must hold regionInfo[regionID].mutex 608 * 609 * NON-BLOCKING */ 610 611 if (rf_parityLogDebug) 612 printf("[dumping parity log to disk, region %d]\n", regionID); 613 log = raidPtr->regionInfo[regionID].coreLog; 614 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); 615 RF_ASSERT(log->next == NULL); 616 617 /* if reintegration is in progress, must queue work */ 618 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 619 if (raidPtr->regionInfo[regionID].reintInProgress) { 620 /* Can not proceed since this region is currently being 621 * reintegrated. We can not block, so queue remaining work and 622 * return */ 623 if (rf_parityLogDebug) 624 printf("[region %d waiting on reintegration]\n", regionID); 625 /* XXX not sure about the use of finish - shouldn't this 626 * always be "Enqueue"? */ 627 if (finish) 628 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 629 else 630 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); 631 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 632 return (1); /* relenquish control of this thread */ 633 } 634 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 635 raidPtr->regionInfo[regionID].coreLog = NULL; 636 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity) 637 /* IMPORTANT!! this loop bound assumes region disk holds an 638 * integral number of core logs */ 639 { 640 /* update disk map for this region */ 641 diskCount = raidPtr->regionInfo[regionID].diskCount; 642 for (i = 0; i < raidPtr->numSectorsPerLog; i++) { 643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; 644 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; 645 } 646 log->diskOffset = diskCount; 647 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; 648 FlushLog(raidPtr, log); 649 } else { 650 /* no room for log on disk, send it to disk manager and 651 * request reintegration */ 652 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); 653 ReintLog(raidPtr, regionID, log); 654 } 655 if (rf_parityLogDebug) 656 printf("[finished dumping parity log to disk, region %d]\n", regionID); 657 return (0); 658 } 659 660 int 661 rf_ParityLogAppend( 662 RF_ParityLogData_t * logData, 663 int finish, 664 RF_ParityLog_t ** incomingLog, 665 int clearReintFlag) 666 { 667 int regionID, logItem, itemDone; 668 RF_ParityLogData_t *item; 669 int punt, done = RF_FALSE; 670 RF_ParityLog_t *log; 671 RF_Raid_t *raidPtr; 672 RF_Etimer_t timer; 673 int (*wakeFunc) (RF_DagNode_t * node, int status); 674 void *wakeArg; 675 676 /* Add parity to the appropriate log, one sector at a time. This 677 * routine is called is called by dag functions ParityLogUpdateFunc 678 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING. 679 * 680 * Parity to be logged is contained in a linked-list (logData). When 681 * this routine returns, every sector in the list will be in one of 682 * three places: 1) entered into the parity log 2) queued, waiting on 683 * reintegration 3) queued, waiting on a core log 684 * 685 * Blocked work is passed to the ParityLoggingDiskManager for completion. 686 * Later, as conditions which required the block are removed, the work 687 * reenters this routine with the "finish" parameter set to "RF_TRUE." 688 * 689 * NON-BLOCKING */ 690 691 raidPtr = logData->common->raidPtr; 692 /* lock the region for the first item in logData */ 693 RF_ASSERT(logData != NULL); 694 regionID = logData->regionID; 695 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 696 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 697 698 if (clearReintFlag) { 699 /* Enable flushing for this region. Holding both locks 700 * provides a synchronization barrier with DumpParityLogToDisk */ 701 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); 702 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 703 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); 704 raidPtr->regionInfo[regionID].diskCount = 0; 705 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; 706 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now 707 * enabled */ 708 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); 709 } 710 /* process each item in logData */ 711 while (logData) { 712 /* remove an item from logData */ 713 item = logData; 714 logData = logData->next; 715 item->next = NULL; 716 item->prev = NULL; 717 718 if (rf_parityLogDebug) 719 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); 720 721 /* see if we moved to a new region */ 722 if (regionID != item->regionID) { 723 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 724 regionID = item->regionID; 725 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 726 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); 727 } 728 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This 729 * can happen in one of two ways: 1) no core 730 * log (AcquireParityLog) 2) waiting on 731 * reintegration (DumpParityLogToDisk) If punt 732 * is RF_TRUE, the dataItem was queued, so 733 * skip to next item. */ 734 735 /* process item, one sector at a time, until all sectors 736 * processed or we punt */ 737 if (item->diskAddress.numSector > 0) 738 done = RF_FALSE; 739 else 740 RF_ASSERT(0); 741 while (!punt && !done) { 742 /* verify that a core log exists for this region */ 743 if (!raidPtr->regionInfo[regionID].coreLog) { 744 /* Attempt to acquire a parity log. If 745 * acquisition fails, queue remaining work in 746 * data item and move to nextItem. */ 747 if (incomingLog) 748 if (*incomingLog) { 749 RF_ASSERT((*incomingLog)->next == NULL); 750 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 751 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 752 *incomingLog = NULL; 753 } else 754 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 755 else 756 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 757 /* Note: AcquireParityLog either returns a log 758 * or enqueues currentItem */ 759 } 760 if (!raidPtr->regionInfo[regionID].coreLog) 761 punt = RF_TRUE; /* failed to find a core log */ 762 else { 763 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 764 /* verify that the log has room for new 765 * entries */ 766 /* if log is full, dump it to disk and grab a 767 * new log */ 768 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { 769 /* log is full, dump it to disk */ 770 if (DumpParityLogToDisk(finish, item)) 771 punt = RF_TRUE; /* dump unsuccessful, 772 * blocked on 773 * reintegration */ 774 else { 775 /* dump was successful */ 776 if (incomingLog) 777 if (*incomingLog) { 778 RF_ASSERT((*incomingLog)->next == NULL); 779 raidPtr->regionInfo[regionID].coreLog = *incomingLog; 780 raidPtr->regionInfo[regionID].coreLog->regionID = regionID; 781 *incomingLog = NULL; 782 } else 783 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 784 else 785 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); 786 /* if a core log is not 787 * available, must queue work 788 * and return */ 789 if (!raidPtr->regionInfo[regionID].coreLog) 790 punt = RF_TRUE; /* blocked on log 791 * availability */ 792 } 793 } 794 } 795 /* if we didn't punt on this item, attempt to add a 796 * sector to the core log */ 797 if (!punt) { 798 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); 799 /* at this point, we have a core log with 800 * enough room for a sector */ 801 /* copy a sector into the log */ 802 log = raidPtr->regionInfo[regionID].coreLog; 803 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); 804 logItem = log->numRecords++; 805 log->records[logItem].parityAddr = item->diskAddress; 806 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); 807 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); 808 log->records[logItem].parityAddr.numSector = 1; 809 log->records[logItem].operation = item->common->operation; 810 bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector)); 811 item->diskAddress.numSector--; 812 item->diskAddress.startSector++; 813 if (item->diskAddress.numSector == 0) 814 done = RF_TRUE; 815 } 816 } 817 818 if (!punt) { 819 /* Processed this item completely, decrement count of 820 * items to be processed. */ 821 RF_ASSERT(item->diskAddress.numSector == 0); 822 RF_LOCK_MUTEX(item->common->mutex); 823 item->common->cnt--; 824 if (item->common->cnt == 0) 825 itemDone = RF_TRUE; 826 else 827 itemDone = RF_FALSE; 828 RF_UNLOCK_MUTEX(item->common->mutex); 829 if (itemDone) { 830 /* Finished processing all log data for this 831 * IO Return structs to free list and invoke 832 * wakeup function. */ 833 timer = item->common->startTime; /* grab initial value of 834 * timer */ 835 RF_ETIMER_STOP(timer); 836 RF_ETIMER_EVAL(timer); 837 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); 838 if (rf_parityLogDebug) 839 printf("[waking process for region %d]\n", item->regionID); 840 wakeFunc = item->common->wakeFunc; 841 wakeArg = item->common->wakeArg; 842 FreeParityLogCommonData(item->common); 843 FreeParityLogData(item); 844 (wakeFunc) (wakeArg, 0); 845 } else 846 FreeParityLogData(item); 847 } 848 } 849 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 850 if (rf_parityLogDebug) 851 printf("[exiting ParityLogAppend]\n"); 852 return (0); 853 } 854 855 856 void 857 rf_EnableParityLogging(RF_Raid_t * raidPtr) 858 { 859 int regionID; 860 861 for (regionID = 0; regionID < rf_numParityRegions; regionID++) { 862 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 863 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; 864 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); 865 } 866 if (rf_parityLogDebug) 867 printf("[parity logging enabled]\n"); 868 } 869 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ 870