1 /* $NetBSD: rf_paritylog.c,v 1.20 2019/10/10 03:43:59 christos Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: William V. Courtright II
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /* Code for manipulating in-core parity logs
30 *
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.20 2019/10/10 03:43:59 christos Exp $");
35
36 #include "rf_archs.h"
37
38 #if RF_INCLUDE_PARITYLOGGING > 0
39
40 /*
41 * Append-only log for recording parity "update" and "overwrite" records
42 */
43
44 #include <dev/raidframe/raidframevar.h>
45
46 #include "rf_threadstuff.h"
47 #include "rf_mcpair.h"
48 #include "rf_raid.h"
49 #include "rf_dag.h"
50 #include "rf_dagfuncs.h"
51 #include "rf_desc.h"
52 #include "rf_layout.h"
53 #include "rf_diskqueue.h"
54 #include "rf_etimer.h"
55 #include "rf_paritylog.h"
56 #include "rf_general.h"
57 #include "rf_map.h"
58 #include "rf_paritylogging.h"
59 #include "rf_paritylogDiskMgr.h"
60
61 static RF_CommonLogData_t *
AllocParityLogCommonData(RF_Raid_t * raidPtr)62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
63 {
64 RF_CommonLogData_t *common = NULL;
65
66 /* Return a struct for holding common parity log information from the
67 * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
68 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
69
70 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
71 if (raidPtr->parityLogDiskQueue.freeCommonList) {
72 common = raidPtr->parityLogDiskQueue.freeCommonList;
73 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
74 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
75 } else {
76 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
77 common = RF_Malloc(sizeof(*common));
78 /* destroy is in rf_paritylogging.c */
79 rf_init_mutex2(common->mutex, IPL_VM);
80 }
81 common->next = NULL;
82 return (common);
83 }
84
85 static void
FreeParityLogCommonData(RF_CommonLogData_t * common)86 FreeParityLogCommonData(RF_CommonLogData_t * common)
87 {
88 RF_Raid_t *raidPtr;
89
90 /* Insert a single struct for holding parity log information (data)
91 * into the free list (rf_parityLogDiskQueue.freeCommonList).
92 * NON-BLOCKING */
93
94 raidPtr = common->raidPtr;
95 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
96 common->next = raidPtr->parityLogDiskQueue.freeCommonList;
97 raidPtr->parityLogDiskQueue.freeCommonList = common;
98 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
99 }
100
101 static RF_ParityLogData_t *
AllocParityLogData(RF_Raid_t * raidPtr)102 AllocParityLogData(RF_Raid_t * raidPtr)
103 {
104 RF_ParityLogData_t *data = NULL;
105
106 /* Return a struct for holding parity log information from the free
107 * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
108 * call RF_Malloc to create a new structure. NON-BLOCKING */
109
110 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
111 if (raidPtr->parityLogDiskQueue.freeDataList) {
112 data = raidPtr->parityLogDiskQueue.freeDataList;
113 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
114 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
115 } else {
116 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
117 data = RF_Malloc(sizeof(*data));
118 }
119 data->next = NULL;
120 data->prev = NULL;
121 return (data);
122 }
123
124
125 static void
FreeParityLogData(RF_ParityLogData_t * data)126 FreeParityLogData(RF_ParityLogData_t * data)
127 {
128 RF_ParityLogData_t *nextItem;
129 RF_Raid_t *raidPtr;
130
131 /* Insert a linked list of structs for holding parity log information
132 * (data) into the free list (parityLogDiskQueue.freeList).
133 * NON-BLOCKING */
134
135 raidPtr = data->common->raidPtr;
136 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
137 while (data) {
138 nextItem = data->next;
139 data->next = raidPtr->parityLogDiskQueue.freeDataList;
140 raidPtr->parityLogDiskQueue.freeDataList = data;
141 data = nextItem;
142 }
143 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
144 }
145
146
147 static void
EnqueueParityLogData(RF_ParityLogData_t * data,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)148 EnqueueParityLogData(
149 RF_ParityLogData_t * data,
150 RF_ParityLogData_t ** head,
151 RF_ParityLogData_t ** tail)
152 {
153 RF_Raid_t *raidPtr;
154
155 /* Insert an in-core parity log (*data) into the head of a disk queue
156 * (*head, *tail). NON-BLOCKING */
157
158 raidPtr = data->common->raidPtr;
159 if (rf_parityLogDebug)
160 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
161 RF_ASSERT(data->prev == NULL);
162 RF_ASSERT(data->next == NULL);
163 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
164 if (*head) {
165 /* insert into head of queue */
166 RF_ASSERT((*head)->prev == NULL);
167 RF_ASSERT((*tail)->next == NULL);
168 data->next = *head;
169 (*head)->prev = data;
170 *head = data;
171 } else {
172 /* insert into empty list */
173 RF_ASSERT(*head == NULL);
174 RF_ASSERT(*tail == NULL);
175 *head = data;
176 *tail = data;
177 }
178 RF_ASSERT((*head)->prev == NULL);
179 RF_ASSERT((*tail)->next == NULL);
180 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
181 }
182
183 static RF_ParityLogData_t *
DequeueParityLogData(RF_Raid_t * raidPtr,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail,int ignoreLocks)184 DequeueParityLogData(
185 RF_Raid_t * raidPtr,
186 RF_ParityLogData_t ** head,
187 RF_ParityLogData_t ** tail,
188 int ignoreLocks)
189 {
190 RF_ParityLogData_t *data;
191
192 /* Remove and return an in-core parity log from the tail of a disk
193 * queue (*head, *tail). NON-BLOCKING */
194
195 /* remove from tail, preserving FIFO order */
196 if (!ignoreLocks)
197 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
198 data = *tail;
199 if (data) {
200 if (*head == *tail) {
201 /* removing last item from queue */
202 *head = NULL;
203 *tail = NULL;
204 } else {
205 *tail = (*tail)->prev;
206 (*tail)->next = NULL;
207 RF_ASSERT((*head)->prev == NULL);
208 RF_ASSERT((*tail)->next == NULL);
209 }
210 data->next = NULL;
211 data->prev = NULL;
212 if (rf_parityLogDebug)
213 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
214 }
215 if (*head) {
216 RF_ASSERT((*head)->prev == NULL);
217 RF_ASSERT((*tail)->next == NULL);
218 }
219 if (!ignoreLocks)
220 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
221 return (data);
222 }
223
224
225 static void
RequeueParityLogData(RF_ParityLogData_t * data,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)226 RequeueParityLogData(
227 RF_ParityLogData_t * data,
228 RF_ParityLogData_t ** head,
229 RF_ParityLogData_t ** tail)
230 {
231 RF_Raid_t *raidPtr;
232
233 /* Insert an in-core parity log (*data) into the tail of a disk queue
234 * (*head, *tail). NON-BLOCKING */
235
236 raidPtr = data->common->raidPtr;
237 RF_ASSERT(data);
238 if (rf_parityLogDebug)
239 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
240 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
241 if (*tail) {
242 /* append to tail of list */
243 data->prev = *tail;
244 data->next = NULL;
245 (*tail)->next = data;
246 *tail = data;
247 } else {
248 /* inserting into an empty list */
249 *head = data;
250 *tail = data;
251 (*head)->prev = NULL;
252 (*tail)->next = NULL;
253 }
254 RF_ASSERT((*head)->prev == NULL);
255 RF_ASSERT((*tail)->next == NULL);
256 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
257 }
258
259 RF_ParityLogData_t *
rf_CreateParityLogData(RF_ParityRecordType_t operation,RF_PhysDiskAddr_t * pda,void * bufPtr,RF_Raid_t * raidPtr,void (* wakeFunc)(void *,int),void * wakeArg,RF_AccTraceEntry_t * tracerec,RF_Etimer_t startTime)260 rf_CreateParityLogData(
261 RF_ParityRecordType_t operation,
262 RF_PhysDiskAddr_t * pda,
263 void *bufPtr,
264 RF_Raid_t * raidPtr,
265 void (*wakeFunc)(void *, int),
266 void *wakeArg,
267 RF_AccTraceEntry_t * tracerec,
268 RF_Etimer_t startTime)
269 {
270 RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
271 RF_CommonLogData_t *common;
272 RF_PhysDiskAddr_t *diskAddress;
273 int boundary, offset = 0;
274
275 /* Return an initialized struct of info to be logged. Build one item
276 * per physical disk address, one item per region.
277 *
278 * NON-BLOCKING */
279
280 diskAddress = pda;
281 common = AllocParityLogCommonData(raidPtr);
282 RF_ASSERT(common);
283
284 common->operation = operation;
285 common->bufPtr = bufPtr;
286 common->raidPtr = raidPtr;
287 common->wakeFunc = wakeFunc;
288 common->wakeArg = wakeArg;
289 common->tracerec = tracerec;
290 common->startTime = startTime;
291 common->cnt = 0;
292
293 if (rf_parityLogDebug)
294 printf("[entering CreateParityLogData]\n");
295 while (diskAddress) {
296 common->cnt++;
297 data = AllocParityLogData(raidPtr);
298 RF_ASSERT(data);
299 data->common = common;
300 data->next = NULL;
301 data->prev = NULL;
302 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
303 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
304 /* disk address does not cross a region boundary */
305 data->diskAddress = *diskAddress;
306 data->bufOffset = offset;
307 offset = offset + diskAddress->numSector;
308 EnqueueParityLogData(data, &resultHead, &resultTail);
309 /* adjust disk address */
310 diskAddress = diskAddress->next;
311 } else {
312 /* disk address crosses a region boundary */
313 /* find address where region is crossed */
314 boundary = 0;
315 while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
316 boundary++;
317
318 /* enter data before the boundary */
319 data->diskAddress = *diskAddress;
320 data->diskAddress.numSector = boundary;
321 data->bufOffset = offset;
322 offset += boundary;
323 EnqueueParityLogData(data, &resultHead, &resultTail);
324 /* adjust disk address */
325 diskAddress->startSector += boundary;
326 diskAddress->numSector -= boundary;
327 }
328 }
329 if (rf_parityLogDebug)
330 printf("[leaving CreateParityLogData]\n");
331 return (resultHead);
332 }
333
334
335 RF_ParityLogData_t *
rf_SearchAndDequeueParityLogData(RF_Raid_t * raidPtr,int regionID,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail,int ignoreLocks)336 rf_SearchAndDequeueParityLogData(
337 RF_Raid_t * raidPtr,
338 int regionID,
339 RF_ParityLogData_t ** head,
340 RF_ParityLogData_t ** tail,
341 int ignoreLocks)
342 {
343 RF_ParityLogData_t *w;
344
345 /* Remove and return an in-core parity log from a specified region
346 * (regionID). If a matching log is not found, return NULL.
347 *
348 * NON-BLOCKING. */
349
350 /* walk backward through a list, looking for an entry with a matching
351 * region ID */
352 if (!ignoreLocks)
353 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
354 w = (*tail);
355 while (w) {
356 if (w->regionID == regionID) {
357 /* remove an element from the list */
358 if (w == *tail) {
359 if (*head == *tail) {
360 /* removing only element in the list */
361 *head = NULL;
362 *tail = NULL;
363 } else {
364 /* removing last item in the list */
365 *tail = (*tail)->prev;
366 (*tail)->next = NULL;
367 RF_ASSERT((*head)->prev == NULL);
368 RF_ASSERT((*tail)->next == NULL);
369 }
370 } else {
371 if (w == *head) {
372 /* removing first item in the list */
373 *head = (*head)->next;
374 (*head)->prev = NULL;
375 RF_ASSERT((*head)->prev == NULL);
376 RF_ASSERT((*tail)->next == NULL);
377 } else {
378 /* removing an item from the middle of
379 * the list */
380 w->prev->next = w->next;
381 w->next->prev = w->prev;
382 RF_ASSERT((*head)->prev == NULL);
383 RF_ASSERT((*tail)->next == NULL);
384 }
385 }
386 w->prev = NULL;
387 w->next = NULL;
388 if (rf_parityLogDebug)
389 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
390 return (w);
391 } else
392 w = w->prev;
393 }
394 if (!ignoreLocks)
395 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
396 return (NULL);
397 }
398
399 static RF_ParityLogData_t *
DequeueMatchingLogData(RF_Raid_t * raidPtr,RF_ParityLogData_t ** head,RF_ParityLogData_t ** tail)400 DequeueMatchingLogData(
401 RF_Raid_t * raidPtr,
402 RF_ParityLogData_t ** head,
403 RF_ParityLogData_t ** tail)
404 {
405 RF_ParityLogData_t *logDataList, *logData;
406 int regionID;
407
408 /* Remove and return an in-core parity log from the tail of a disk
409 * queue (*head, *tail). Then remove all matching (identical
410 * regionIDs) logData and return as a linked list.
411 *
412 * NON-BLOCKING */
413
414 logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
415 if (logDataList) {
416 regionID = logDataList->regionID;
417 logData = logDataList;
418 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
419 while (logData->next) {
420 logData = logData->next;
421 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
422 }
423 }
424 return (logDataList);
425 }
426
427
428 static RF_ParityLog_t *
AcquireParityLog(RF_ParityLogData_t * logData,int finish)429 AcquireParityLog(
430 RF_ParityLogData_t * logData,
431 int finish)
432 {
433 RF_ParityLog_t *log = NULL;
434 RF_Raid_t *raidPtr;
435
436 /* Grab a log buffer from the pool and return it. If no buffers are
437 * available, return NULL. NON-BLOCKING */
438 raidPtr = logData->common->raidPtr;
439 rf_lock_mutex2(raidPtr->parityLogPool.mutex);
440 if (raidPtr->parityLogPool.parityLogs) {
441 log = raidPtr->parityLogPool.parityLogs;
442 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
443 log->regionID = logData->regionID;
444 log->numRecords = 0;
445 log->next = NULL;
446 raidPtr->logsInUse++;
447 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
448 } else {
449 /* no logs available, so place ourselves on the queue of work
450 * waiting on log buffers this is done while
451 * parityLogPool.mutex is held, to ensure synchronization with
452 * ReleaseParityLogs. */
453 if (rf_parityLogDebug)
454 printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
455 if (finish)
456 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
457 else
458 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459 }
460 rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
461 return (log);
462 }
463
464 void
rf_ReleaseParityLogs(RF_Raid_t * raidPtr,RF_ParityLog_t * firstLog)465 rf_ReleaseParityLogs(
466 RF_Raid_t * raidPtr,
467 RF_ParityLog_t * firstLog)
468 {
469 RF_ParityLogData_t *logDataList;
470 RF_ParityLog_t *log, *lastLog;
471 int cnt;
472
473 /* Insert a linked list of parity logs (firstLog) to the free list
474 * (parityLogPool.parityLogPool)
475 *
476 * NON-BLOCKING. */
477
478 RF_ASSERT(firstLog);
479
480 /* Before returning logs to global free list, service all requests
481 * which are blocked on logs. Holding mutexes for parityLogPool and
482 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
483 rf_lock_mutex2(raidPtr->parityLogPool.mutex);
484 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
485 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
486 log = firstLog;
487 if (firstLog)
488 firstLog = firstLog->next;
489 log->numRecords = 0;
490 log->next = NULL;
491 while (logDataList && log) {
492 rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
493 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
494 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
495 if (rf_parityLogDebug)
496 printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
497 if (log == NULL) {
498 log = firstLog;
499 if (firstLog) {
500 firstLog = firstLog->next;
501 log->numRecords = 0;
502 log->next = NULL;
503 }
504 }
505 rf_lock_mutex2(raidPtr->parityLogPool.mutex);
506 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
507 if (log)
508 logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
509 }
510 /* return remaining logs to pool */
511 if (log) {
512 log->next = firstLog;
513 firstLog = log;
514 }
515 if (firstLog) {
516 lastLog = firstLog;
517 raidPtr->logsInUse--;
518 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
519 while (lastLog->next) {
520 lastLog = lastLog->next;
521 raidPtr->logsInUse--;
522 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
523 }
524 lastLog->next = raidPtr->parityLogPool.parityLogs;
525 raidPtr->parityLogPool.parityLogs = firstLog;
526 cnt = 0;
527 log = raidPtr->parityLogPool.parityLogs;
528 while (log) {
529 cnt++;
530 log = log->next;
531 }
532 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
533 }
534 rf_unlock_mutex2(raidPtr->parityLogPool.mutex);
535 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
536 }
537
538 static void
ReintLog(RF_Raid_t * raidPtr,int regionID,RF_ParityLog_t * log)539 ReintLog(
540 RF_Raid_t * raidPtr,
541 int regionID,
542 RF_ParityLog_t * log)
543 {
544 RF_ASSERT(log);
545
546 /* Insert an in-core parity log (log) into the disk queue of
547 * reintegration work. Set the flag (reintInProgress) for the
548 * specified region (regionID) to indicate that reintegration is in
549 * progress for this region. NON-BLOCKING */
550
551 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
552 raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint
553 * complete */
554
555 if (rf_parityLogDebug)
556 printf("[requesting reintegration of region %d]\n", log->regionID);
557 /* move record to reintegration queue */
558 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
559 log->next = raidPtr->parityLogDiskQueue.reintQueue;
560 raidPtr->parityLogDiskQueue.reintQueue = log;
561 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
562 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
563 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
564 }
565
566 static void
FlushLog(RF_Raid_t * raidPtr,RF_ParityLog_t * log)567 FlushLog(
568 RF_Raid_t * raidPtr,
569 RF_ParityLog_t * log)
570 {
571 /* insert a core log (log) into a list of logs
572 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
573 * NON-BLOCKING */
574
575 RF_ASSERT(log);
576 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
577 RF_ASSERT(log->next == NULL);
578 /* move log to flush queue */
579 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
580 log->next = raidPtr->parityLogDiskQueue.flushQueue;
581 raidPtr->parityLogDiskQueue.flushQueue = log;
582 rf_signal_cond2(raidPtr->parityLogDiskQueue.cond);
583 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
584 }
585
586 static int
DumpParityLogToDisk(int finish,RF_ParityLogData_t * logData)587 DumpParityLogToDisk(
588 int finish,
589 RF_ParityLogData_t * logData)
590 {
591 int i, diskCount, regionID = logData->regionID;
592 RF_ParityLog_t *log;
593 RF_Raid_t *raidPtr;
594
595 raidPtr = logData->common->raidPtr;
596
597 /* Move a core log to disk. If the log disk is full, initiate
598 * reintegration.
599 *
600 * Return (0) if we can enqueue the dump immediately, otherwise return
601 * (1) to indicate we are blocked on reintegration and control of the
602 * thread should be relinquished.
603 *
604 * Caller must hold regionInfo[regionID].mutex
605 *
606 * NON-BLOCKING */
607
608 RF_ASSERT(rf_owned_mutex2(raidPtr->regionInfo[regionID].mutex));
609
610 if (rf_parityLogDebug)
611 printf("[dumping parity log to disk, region %d]\n", regionID);
612 log = raidPtr->regionInfo[regionID].coreLog;
613 RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
614 RF_ASSERT(log->next == NULL);
615
616 /* if reintegration is in progress, must queue work */
617 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
618 if (raidPtr->regionInfo[regionID].reintInProgress) {
619 /* Can not proceed since this region is currently being
620 * reintegrated. We can not block, so queue remaining work and
621 * return */
622 if (rf_parityLogDebug)
623 printf("[region %d waiting on reintegration]\n", regionID);
624 /* XXX not sure about the use of finish - shouldn't this
625 * always be "Enqueue"? */
626 if (finish)
627 RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
628 else
629 EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
631 return (1); /* relenquish control of this thread */
632 }
633 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
634 raidPtr->regionInfo[regionID].coreLog = NULL;
635 if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
636 /* IMPORTANT!! this loop bound assumes region disk holds an
637 * integral number of core logs */
638 {
639 /* update disk map for this region */
640 diskCount = raidPtr->regionInfo[regionID].diskCount;
641 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
642 raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
643 raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
644 }
645 log->diskOffset = diskCount;
646 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
647 FlushLog(raidPtr, log);
648 } else {
649 /* no room for log on disk, send it to disk manager and
650 * request reintegration */
651 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
652 ReintLog(raidPtr, regionID, log);
653 }
654 if (rf_parityLogDebug)
655 printf("[finished dumping parity log to disk, region %d]\n", regionID);
656 return (0);
657 }
658
659 int
rf_ParityLogAppend(RF_ParityLogData_t * logData,int finish,RF_ParityLog_t ** incomingLog,int clearReintFlag)660 rf_ParityLogAppend(
661 RF_ParityLogData_t * logData,
662 int finish,
663 RF_ParityLog_t ** incomingLog,
664 int clearReintFlag)
665 {
666 int regionID, logItem, itemDone;
667 RF_ParityLogData_t *item;
668 int punt, done = RF_FALSE;
669 RF_ParityLog_t *log;
670 RF_Raid_t *raidPtr;
671 RF_Etimer_t timer;
672 void (*wakeFunc) (void *, int);
673 void *wakeArg;
674
675 /* Add parity to the appropriate log, one sector at a time. This
676 * routine is called is called by dag functions ParityLogUpdateFunc
677 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
678 *
679 * Parity to be logged is contained in a linked-list (logData). When
680 * this routine returns, every sector in the list will be in one of
681 * three places: 1) entered into the parity log 2) queued, waiting on
682 * reintegration 3) queued, waiting on a core log
683 *
684 * Blocked work is passed to the ParityLoggingDiskManager for completion.
685 * Later, as conditions which required the block are removed, the work
686 * reenters this routine with the "finish" parameter set to "RF_TRUE."
687 *
688 * NON-BLOCKING */
689
690 raidPtr = logData->common->raidPtr;
691 /* lock the region for the first item in logData */
692 RF_ASSERT(logData != NULL);
693 regionID = logData->regionID;
694 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
695 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
696
697 if (clearReintFlag) {
698 /* Enable flushing for this region. Holding both locks
699 * provides a synchronization barrier with DumpParityLogToDisk */
700 rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex);
701 /* XXXmrg need this? */
702 rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex);
703 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
704 raidPtr->regionInfo[regionID].diskCount = 0;
705 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
706 rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now
707 * enabled */
708 /* XXXmrg need this? */
709 rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex);
710 }
711 /* process each item in logData */
712 while (logData) {
713 /* remove an item from logData */
714 item = logData;
715 logData = logData->next;
716 item->next = NULL;
717 item->prev = NULL;
718
719 if (rf_parityLogDebug)
720 printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
721
722 /* see if we moved to a new region */
723 if (regionID != item->regionID) {
724 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
725 regionID = item->regionID;
726 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
727 RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
728 }
729 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This
730 * can happen in one of two ways: 1) no core
731 * log (AcquireParityLog) 2) waiting on
732 * reintegration (DumpParityLogToDisk) If punt
733 * is RF_TRUE, the dataItem was queued, so
734 * skip to next item. */
735
736 /* process item, one sector at a time, until all sectors
737 * processed or we punt */
738 if (item->diskAddress.numSector > 0)
739 done = RF_FALSE;
740 else
741 RF_ASSERT(0);
742 while (!punt && !done) {
743 /* verify that a core log exists for this region */
744 if (!raidPtr->regionInfo[regionID].coreLog) {
745 /* Attempt to acquire a parity log. If
746 * acquisition fails, queue remaining work in
747 * data item and move to nextItem. */
748 if (incomingLog)
749 if (*incomingLog) {
750 RF_ASSERT((*incomingLog)->next == NULL);
751 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
752 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
753 *incomingLog = NULL;
754 } else
755 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756 else
757 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758 /* Note: AcquireParityLog either returns a log
759 * or enqueues currentItem */
760 }
761 if (!raidPtr->regionInfo[regionID].coreLog)
762 punt = RF_TRUE; /* failed to find a core log */
763 else {
764 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
765 /* verify that the log has room for new
766 * entries */
767 /* if log is full, dump it to disk and grab a
768 * new log */
769 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
770 /* log is full, dump it to disk */
771 if (DumpParityLogToDisk(finish, item))
772 punt = RF_TRUE; /* dump unsuccessful,
773 * blocked on
774 * reintegration */
775 else {
776 /* dump was successful */
777 if (incomingLog)
778 if (*incomingLog) {
779 RF_ASSERT((*incomingLog)->next == NULL);
780 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
781 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
782 *incomingLog = NULL;
783 } else
784 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785 else
786 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787 /* if a core log is not
788 * available, must queue work
789 * and return */
790 if (!raidPtr->regionInfo[regionID].coreLog)
791 punt = RF_TRUE; /* blocked on log
792 * availability */
793 }
794 }
795 }
796 /* if we didn't punt on this item, attempt to add a
797 * sector to the core log */
798 if (!punt) {
799 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
800 /* at this point, we have a core log with
801 * enough room for a sector */
802 /* copy a sector into the log */
803 log = raidPtr->regionInfo[regionID].coreLog;
804 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
805 logItem = log->numRecords++;
806 log->records[logItem].parityAddr = item->diskAddress;
807 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
808 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
809 log->records[logItem].parityAddr.numSector = 1;
810 log->records[logItem].operation = item->common->operation;
811 memcpy((char *)log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), ((char *)item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
812 item->diskAddress.numSector--;
813 item->diskAddress.startSector++;
814 if (item->diskAddress.numSector == 0)
815 done = RF_TRUE;
816 }
817 }
818
819 if (!punt) {
820 /* Processed this item completely, decrement count of
821 * items to be processed. */
822 RF_ASSERT(item->diskAddress.numSector == 0);
823 rf_lock_mutex2(item->common->mutex);
824 item->common->cnt--;
825 if (item->common->cnt == 0)
826 itemDone = RF_TRUE;
827 else
828 itemDone = RF_FALSE;
829 rf_unlock_mutex2(item->common->mutex);
830 if (itemDone) {
831 /* Finished processing all log data for this
832 * IO Return structs to free list and invoke
833 * wakeup function. */
834 timer = item->common->startTime; /* grab initial value of
835 * timer */
836 RF_ETIMER_STOP(timer);
837 RF_ETIMER_EVAL(timer);
838 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
839 if (rf_parityLogDebug)
840 printf("[waking process for region %d]\n", item->regionID);
841 wakeFunc = item->common->wakeFunc;
842 wakeArg = item->common->wakeArg;
843 FreeParityLogCommonData(item->common);
844 FreeParityLogData(item);
845 (wakeFunc) (wakeArg, 0);
846 } else
847 FreeParityLogData(item);
848 }
849 }
850 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
851 if (rf_parityLogDebug)
852 printf("[exiting ParityLogAppend]\n");
853 return (0);
854 }
855
856
857 void
rf_EnableParityLogging(RF_Raid_t * raidPtr)858 rf_EnableParityLogging(RF_Raid_t * raidPtr)
859 {
860 int regionID;
861
862 for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
863 rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex);
864 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
865 rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex);
866 }
867 if (rf_parityLogDebug)
868 printf("[parity logging enabled]\n");
869 }
870 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
871