xref: /netbsd-src/sys/dev/raidframe/rf_paritylog.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /* Code for manipulating in-core parity logs
30  *
31  */
32 
33 #include "rf_archs.h"
34 
35 #if RF_INCLUDE_PARITYLOGGING > 0
36 
37 /*
38  * Append-only log for recording parity "update" and "overwrite" records
39  */
40 
41 #include "rf_types.h"
42 #include "rf_threadstuff.h"
43 #include "rf_mcpair.h"
44 #include "rf_raid.h"
45 #include "rf_dag.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_desc.h"
48 #include "rf_layout.h"
49 #include "rf_diskqueue.h"
50 #include "rf_etimer.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_paritylogging.h"
55 #include "rf_paritylogDiskMgr.h"
56 
57 static RF_CommonLogData_t *
58 AllocParityLogCommonData(RF_Raid_t * raidPtr)
59 {
60 	RF_CommonLogData_t *common = NULL;
61 	int     rc;
62 
63 	/* Return a struct for holding common parity log information from the
64 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
65 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
66 
67 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
68 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
69 		common = raidPtr->parityLogDiskQueue.freeCommonList;
70 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
71 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
72 	} else {
73 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
75 		rc = rf_mutex_init(&common->mutex);
76 		if (rc) {
77 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
78 			    __LINE__, rc);
79 			RF_Free(common, sizeof(RF_CommonLogData_t));
80 			common = NULL;
81 		}
82 	}
83 	common->next = NULL;
84 	return (common);
85 }
86 
87 static void
88 FreeParityLogCommonData(RF_CommonLogData_t * common)
89 {
90 	RF_Raid_t *raidPtr;
91 
92 	/* Insert a single struct for holding parity log information (data)
93 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
94 	 * NON-BLOCKING */
95 
96 	raidPtr = common->raidPtr;
97 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
98 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
99 	raidPtr->parityLogDiskQueue.freeCommonList = common;
100 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
101 }
102 
103 static RF_ParityLogData_t *
104 AllocParityLogData(RF_Raid_t * raidPtr)
105 {
106 	RF_ParityLogData_t *data = NULL;
107 
108 	/* Return a struct for holding parity log information from the free
109 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
110 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
111 
112 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
113 	if (raidPtr->parityLogDiskQueue.freeDataList) {
114 		data = raidPtr->parityLogDiskQueue.freeDataList;
115 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
116 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
117 	} else {
118 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
120 	}
121 	data->next = NULL;
122 	data->prev = NULL;
123 	return (data);
124 }
125 
126 
127 static void
128 FreeParityLogData(RF_ParityLogData_t * data)
129 {
130 	RF_ParityLogData_t *nextItem;
131 	RF_Raid_t *raidPtr;
132 
133 	/* Insert a linked list of structs for holding parity log information
134 	 * (data) into the free list (parityLogDiskQueue.freeList).
135 	 * NON-BLOCKING */
136 
137 	raidPtr = data->common->raidPtr;
138 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
139 	while (data) {
140 		nextItem = data->next;
141 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
142 		raidPtr->parityLogDiskQueue.freeDataList = data;
143 		data = nextItem;
144 	}
145 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
146 }
147 
148 
149 static void
150 EnqueueParityLogData(
151     RF_ParityLogData_t * data,
152     RF_ParityLogData_t ** head,
153     RF_ParityLogData_t ** tail)
154 {
155 	RF_Raid_t *raidPtr;
156 
157 	/* Insert an in-core parity log (*data) into the head of a disk queue
158 	 * (*head, *tail). NON-BLOCKING */
159 
160 	raidPtr = data->common->raidPtr;
161 	if (rf_parityLogDebug)
162 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
163 	RF_ASSERT(data->prev == NULL);
164 	RF_ASSERT(data->next == NULL);
165 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
166 	if (*head) {
167 		/* insert into head of queue */
168 		RF_ASSERT((*head)->prev == NULL);
169 		RF_ASSERT((*tail)->next == NULL);
170 		data->next = *head;
171 		(*head)->prev = data;
172 		*head = data;
173 	} else {
174 		/* insert into empty list */
175 		RF_ASSERT(*head == NULL);
176 		RF_ASSERT(*tail == NULL);
177 		*head = data;
178 		*tail = data;
179 	}
180 	RF_ASSERT((*head)->prev == NULL);
181 	RF_ASSERT((*tail)->next == NULL);
182 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
183 }
184 
185 static RF_ParityLogData_t *
186 DequeueParityLogData(
187     RF_Raid_t * raidPtr,
188     RF_ParityLogData_t ** head,
189     RF_ParityLogData_t ** tail,
190     int ignoreLocks)
191 {
192 	RF_ParityLogData_t *data;
193 
194 	/* Remove and return an in-core parity log from the tail of a disk
195 	 * queue (*head, *tail). NON-BLOCKING */
196 
197 	/* remove from tail, preserving FIFO order */
198 	if (!ignoreLocks)
199 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200 	data = *tail;
201 	if (data) {
202 		if (*head == *tail) {
203 			/* removing last item from queue */
204 			*head = NULL;
205 			*tail = NULL;
206 		} else {
207 			*tail = (*tail)->prev;
208 			(*tail)->next = NULL;
209 			RF_ASSERT((*head)->prev == NULL);
210 			RF_ASSERT((*tail)->next == NULL);
211 		}
212 		data->next = NULL;
213 		data->prev = NULL;
214 		if (rf_parityLogDebug)
215 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
216 	}
217 	if (*head) {
218 		RF_ASSERT((*head)->prev == NULL);
219 		RF_ASSERT((*tail)->next == NULL);
220 	}
221 	if (!ignoreLocks)
222 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
223 	return (data);
224 }
225 
226 
227 static void
228 RequeueParityLogData(
229     RF_ParityLogData_t * data,
230     RF_ParityLogData_t ** head,
231     RF_ParityLogData_t ** tail)
232 {
233 	RF_Raid_t *raidPtr;
234 
235 	/* Insert an in-core parity log (*data) into the tail of a disk queue
236 	 * (*head, *tail). NON-BLOCKING */
237 
238 	raidPtr = data->common->raidPtr;
239 	RF_ASSERT(data);
240 	if (rf_parityLogDebug)
241 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
242 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
243 	if (*tail) {
244 		/* append to tail of list */
245 		data->prev = *tail;
246 		data->next = NULL;
247 		(*tail)->next = data;
248 		*tail = data;
249 	} else {
250 		/* inserting into an empty list */
251 		*head = data;
252 		*tail = data;
253 		(*head)->prev = NULL;
254 		(*tail)->next = NULL;
255 	}
256 	RF_ASSERT((*head)->prev == NULL);
257 	RF_ASSERT((*tail)->next == NULL);
258 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259 }
260 
261 RF_ParityLogData_t *
262 rf_CreateParityLogData(
263     RF_ParityRecordType_t operation,
264     RF_PhysDiskAddr_t * pda,
265     caddr_t bufPtr,
266     RF_Raid_t * raidPtr,
267     int (*wakeFunc) (RF_DagNode_t * node, int status),
268     void *wakeArg,
269     RF_AccTraceEntry_t * tracerec,
270     RF_Etimer_t startTime)
271 {
272 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
273 	RF_CommonLogData_t *common;
274 	RF_PhysDiskAddr_t *diskAddress;
275 	int     boundary, offset = 0;
276 
277 	/* Return an initialized struct of info to be logged. Build one item
278 	 * per physical disk address, one item per region.
279 	 *
280 	 * NON-BLOCKING */
281 
282 	diskAddress = pda;
283 	common = AllocParityLogCommonData(raidPtr);
284 	RF_ASSERT(common);
285 
286 	common->operation = operation;
287 	common->bufPtr = bufPtr;
288 	common->raidPtr = raidPtr;
289 	common->wakeFunc = wakeFunc;
290 	common->wakeArg = wakeArg;
291 	common->tracerec = tracerec;
292 	common->startTime = startTime;
293 	common->cnt = 0;
294 
295 	if (rf_parityLogDebug)
296 		printf("[entering CreateParityLogData]\n");
297 	while (diskAddress) {
298 		common->cnt++;
299 		data = AllocParityLogData(raidPtr);
300 		RF_ASSERT(data);
301 		data->common = common;
302 		data->next = NULL;
303 		data->prev = NULL;
304 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
305 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
306 			/* disk address does not cross a region boundary */
307 			data->diskAddress = *diskAddress;
308 			data->bufOffset = offset;
309 			offset = offset + diskAddress->numSector;
310 			EnqueueParityLogData(data, &resultHead, &resultTail);
311 			/* adjust disk address */
312 			diskAddress = diskAddress->next;
313 		} else {
314 			/* disk address crosses a region boundary */
315 			/* find address where region is crossed */
316 			boundary = 0;
317 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
318 				boundary++;
319 
320 			/* enter data before the boundary */
321 			data->diskAddress = *diskAddress;
322 			data->diskAddress.numSector = boundary;
323 			data->bufOffset = offset;
324 			offset += boundary;
325 			EnqueueParityLogData(data, &resultHead, &resultTail);
326 			/* adjust disk address */
327 			diskAddress->startSector += boundary;
328 			diskAddress->numSector -= boundary;
329 		}
330 	}
331 	if (rf_parityLogDebug)
332 		printf("[leaving CreateParityLogData]\n");
333 	return (resultHead);
334 }
335 
336 
337 RF_ParityLogData_t *
338 rf_SearchAndDequeueParityLogData(
339     RF_Raid_t * raidPtr,
340     int regionID,
341     RF_ParityLogData_t ** head,
342     RF_ParityLogData_t ** tail,
343     int ignoreLocks)
344 {
345 	RF_ParityLogData_t *w;
346 
347 	/* Remove and return an in-core parity log from a specified region
348 	 * (regionID). If a matching log is not found, return NULL.
349 	 *
350 	 * NON-BLOCKING. */
351 
352 	/* walk backward through a list, looking for an entry with a matching
353 	 * region ID */
354 	if (!ignoreLocks)
355 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
356 	w = (*tail);
357 	while (w) {
358 		if (w->regionID == regionID) {
359 			/* remove an element from the list */
360 			if (w == *tail) {
361 				if (*head == *tail) {
362 					/* removing only element in the list */
363 					*head = NULL;
364 					*tail = NULL;
365 				} else {
366 					/* removing last item in the list */
367 					*tail = (*tail)->prev;
368 					(*tail)->next = NULL;
369 					RF_ASSERT((*head)->prev == NULL);
370 					RF_ASSERT((*tail)->next == NULL);
371 				}
372 			} else {
373 				if (w == *head) {
374 					/* removing first item in the list */
375 					*head = (*head)->next;
376 					(*head)->prev = NULL;
377 					RF_ASSERT((*head)->prev == NULL);
378 					RF_ASSERT((*tail)->next == NULL);
379 				} else {
380 					/* removing an item from the middle of
381 					 * the list */
382 					w->prev->next = w->next;
383 					w->next->prev = w->prev;
384 					RF_ASSERT((*head)->prev == NULL);
385 					RF_ASSERT((*tail)->next == NULL);
386 				}
387 			}
388 			w->prev = NULL;
389 			w->next = NULL;
390 			if (rf_parityLogDebug)
391 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
392 			return (w);
393 		} else
394 			w = w->prev;
395 	}
396 	if (!ignoreLocks)
397 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
398 	return (NULL);
399 }
400 
401 static RF_ParityLogData_t *
402 DequeueMatchingLogData(
403     RF_Raid_t * raidPtr,
404     RF_ParityLogData_t ** head,
405     RF_ParityLogData_t ** tail)
406 {
407 	RF_ParityLogData_t *logDataList, *logData;
408 	int     regionID;
409 
410 	/* Remove and return an in-core parity log from the tail of a disk
411 	 * queue (*head, *tail).  Then remove all matching (identical
412 	 * regionIDs) logData and return as a linked list.
413 	 *
414 	 * NON-BLOCKING */
415 
416 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
417 	if (logDataList) {
418 		regionID = logDataList->regionID;
419 		logData = logDataList;
420 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
421 		while (logData->next) {
422 			logData = logData->next;
423 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
424 		}
425 	}
426 	return (logDataList);
427 }
428 
429 
430 static RF_ParityLog_t *
431 AcquireParityLog(
432     RF_ParityLogData_t * logData,
433     int finish)
434 {
435 	RF_ParityLog_t *log = NULL;
436 	RF_Raid_t *raidPtr;
437 
438 	/* Grab a log buffer from the pool and return it. If no buffers are
439 	 * available, return NULL. NON-BLOCKING */
440 	raidPtr = logData->common->raidPtr;
441 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
442 	if (raidPtr->parityLogPool.parityLogs) {
443 		log = raidPtr->parityLogPool.parityLogs;
444 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
445 		log->regionID = logData->regionID;
446 		log->numRecords = 0;
447 		log->next = NULL;
448 		raidPtr->logsInUse++;
449 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
450 	} else {
451 		/* no logs available, so place ourselves on the queue of work
452 		 * waiting on log buffers this is done while
453 		 * parityLogPool.mutex is held, to ensure synchronization with
454 		 * ReleaseParityLogs. */
455 		if (rf_parityLogDebug)
456 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
457 		if (finish)
458 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
459 		else
460 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461 	}
462 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
463 	return (log);
464 }
465 
466 void
467 rf_ReleaseParityLogs(
468     RF_Raid_t * raidPtr,
469     RF_ParityLog_t * firstLog)
470 {
471 	RF_ParityLogData_t *logDataList;
472 	RF_ParityLog_t *log, *lastLog;
473 	int     cnt;
474 
475 	/* Insert a linked list of parity logs (firstLog) to the free list
476 	 * (parityLogPool.parityLogPool)
477 	 *
478 	 * NON-BLOCKING. */
479 
480 	RF_ASSERT(firstLog);
481 
482 	/* Before returning logs to global free list, service all requests
483 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
484 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
485 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
486 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
487 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
488 	log = firstLog;
489 	if (firstLog)
490 		firstLog = firstLog->next;
491 	log->numRecords = 0;
492 	log->next = NULL;
493 	while (logDataList && log) {
494 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
495 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
496 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
497 		if (rf_parityLogDebug)
498 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
499 		if (log == NULL) {
500 			log = firstLog;
501 			if (firstLog) {
502 				firstLog = firstLog->next;
503 				log->numRecords = 0;
504 				log->next = NULL;
505 			}
506 		}
507 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
508 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
509 		if (log)
510 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
511 	}
512 	/* return remaining logs to pool */
513 	if (log) {
514 		log->next = firstLog;
515 		firstLog = log;
516 	}
517 	if (firstLog) {
518 		lastLog = firstLog;
519 		raidPtr->logsInUse--;
520 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
521 		while (lastLog->next) {
522 			lastLog = lastLog->next;
523 			raidPtr->logsInUse--;
524 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
525 		}
526 		lastLog->next = raidPtr->parityLogPool.parityLogs;
527 		raidPtr->parityLogPool.parityLogs = firstLog;
528 		cnt = 0;
529 		log = raidPtr->parityLogPool.parityLogs;
530 		while (log) {
531 			cnt++;
532 			log = log->next;
533 		}
534 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
535 	}
536 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
537 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
538 }
539 
540 static void
541 ReintLog(
542     RF_Raid_t * raidPtr,
543     int regionID,
544     RF_ParityLog_t * log)
545 {
546 	RF_ASSERT(log);
547 
548 	/* Insert an in-core parity log (log) into the disk queue of
549 	 * reintegration work.  Set the flag (reintInProgress) for the
550 	 * specified region (regionID) to indicate that reintegration is in
551 	 * progress for this region. NON-BLOCKING */
552 
553 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
554 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
555 									 * complete */
556 
557 	if (rf_parityLogDebug)
558 		printf("[requesting reintegration of region %d]\n", log->regionID);
559 	/* move record to reintegration queue */
560 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
561 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
562 	raidPtr->parityLogDiskQueue.reintQueue = log;
563 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
564 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
565 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
566 }
567 
568 static void
569 FlushLog(
570     RF_Raid_t * raidPtr,
571     RF_ParityLog_t * log)
572 {
573 	/* insert a core log (log) into a list of logs
574 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
575 	 * NON-BLOCKING */
576 
577 	RF_ASSERT(log);
578 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
579 	RF_ASSERT(log->next == NULL);
580 	/* move log to flush queue */
581 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
582 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
583 	raidPtr->parityLogDiskQueue.flushQueue = log;
584 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
585 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
586 }
587 
588 static int
589 DumpParityLogToDisk(
590     int finish,
591     RF_ParityLogData_t * logData)
592 {
593 	int     i, diskCount, regionID = logData->regionID;
594 	RF_ParityLog_t *log;
595 	RF_Raid_t *raidPtr;
596 
597 	raidPtr = logData->common->raidPtr;
598 
599 	/* Move a core log to disk.  If the log disk is full, initiate
600 	 * reintegration.
601 	 *
602 	 * Return (0) if we can enqueue the dump immediately, otherwise return
603 	 * (1) to indicate we are blocked on reintegration and control of the
604 	 * thread should be relinquished.
605 	 *
606 	 * Caller must hold regionInfo[regionID].mutex
607 	 *
608 	 * NON-BLOCKING */
609 
610 	if (rf_parityLogDebug)
611 		printf("[dumping parity log to disk, region %d]\n", regionID);
612 	log = raidPtr->regionInfo[regionID].coreLog;
613 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
614 	RF_ASSERT(log->next == NULL);
615 
616 	/* if reintegration is in progress, must queue work */
617 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
618 	if (raidPtr->regionInfo[regionID].reintInProgress) {
619 		/* Can not proceed since this region is currently being
620 		 * reintegrated. We can not block, so queue remaining work and
621 		 * return */
622 		if (rf_parityLogDebug)
623 			printf("[region %d waiting on reintegration]\n", regionID);
624 		/* XXX not sure about the use of finish - shouldn't this
625 		 * always be "Enqueue"? */
626 		if (finish)
627 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
628 		else
629 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631 		return (1);	/* relenquish control of this thread */
632 	}
633 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
634 	raidPtr->regionInfo[regionID].coreLog = NULL;
635 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
636 		/* IMPORTANT!! this loop bound assumes region disk holds an
637 		 * integral number of core logs */
638 	{
639 		/* update disk map for this region */
640 		diskCount = raidPtr->regionInfo[regionID].diskCount;
641 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
642 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
643 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
644 		}
645 		log->diskOffset = diskCount;
646 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
647 		FlushLog(raidPtr, log);
648 	} else {
649 		/* no room for log on disk, send it to disk manager and
650 		 * request reintegration */
651 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
652 		ReintLog(raidPtr, regionID, log);
653 	}
654 	if (rf_parityLogDebug)
655 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
656 	return (0);
657 }
658 
659 int
660 rf_ParityLogAppend(
661     RF_ParityLogData_t * logData,
662     int finish,
663     RF_ParityLog_t ** incomingLog,
664     int clearReintFlag)
665 {
666 	int     regionID, logItem, itemDone;
667 	RF_ParityLogData_t *item;
668 	int     punt, done = RF_FALSE;
669 	RF_ParityLog_t *log;
670 	RF_Raid_t *raidPtr;
671 	RF_Etimer_t timer;
672 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
673 	void   *wakeArg;
674 
675 	/* Add parity to the appropriate log, one sector at a time. This
676 	 * routine is called is called by dag functions ParityLogUpdateFunc
677 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
678 	 *
679 	 * Parity to be logged is contained in a linked-list (logData).  When
680 	 * this routine returns, every sector in the list will be in one of
681 	 * three places: 1) entered into the parity log 2) queued, waiting on
682 	 * reintegration 3) queued, waiting on a core log
683 	 *
684 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
685 	 * Later, as conditions which required the block are removed, the work
686 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
687 	 *
688 	 * NON-BLOCKING */
689 
690 	raidPtr = logData->common->raidPtr;
691 	/* lock the region for the first item in logData */
692 	RF_ASSERT(logData != NULL);
693 	regionID = logData->regionID;
694 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
695 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
696 
697 	if (clearReintFlag) {
698 		/* Enable flushing for this region.  Holding both locks
699 		 * provides a synchronization barrier with DumpParityLogToDisk */
700 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
701 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
702 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
703 		raidPtr->regionInfo[regionID].diskCount = 0;
704 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
705 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
706 										 * enabled */
707 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
708 	}
709 	/* process each item in logData */
710 	while (logData) {
711 		/* remove an item from logData */
712 		item = logData;
713 		logData = logData->next;
714 		item->next = NULL;
715 		item->prev = NULL;
716 
717 		if (rf_parityLogDebug)
718 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
719 
720 		/* see if we moved to a new region */
721 		if (regionID != item->regionID) {
722 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
723 			regionID = item->regionID;
724 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
726 		}
727 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
728 				 * can happen in one of two ways: 1) no core
729 				 * log (AcquireParityLog) 2) waiting on
730 				 * reintegration (DumpParityLogToDisk) If punt
731 				 * is RF_TRUE, the dataItem was queued, so
732 				 * skip to next item. */
733 
734 		/* process item, one sector at a time, until all sectors
735 		 * processed or we punt */
736 		if (item->diskAddress.numSector > 0)
737 			done = RF_FALSE;
738 		else
739 			RF_ASSERT(0);
740 		while (!punt && !done) {
741 			/* verify that a core log exists for this region */
742 			if (!raidPtr->regionInfo[regionID].coreLog) {
743 				/* Attempt to acquire a parity log. If
744 				 * acquisition fails, queue remaining work in
745 				 * data item and move to nextItem. */
746 				if (incomingLog)
747 					if (*incomingLog) {
748 						RF_ASSERT((*incomingLog)->next == NULL);
749 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
750 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
751 						*incomingLog = NULL;
752 					} else
753 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
754 				else
755 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756 				/* Note: AcquireParityLog either returns a log
757 				 * or enqueues currentItem */
758 			}
759 			if (!raidPtr->regionInfo[regionID].coreLog)
760 				punt = RF_TRUE;	/* failed to find a core log */
761 			else {
762 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
763 				/* verify that the log has room for new
764 				 * entries */
765 				/* if log is full, dump it to disk and grab a
766 				 * new log */
767 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
768 					/* log is full, dump it to disk */
769 					if (DumpParityLogToDisk(finish, item))
770 						punt = RF_TRUE;	/* dump unsuccessful,
771 								 * blocked on
772 								 * reintegration */
773 					else {
774 						/* dump was successful */
775 						if (incomingLog)
776 							if (*incomingLog) {
777 								RF_ASSERT((*incomingLog)->next == NULL);
778 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
779 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
780 								*incomingLog = NULL;
781 							} else
782 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
783 						else
784 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785 						/* if a core log is not
786 						 * available, must queue work
787 						 * and return */
788 						if (!raidPtr->regionInfo[regionID].coreLog)
789 							punt = RF_TRUE;	/* blocked on log
790 									 * availability */
791 					}
792 				}
793 			}
794 			/* if we didn't punt on this item, attempt to add a
795 			 * sector to the core log */
796 			if (!punt) {
797 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
798 				/* at this point, we have a core log with
799 				 * enough room for a sector */
800 				/* copy a sector into the log */
801 				log = raidPtr->regionInfo[regionID].coreLog;
802 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
803 				logItem = log->numRecords++;
804 				log->records[logItem].parityAddr = item->diskAddress;
805 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
806 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
807 				log->records[logItem].parityAddr.numSector = 1;
808 				log->records[logItem].operation = item->common->operation;
809 				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
810 				item->diskAddress.numSector--;
811 				item->diskAddress.startSector++;
812 				if (item->diskAddress.numSector == 0)
813 					done = RF_TRUE;
814 			}
815 		}
816 
817 		if (!punt) {
818 			/* Processed this item completely, decrement count of
819 			 * items to be processed. */
820 			RF_ASSERT(item->diskAddress.numSector == 0);
821 			RF_LOCK_MUTEX(item->common->mutex);
822 			item->common->cnt--;
823 			if (item->common->cnt == 0)
824 				itemDone = RF_TRUE;
825 			else
826 				itemDone = RF_FALSE;
827 			RF_UNLOCK_MUTEX(item->common->mutex);
828 			if (itemDone) {
829 				/* Finished processing all log data for this
830 				 * IO Return structs to free list and invoke
831 				 * wakeup function. */
832 				timer = item->common->startTime;	/* grab initial value of
833 									 * timer */
834 				RF_ETIMER_STOP(timer);
835 				RF_ETIMER_EVAL(timer);
836 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
837 				if (rf_parityLogDebug)
838 					printf("[waking process for region %d]\n", item->regionID);
839 				wakeFunc = item->common->wakeFunc;
840 				wakeArg = item->common->wakeArg;
841 				FreeParityLogCommonData(item->common);
842 				FreeParityLogData(item);
843 				(wakeFunc) (wakeArg, 0);
844 			} else
845 				FreeParityLogData(item);
846 		}
847 	}
848 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
849 	if (rf_parityLogDebug)
850 		printf("[exiting ParityLogAppend]\n");
851 	return (0);
852 }
853 
854 
855 void
856 rf_EnableParityLogging(RF_Raid_t * raidPtr)
857 {
858 	int     regionID;
859 
860 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
861 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
862 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
863 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864 	}
865 	if (rf_parityLogDebug)
866 		printf("[parity logging enabled]\n");
867 }
868 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
869