xref: /netbsd-src/sys/dev/raidframe/rf_paritylog.c (revision 8a8f936f250a330d54f8a24ed0e92aadf9743a7b)
1 /*	$NetBSD: rf_paritylog.c,v 1.6 2001/10/04 15:58:54 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /* Code for manipulating in-core parity logs
30  *
31  */
32 
33 #include "rf_archs.h"
34 
35 #if RF_INCLUDE_PARITYLOGGING > 0
36 
37 /*
38  * Append-only log for recording parity "update" and "overwrite" records
39  */
40 
41 #include <dev/raidframe/raidframevar.h>
42 
43 #include "rf_threadstuff.h"
44 #include "rf_mcpair.h"
45 #include "rf_raid.h"
46 #include "rf_dag.h"
47 #include "rf_dagfuncs.h"
48 #include "rf_desc.h"
49 #include "rf_layout.h"
50 #include "rf_diskqueue.h"
51 #include "rf_etimer.h"
52 #include "rf_paritylog.h"
53 #include "rf_general.h"
54 #include "rf_map.h"
55 #include "rf_paritylogging.h"
56 #include "rf_paritylogDiskMgr.h"
57 
58 static RF_CommonLogData_t *
59 AllocParityLogCommonData(RF_Raid_t * raidPtr)
60 {
61 	RF_CommonLogData_t *common = NULL;
62 	int     rc;
63 
64 	/* Return a struct for holding common parity log information from the
65 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
66 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
67 
68 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
69 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
70 		common = raidPtr->parityLogDiskQueue.freeCommonList;
71 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
72 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
73 	} else {
74 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
75 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
76 		rc = rf_mutex_init(&common->mutex);
77 		if (rc) {
78 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
79 			    __LINE__, rc);
80 			RF_Free(common, sizeof(RF_CommonLogData_t));
81 			common = NULL;
82 		}
83 	}
84 	common->next = NULL;
85 	return (common);
86 }
87 
88 static void
89 FreeParityLogCommonData(RF_CommonLogData_t * common)
90 {
91 	RF_Raid_t *raidPtr;
92 
93 	/* Insert a single struct for holding parity log information (data)
94 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
95 	 * NON-BLOCKING */
96 
97 	raidPtr = common->raidPtr;
98 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
99 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
100 	raidPtr->parityLogDiskQueue.freeCommonList = common;
101 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
102 }
103 
104 static RF_ParityLogData_t *
105 AllocParityLogData(RF_Raid_t * raidPtr)
106 {
107 	RF_ParityLogData_t *data = NULL;
108 
109 	/* Return a struct for holding parity log information from the free
110 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
111 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
112 
113 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
114 	if (raidPtr->parityLogDiskQueue.freeDataList) {
115 		data = raidPtr->parityLogDiskQueue.freeDataList;
116 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
117 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
118 	} else {
119 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
120 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
121 	}
122 	data->next = NULL;
123 	data->prev = NULL;
124 	return (data);
125 }
126 
127 
128 static void
129 FreeParityLogData(RF_ParityLogData_t * data)
130 {
131 	RF_ParityLogData_t *nextItem;
132 	RF_Raid_t *raidPtr;
133 
134 	/* Insert a linked list of structs for holding parity log information
135 	 * (data) into the free list (parityLogDiskQueue.freeList).
136 	 * NON-BLOCKING */
137 
138 	raidPtr = data->common->raidPtr;
139 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
140 	while (data) {
141 		nextItem = data->next;
142 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
143 		raidPtr->parityLogDiskQueue.freeDataList = data;
144 		data = nextItem;
145 	}
146 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
147 }
148 
149 
150 static void
151 EnqueueParityLogData(
152     RF_ParityLogData_t * data,
153     RF_ParityLogData_t ** head,
154     RF_ParityLogData_t ** tail)
155 {
156 	RF_Raid_t *raidPtr;
157 
158 	/* Insert an in-core parity log (*data) into the head of a disk queue
159 	 * (*head, *tail). NON-BLOCKING */
160 
161 	raidPtr = data->common->raidPtr;
162 	if (rf_parityLogDebug)
163 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
164 	RF_ASSERT(data->prev == NULL);
165 	RF_ASSERT(data->next == NULL);
166 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
167 	if (*head) {
168 		/* insert into head of queue */
169 		RF_ASSERT((*head)->prev == NULL);
170 		RF_ASSERT((*tail)->next == NULL);
171 		data->next = *head;
172 		(*head)->prev = data;
173 		*head = data;
174 	} else {
175 		/* insert into empty list */
176 		RF_ASSERT(*head == NULL);
177 		RF_ASSERT(*tail == NULL);
178 		*head = data;
179 		*tail = data;
180 	}
181 	RF_ASSERT((*head)->prev == NULL);
182 	RF_ASSERT((*tail)->next == NULL);
183 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
184 }
185 
186 static RF_ParityLogData_t *
187 DequeueParityLogData(
188     RF_Raid_t * raidPtr,
189     RF_ParityLogData_t ** head,
190     RF_ParityLogData_t ** tail,
191     int ignoreLocks)
192 {
193 	RF_ParityLogData_t *data;
194 
195 	/* Remove and return an in-core parity log from the tail of a disk
196 	 * queue (*head, *tail). NON-BLOCKING */
197 
198 	/* remove from tail, preserving FIFO order */
199 	if (!ignoreLocks)
200 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
201 	data = *tail;
202 	if (data) {
203 		if (*head == *tail) {
204 			/* removing last item from queue */
205 			*head = NULL;
206 			*tail = NULL;
207 		} else {
208 			*tail = (*tail)->prev;
209 			(*tail)->next = NULL;
210 			RF_ASSERT((*head)->prev == NULL);
211 			RF_ASSERT((*tail)->next == NULL);
212 		}
213 		data->next = NULL;
214 		data->prev = NULL;
215 		if (rf_parityLogDebug)
216 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
217 	}
218 	if (*head) {
219 		RF_ASSERT((*head)->prev == NULL);
220 		RF_ASSERT((*tail)->next == NULL);
221 	}
222 	if (!ignoreLocks)
223 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
224 	return (data);
225 }
226 
227 
228 static void
229 RequeueParityLogData(
230     RF_ParityLogData_t * data,
231     RF_ParityLogData_t ** head,
232     RF_ParityLogData_t ** tail)
233 {
234 	RF_Raid_t *raidPtr;
235 
236 	/* Insert an in-core parity log (*data) into the tail of a disk queue
237 	 * (*head, *tail). NON-BLOCKING */
238 
239 	raidPtr = data->common->raidPtr;
240 	RF_ASSERT(data);
241 	if (rf_parityLogDebug)
242 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
243 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
244 	if (*tail) {
245 		/* append to tail of list */
246 		data->prev = *tail;
247 		data->next = NULL;
248 		(*tail)->next = data;
249 		*tail = data;
250 	} else {
251 		/* inserting into an empty list */
252 		*head = data;
253 		*tail = data;
254 		(*head)->prev = NULL;
255 		(*tail)->next = NULL;
256 	}
257 	RF_ASSERT((*head)->prev == NULL);
258 	RF_ASSERT((*tail)->next == NULL);
259 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
260 }
261 
262 RF_ParityLogData_t *
263 rf_CreateParityLogData(
264     RF_ParityRecordType_t operation,
265     RF_PhysDiskAddr_t * pda,
266     caddr_t bufPtr,
267     RF_Raid_t * raidPtr,
268     int (*wakeFunc) (RF_DagNode_t * node, int status),
269     void *wakeArg,
270     RF_AccTraceEntry_t * tracerec,
271     RF_Etimer_t startTime)
272 {
273 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
274 	RF_CommonLogData_t *common;
275 	RF_PhysDiskAddr_t *diskAddress;
276 	int     boundary, offset = 0;
277 
278 	/* Return an initialized struct of info to be logged. Build one item
279 	 * per physical disk address, one item per region.
280 	 *
281 	 * NON-BLOCKING */
282 
283 	diskAddress = pda;
284 	common = AllocParityLogCommonData(raidPtr);
285 	RF_ASSERT(common);
286 
287 	common->operation = operation;
288 	common->bufPtr = bufPtr;
289 	common->raidPtr = raidPtr;
290 	common->wakeFunc = wakeFunc;
291 	common->wakeArg = wakeArg;
292 	common->tracerec = tracerec;
293 	common->startTime = startTime;
294 	common->cnt = 0;
295 
296 	if (rf_parityLogDebug)
297 		printf("[entering CreateParityLogData]\n");
298 	while (diskAddress) {
299 		common->cnt++;
300 		data = AllocParityLogData(raidPtr);
301 		RF_ASSERT(data);
302 		data->common = common;
303 		data->next = NULL;
304 		data->prev = NULL;
305 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
306 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
307 			/* disk address does not cross a region boundary */
308 			data->diskAddress = *diskAddress;
309 			data->bufOffset = offset;
310 			offset = offset + diskAddress->numSector;
311 			EnqueueParityLogData(data, &resultHead, &resultTail);
312 			/* adjust disk address */
313 			diskAddress = diskAddress->next;
314 		} else {
315 			/* disk address crosses a region boundary */
316 			/* find address where region is crossed */
317 			boundary = 0;
318 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
319 				boundary++;
320 
321 			/* enter data before the boundary */
322 			data->diskAddress = *diskAddress;
323 			data->diskAddress.numSector = boundary;
324 			data->bufOffset = offset;
325 			offset += boundary;
326 			EnqueueParityLogData(data, &resultHead, &resultTail);
327 			/* adjust disk address */
328 			diskAddress->startSector += boundary;
329 			diskAddress->numSector -= boundary;
330 		}
331 	}
332 	if (rf_parityLogDebug)
333 		printf("[leaving CreateParityLogData]\n");
334 	return (resultHead);
335 }
336 
337 
338 RF_ParityLogData_t *
339 rf_SearchAndDequeueParityLogData(
340     RF_Raid_t * raidPtr,
341     int regionID,
342     RF_ParityLogData_t ** head,
343     RF_ParityLogData_t ** tail,
344     int ignoreLocks)
345 {
346 	RF_ParityLogData_t *w;
347 
348 	/* Remove and return an in-core parity log from a specified region
349 	 * (regionID). If a matching log is not found, return NULL.
350 	 *
351 	 * NON-BLOCKING. */
352 
353 	/* walk backward through a list, looking for an entry with a matching
354 	 * region ID */
355 	if (!ignoreLocks)
356 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
357 	w = (*tail);
358 	while (w) {
359 		if (w->regionID == regionID) {
360 			/* remove an element from the list */
361 			if (w == *tail) {
362 				if (*head == *tail) {
363 					/* removing only element in the list */
364 					*head = NULL;
365 					*tail = NULL;
366 				} else {
367 					/* removing last item in the list */
368 					*tail = (*tail)->prev;
369 					(*tail)->next = NULL;
370 					RF_ASSERT((*head)->prev == NULL);
371 					RF_ASSERT((*tail)->next == NULL);
372 				}
373 			} else {
374 				if (w == *head) {
375 					/* removing first item in the list */
376 					*head = (*head)->next;
377 					(*head)->prev = NULL;
378 					RF_ASSERT((*head)->prev == NULL);
379 					RF_ASSERT((*tail)->next == NULL);
380 				} else {
381 					/* removing an item from the middle of
382 					 * the list */
383 					w->prev->next = w->next;
384 					w->next->prev = w->prev;
385 					RF_ASSERT((*head)->prev == NULL);
386 					RF_ASSERT((*tail)->next == NULL);
387 				}
388 			}
389 			w->prev = NULL;
390 			w->next = NULL;
391 			if (rf_parityLogDebug)
392 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
393 			return (w);
394 		} else
395 			w = w->prev;
396 	}
397 	if (!ignoreLocks)
398 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
399 	return (NULL);
400 }
401 
402 static RF_ParityLogData_t *
403 DequeueMatchingLogData(
404     RF_Raid_t * raidPtr,
405     RF_ParityLogData_t ** head,
406     RF_ParityLogData_t ** tail)
407 {
408 	RF_ParityLogData_t *logDataList, *logData;
409 	int     regionID;
410 
411 	/* Remove and return an in-core parity log from the tail of a disk
412 	 * queue (*head, *tail).  Then remove all matching (identical
413 	 * regionIDs) logData and return as a linked list.
414 	 *
415 	 * NON-BLOCKING */
416 
417 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
418 	if (logDataList) {
419 		regionID = logDataList->regionID;
420 		logData = logDataList;
421 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
422 		while (logData->next) {
423 			logData = logData->next;
424 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
425 		}
426 	}
427 	return (logDataList);
428 }
429 
430 
431 static RF_ParityLog_t *
432 AcquireParityLog(
433     RF_ParityLogData_t * logData,
434     int finish)
435 {
436 	RF_ParityLog_t *log = NULL;
437 	RF_Raid_t *raidPtr;
438 
439 	/* Grab a log buffer from the pool and return it. If no buffers are
440 	 * available, return NULL. NON-BLOCKING */
441 	raidPtr = logData->common->raidPtr;
442 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
443 	if (raidPtr->parityLogPool.parityLogs) {
444 		log = raidPtr->parityLogPool.parityLogs;
445 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
446 		log->regionID = logData->regionID;
447 		log->numRecords = 0;
448 		log->next = NULL;
449 		raidPtr->logsInUse++;
450 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
451 	} else {
452 		/* no logs available, so place ourselves on the queue of work
453 		 * waiting on log buffers this is done while
454 		 * parityLogPool.mutex is held, to ensure synchronization with
455 		 * ReleaseParityLogs. */
456 		if (rf_parityLogDebug)
457 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
458 		if (finish)
459 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
460 		else
461 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
462 	}
463 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
464 	return (log);
465 }
466 
467 void
468 rf_ReleaseParityLogs(
469     RF_Raid_t * raidPtr,
470     RF_ParityLog_t * firstLog)
471 {
472 	RF_ParityLogData_t *logDataList;
473 	RF_ParityLog_t *log, *lastLog;
474 	int     cnt;
475 
476 	/* Insert a linked list of parity logs (firstLog) to the free list
477 	 * (parityLogPool.parityLogPool)
478 	 *
479 	 * NON-BLOCKING. */
480 
481 	RF_ASSERT(firstLog);
482 
483 	/* Before returning logs to global free list, service all requests
484 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
485 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
486 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
487 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
488 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
489 	log = firstLog;
490 	if (firstLog)
491 		firstLog = firstLog->next;
492 	log->numRecords = 0;
493 	log->next = NULL;
494 	while (logDataList && log) {
495 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
496 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
497 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
498 		if (rf_parityLogDebug)
499 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
500 		if (log == NULL) {
501 			log = firstLog;
502 			if (firstLog) {
503 				firstLog = firstLog->next;
504 				log->numRecords = 0;
505 				log->next = NULL;
506 			}
507 		}
508 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
509 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
510 		if (log)
511 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
512 	}
513 	/* return remaining logs to pool */
514 	if (log) {
515 		log->next = firstLog;
516 		firstLog = log;
517 	}
518 	if (firstLog) {
519 		lastLog = firstLog;
520 		raidPtr->logsInUse--;
521 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
522 		while (lastLog->next) {
523 			lastLog = lastLog->next;
524 			raidPtr->logsInUse--;
525 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
526 		}
527 		lastLog->next = raidPtr->parityLogPool.parityLogs;
528 		raidPtr->parityLogPool.parityLogs = firstLog;
529 		cnt = 0;
530 		log = raidPtr->parityLogPool.parityLogs;
531 		while (log) {
532 			cnt++;
533 			log = log->next;
534 		}
535 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
536 	}
537 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
538 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
539 }
540 
541 static void
542 ReintLog(
543     RF_Raid_t * raidPtr,
544     int regionID,
545     RF_ParityLog_t * log)
546 {
547 	RF_ASSERT(log);
548 
549 	/* Insert an in-core parity log (log) into the disk queue of
550 	 * reintegration work.  Set the flag (reintInProgress) for the
551 	 * specified region (regionID) to indicate that reintegration is in
552 	 * progress for this region. NON-BLOCKING */
553 
554 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
555 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
556 									 * complete */
557 
558 	if (rf_parityLogDebug)
559 		printf("[requesting reintegration of region %d]\n", log->regionID);
560 	/* move record to reintegration queue */
561 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
562 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
563 	raidPtr->parityLogDiskQueue.reintQueue = log;
564 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
565 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
566 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
567 }
568 
569 static void
570 FlushLog(
571     RF_Raid_t * raidPtr,
572     RF_ParityLog_t * log)
573 {
574 	/* insert a core log (log) into a list of logs
575 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
576 	 * NON-BLOCKING */
577 
578 	RF_ASSERT(log);
579 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
580 	RF_ASSERT(log->next == NULL);
581 	/* move log to flush queue */
582 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
583 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
584 	raidPtr->parityLogDiskQueue.flushQueue = log;
585 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
586 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
587 }
588 
589 static int
590 DumpParityLogToDisk(
591     int finish,
592     RF_ParityLogData_t * logData)
593 {
594 	int     i, diskCount, regionID = logData->regionID;
595 	RF_ParityLog_t *log;
596 	RF_Raid_t *raidPtr;
597 
598 	raidPtr = logData->common->raidPtr;
599 
600 	/* Move a core log to disk.  If the log disk is full, initiate
601 	 * reintegration.
602 	 *
603 	 * Return (0) if we can enqueue the dump immediately, otherwise return
604 	 * (1) to indicate we are blocked on reintegration and control of the
605 	 * thread should be relinquished.
606 	 *
607 	 * Caller must hold regionInfo[regionID].mutex
608 	 *
609 	 * NON-BLOCKING */
610 
611 	if (rf_parityLogDebug)
612 		printf("[dumping parity log to disk, region %d]\n", regionID);
613 	log = raidPtr->regionInfo[regionID].coreLog;
614 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
615 	RF_ASSERT(log->next == NULL);
616 
617 	/* if reintegration is in progress, must queue work */
618 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
619 	if (raidPtr->regionInfo[regionID].reintInProgress) {
620 		/* Can not proceed since this region is currently being
621 		 * reintegrated. We can not block, so queue remaining work and
622 		 * return */
623 		if (rf_parityLogDebug)
624 			printf("[region %d waiting on reintegration]\n", regionID);
625 		/* XXX not sure about the use of finish - shouldn't this
626 		 * always be "Enqueue"? */
627 		if (finish)
628 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
629 		else
630 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
631 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
632 		return (1);	/* relenquish control of this thread */
633 	}
634 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
635 	raidPtr->regionInfo[regionID].coreLog = NULL;
636 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
637 		/* IMPORTANT!! this loop bound assumes region disk holds an
638 		 * integral number of core logs */
639 	{
640 		/* update disk map for this region */
641 		diskCount = raidPtr->regionInfo[regionID].diskCount;
642 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
643 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
644 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
645 		}
646 		log->diskOffset = diskCount;
647 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
648 		FlushLog(raidPtr, log);
649 	} else {
650 		/* no room for log on disk, send it to disk manager and
651 		 * request reintegration */
652 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
653 		ReintLog(raidPtr, regionID, log);
654 	}
655 	if (rf_parityLogDebug)
656 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
657 	return (0);
658 }
659 
660 int
661 rf_ParityLogAppend(
662     RF_ParityLogData_t * logData,
663     int finish,
664     RF_ParityLog_t ** incomingLog,
665     int clearReintFlag)
666 {
667 	int     regionID, logItem, itemDone;
668 	RF_ParityLogData_t *item;
669 	int     punt, done = RF_FALSE;
670 	RF_ParityLog_t *log;
671 	RF_Raid_t *raidPtr;
672 	RF_Etimer_t timer;
673 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
674 	void   *wakeArg;
675 
676 	/* Add parity to the appropriate log, one sector at a time. This
677 	 * routine is called is called by dag functions ParityLogUpdateFunc
678 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
679 	 *
680 	 * Parity to be logged is contained in a linked-list (logData).  When
681 	 * this routine returns, every sector in the list will be in one of
682 	 * three places: 1) entered into the parity log 2) queued, waiting on
683 	 * reintegration 3) queued, waiting on a core log
684 	 *
685 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
686 	 * Later, as conditions which required the block are removed, the work
687 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
688 	 *
689 	 * NON-BLOCKING */
690 
691 	raidPtr = logData->common->raidPtr;
692 	/* lock the region for the first item in logData */
693 	RF_ASSERT(logData != NULL);
694 	regionID = logData->regionID;
695 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
696 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
697 
698 	if (clearReintFlag) {
699 		/* Enable flushing for this region.  Holding both locks
700 		 * provides a synchronization barrier with DumpParityLogToDisk */
701 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
702 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
703 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
704 		raidPtr->regionInfo[regionID].diskCount = 0;
705 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
706 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
707 										 * enabled */
708 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
709 	}
710 	/* process each item in logData */
711 	while (logData) {
712 		/* remove an item from logData */
713 		item = logData;
714 		logData = logData->next;
715 		item->next = NULL;
716 		item->prev = NULL;
717 
718 		if (rf_parityLogDebug)
719 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
720 
721 		/* see if we moved to a new region */
722 		if (regionID != item->regionID) {
723 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
724 			regionID = item->regionID;
725 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
726 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
727 		}
728 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
729 				 * can happen in one of two ways: 1) no core
730 				 * log (AcquireParityLog) 2) waiting on
731 				 * reintegration (DumpParityLogToDisk) If punt
732 				 * is RF_TRUE, the dataItem was queued, so
733 				 * skip to next item. */
734 
735 		/* process item, one sector at a time, until all sectors
736 		 * processed or we punt */
737 		if (item->diskAddress.numSector > 0)
738 			done = RF_FALSE;
739 		else
740 			RF_ASSERT(0);
741 		while (!punt && !done) {
742 			/* verify that a core log exists for this region */
743 			if (!raidPtr->regionInfo[regionID].coreLog) {
744 				/* Attempt to acquire a parity log. If
745 				 * acquisition fails, queue remaining work in
746 				 * data item and move to nextItem. */
747 				if (incomingLog)
748 					if (*incomingLog) {
749 						RF_ASSERT((*incomingLog)->next == NULL);
750 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
751 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
752 						*incomingLog = NULL;
753 					} else
754 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
755 				else
756 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
757 				/* Note: AcquireParityLog either returns a log
758 				 * or enqueues currentItem */
759 			}
760 			if (!raidPtr->regionInfo[regionID].coreLog)
761 				punt = RF_TRUE;	/* failed to find a core log */
762 			else {
763 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
764 				/* verify that the log has room for new
765 				 * entries */
766 				/* if log is full, dump it to disk and grab a
767 				 * new log */
768 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
769 					/* log is full, dump it to disk */
770 					if (DumpParityLogToDisk(finish, item))
771 						punt = RF_TRUE;	/* dump unsuccessful,
772 								 * blocked on
773 								 * reintegration */
774 					else {
775 						/* dump was successful */
776 						if (incomingLog)
777 							if (*incomingLog) {
778 								RF_ASSERT((*incomingLog)->next == NULL);
779 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
780 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
781 								*incomingLog = NULL;
782 							} else
783 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
784 						else
785 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
786 						/* if a core log is not
787 						 * available, must queue work
788 						 * and return */
789 						if (!raidPtr->regionInfo[regionID].coreLog)
790 							punt = RF_TRUE;	/* blocked on log
791 									 * availability */
792 					}
793 				}
794 			}
795 			/* if we didn't punt on this item, attempt to add a
796 			 * sector to the core log */
797 			if (!punt) {
798 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
799 				/* at this point, we have a core log with
800 				 * enough room for a sector */
801 				/* copy a sector into the log */
802 				log = raidPtr->regionInfo[regionID].coreLog;
803 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
804 				logItem = log->numRecords++;
805 				log->records[logItem].parityAddr = item->diskAddress;
806 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
807 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
808 				log->records[logItem].parityAddr.numSector = 1;
809 				log->records[logItem].operation = item->common->operation;
810 				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
811 				item->diskAddress.numSector--;
812 				item->diskAddress.startSector++;
813 				if (item->diskAddress.numSector == 0)
814 					done = RF_TRUE;
815 			}
816 		}
817 
818 		if (!punt) {
819 			/* Processed this item completely, decrement count of
820 			 * items to be processed. */
821 			RF_ASSERT(item->diskAddress.numSector == 0);
822 			RF_LOCK_MUTEX(item->common->mutex);
823 			item->common->cnt--;
824 			if (item->common->cnt == 0)
825 				itemDone = RF_TRUE;
826 			else
827 				itemDone = RF_FALSE;
828 			RF_UNLOCK_MUTEX(item->common->mutex);
829 			if (itemDone) {
830 				/* Finished processing all log data for this
831 				 * IO Return structs to free list and invoke
832 				 * wakeup function. */
833 				timer = item->common->startTime;	/* grab initial value of
834 									 * timer */
835 				RF_ETIMER_STOP(timer);
836 				RF_ETIMER_EVAL(timer);
837 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
838 				if (rf_parityLogDebug)
839 					printf("[waking process for region %d]\n", item->regionID);
840 				wakeFunc = item->common->wakeFunc;
841 				wakeArg = item->common->wakeArg;
842 				FreeParityLogCommonData(item->common);
843 				FreeParityLogData(item);
844 				(wakeFunc) (wakeArg, 0);
845 			} else
846 				FreeParityLogData(item);
847 		}
848 	}
849 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
850 	if (rf_parityLogDebug)
851 		printf("[exiting ParityLogAppend]\n");
852 	return (0);
853 }
854 
855 
856 void
857 rf_EnableParityLogging(RF_Raid_t * raidPtr)
858 {
859 	int     regionID;
860 
861 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
862 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
863 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
864 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
865 	}
866 	if (rf_parityLogDebug)
867 		printf("[parity logging enabled]\n");
868 }
869 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
870