xref: /netbsd-src/sys/dev/raidframe/rf_paritylog.c (revision bada23909e740596d0a3785a73bd3583a9807fb8)
1 /*	$NetBSD: rf_paritylog.c,v 1.3 1999/02/05 00:06:13 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /* Code for manipulating in-core parity logs
30  *
31  */
32 
33 #include "rf_archs.h"
34 
35 #if RF_INCLUDE_PARITYLOGGING > 0
36 
37 /*
38  * Append-only log for recording parity "update" and "overwrite" records
39  */
40 
41 #include "rf_types.h"
42 #include "rf_threadstuff.h"
43 #include "rf_mcpair.h"
44 #include "rf_raid.h"
45 #include "rf_dag.h"
46 #include "rf_dagfuncs.h"
47 #include "rf_desc.h"
48 #include "rf_layout.h"
49 #include "rf_diskqueue.h"
50 #include "rf_etimer.h"
51 #include "rf_paritylog.h"
52 #include "rf_general.h"
53 #include "rf_threadid.h"
54 #include "rf_map.h"
55 #include "rf_paritylogging.h"
56 #include "rf_paritylogDiskMgr.h"
57 #include "rf_sys.h"
58 
59 static RF_CommonLogData_t *
60 AllocParityLogCommonData(RF_Raid_t * raidPtr)
61 {
62 	RF_CommonLogData_t *common = NULL;
63 	int     rc;
64 
65 	/* Return a struct for holding common parity log information from the
66 	 * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
67 	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
68 
69 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
70 	if (raidPtr->parityLogDiskQueue.freeCommonList) {
71 		common = raidPtr->parityLogDiskQueue.freeCommonList;
72 		raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
73 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
74 	} else {
75 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
76 		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
77 		rc = rf_mutex_init(&common->mutex);
78 		if (rc) {
79 			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
80 			    __LINE__, rc);
81 			RF_Free(common, sizeof(RF_CommonLogData_t));
82 			common = NULL;
83 		}
84 	}
85 	common->next = NULL;
86 	return (common);
87 }
88 
89 static void
90 FreeParityLogCommonData(RF_CommonLogData_t * common)
91 {
92 	RF_Raid_t *raidPtr;
93 
94 	/* Insert a single struct for holding parity log information (data)
95 	 * into the free list (rf_parityLogDiskQueue.freeCommonList).
96 	 * NON-BLOCKING */
97 
98 	raidPtr = common->raidPtr;
99 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
100 	common->next = raidPtr->parityLogDiskQueue.freeCommonList;
101 	raidPtr->parityLogDiskQueue.freeCommonList = common;
102 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
103 }
104 
105 static RF_ParityLogData_t *
106 AllocParityLogData(RF_Raid_t * raidPtr)
107 {
108 	RF_ParityLogData_t *data = NULL;
109 
110 	/* Return a struct for holding parity log information from the free
111 	 * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
112 	 * call RF_Malloc to create a new structure. NON-BLOCKING */
113 
114 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
115 	if (raidPtr->parityLogDiskQueue.freeDataList) {
116 		data = raidPtr->parityLogDiskQueue.freeDataList;
117 		raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
118 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
119 	} else {
120 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
121 		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
122 	}
123 	data->next = NULL;
124 	data->prev = NULL;
125 	return (data);
126 }
127 
128 
129 static void
130 FreeParityLogData(RF_ParityLogData_t * data)
131 {
132 	RF_ParityLogData_t *nextItem;
133 	RF_Raid_t *raidPtr;
134 
135 	/* Insert a linked list of structs for holding parity log information
136 	 * (data) into the free list (parityLogDiskQueue.freeList).
137 	 * NON-BLOCKING */
138 
139 	raidPtr = data->common->raidPtr;
140 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
141 	while (data) {
142 		nextItem = data->next;
143 		data->next = raidPtr->parityLogDiskQueue.freeDataList;
144 		raidPtr->parityLogDiskQueue.freeDataList = data;
145 		data = nextItem;
146 	}
147 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
148 }
149 
150 
151 static void
152 EnqueueParityLogData(
153     RF_ParityLogData_t * data,
154     RF_ParityLogData_t ** head,
155     RF_ParityLogData_t ** tail)
156 {
157 	RF_Raid_t *raidPtr;
158 
159 	/* Insert an in-core parity log (*data) into the head of a disk queue
160 	 * (*head, *tail). NON-BLOCKING */
161 
162 	raidPtr = data->common->raidPtr;
163 	if (rf_parityLogDebug)
164 		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
165 	RF_ASSERT(data->prev == NULL);
166 	RF_ASSERT(data->next == NULL);
167 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
168 	if (*head) {
169 		/* insert into head of queue */
170 		RF_ASSERT((*head)->prev == NULL);
171 		RF_ASSERT((*tail)->next == NULL);
172 		data->next = *head;
173 		(*head)->prev = data;
174 		*head = data;
175 	} else {
176 		/* insert into empty list */
177 		RF_ASSERT(*head == NULL);
178 		RF_ASSERT(*tail == NULL);
179 		*head = data;
180 		*tail = data;
181 	}
182 	RF_ASSERT((*head)->prev == NULL);
183 	RF_ASSERT((*tail)->next == NULL);
184 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
185 }
186 
187 static RF_ParityLogData_t *
188 DequeueParityLogData(
189     RF_Raid_t * raidPtr,
190     RF_ParityLogData_t ** head,
191     RF_ParityLogData_t ** tail,
192     int ignoreLocks)
193 {
194 	RF_ParityLogData_t *data;
195 
196 	/* Remove and return an in-core parity log from the tail of a disk
197 	 * queue (*head, *tail). NON-BLOCKING */
198 
199 	/* remove from tail, preserving FIFO order */
200 	if (!ignoreLocks)
201 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
202 	data = *tail;
203 	if (data) {
204 		if (*head == *tail) {
205 			/* removing last item from queue */
206 			*head = NULL;
207 			*tail = NULL;
208 		} else {
209 			*tail = (*tail)->prev;
210 			(*tail)->next = NULL;
211 			RF_ASSERT((*head)->prev == NULL);
212 			RF_ASSERT((*tail)->next == NULL);
213 		}
214 		data->next = NULL;
215 		data->prev = NULL;
216 		if (rf_parityLogDebug)
217 			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
218 	}
219 	if (*head) {
220 		RF_ASSERT((*head)->prev == NULL);
221 		RF_ASSERT((*tail)->next == NULL);
222 	}
223 	if (!ignoreLocks)
224 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
225 	return (data);
226 }
227 
228 
229 static void
230 RequeueParityLogData(
231     RF_ParityLogData_t * data,
232     RF_ParityLogData_t ** head,
233     RF_ParityLogData_t ** tail)
234 {
235 	RF_Raid_t *raidPtr;
236 
237 	/* Insert an in-core parity log (*data) into the tail of a disk queue
238 	 * (*head, *tail). NON-BLOCKING */
239 
240 	raidPtr = data->common->raidPtr;
241 	RF_ASSERT(data);
242 	if (rf_parityLogDebug)
243 		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
244 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
245 	if (*tail) {
246 		/* append to tail of list */
247 		data->prev = *tail;
248 		data->next = NULL;
249 		(*tail)->next = data;
250 		*tail = data;
251 	} else {
252 		/* inserting into an empty list */
253 		*head = data;
254 		*tail = data;
255 		(*head)->prev = NULL;
256 		(*tail)->next = NULL;
257 	}
258 	RF_ASSERT((*head)->prev == NULL);
259 	RF_ASSERT((*tail)->next == NULL);
260 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
261 }
262 
263 RF_ParityLogData_t *
264 rf_CreateParityLogData(
265     RF_ParityRecordType_t operation,
266     RF_PhysDiskAddr_t * pda,
267     caddr_t bufPtr,
268     RF_Raid_t * raidPtr,
269     int (*wakeFunc) (RF_DagNode_t * node, int status),
270     void *wakeArg,
271     RF_AccTraceEntry_t * tracerec,
272     RF_Etimer_t startTime)
273 {
274 	RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
275 	RF_CommonLogData_t *common;
276 	RF_PhysDiskAddr_t *diskAddress;
277 	int     boundary, offset = 0;
278 
279 	/* Return an initialized struct of info to be logged. Build one item
280 	 * per physical disk address, one item per region.
281 	 *
282 	 * NON-BLOCKING */
283 
284 	diskAddress = pda;
285 	common = AllocParityLogCommonData(raidPtr);
286 	RF_ASSERT(common);
287 
288 	common->operation = operation;
289 	common->bufPtr = bufPtr;
290 	common->raidPtr = raidPtr;
291 	common->wakeFunc = wakeFunc;
292 	common->wakeArg = wakeArg;
293 	common->tracerec = tracerec;
294 	common->startTime = startTime;
295 	common->cnt = 0;
296 
297 	if (rf_parityLogDebug)
298 		printf("[entering CreateParityLogData]\n");
299 	while (diskAddress) {
300 		common->cnt++;
301 		data = AllocParityLogData(raidPtr);
302 		RF_ASSERT(data);
303 		data->common = common;
304 		data->next = NULL;
305 		data->prev = NULL;
306 		data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
307 		if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
308 			/* disk address does not cross a region boundary */
309 			data->diskAddress = *diskAddress;
310 			data->bufOffset = offset;
311 			offset = offset + diskAddress->numSector;
312 			EnqueueParityLogData(data, &resultHead, &resultTail);
313 			/* adjust disk address */
314 			diskAddress = diskAddress->next;
315 		} else {
316 			/* disk address crosses a region boundary */
317 			/* find address where region is crossed */
318 			boundary = 0;
319 			while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
320 				boundary++;
321 
322 			/* enter data before the boundary */
323 			data->diskAddress = *diskAddress;
324 			data->diskAddress.numSector = boundary;
325 			data->bufOffset = offset;
326 			offset += boundary;
327 			EnqueueParityLogData(data, &resultHead, &resultTail);
328 			/* adjust disk address */
329 			diskAddress->startSector += boundary;
330 			diskAddress->numSector -= boundary;
331 		}
332 	}
333 	if (rf_parityLogDebug)
334 		printf("[leaving CreateParityLogData]\n");
335 	return (resultHead);
336 }
337 
338 
339 RF_ParityLogData_t *
340 rf_SearchAndDequeueParityLogData(
341     RF_Raid_t * raidPtr,
342     int regionID,
343     RF_ParityLogData_t ** head,
344     RF_ParityLogData_t ** tail,
345     int ignoreLocks)
346 {
347 	RF_ParityLogData_t *w;
348 
349 	/* Remove and return an in-core parity log from a specified region
350 	 * (regionID). If a matching log is not found, return NULL.
351 	 *
352 	 * NON-BLOCKING. */
353 
354 	/* walk backward through a list, looking for an entry with a matching
355 	 * region ID */
356 	if (!ignoreLocks)
357 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
358 	w = (*tail);
359 	while (w) {
360 		if (w->regionID == regionID) {
361 			/* remove an element from the list */
362 			if (w == *tail) {
363 				if (*head == *tail) {
364 					/* removing only element in the list */
365 					*head = NULL;
366 					*tail = NULL;
367 				} else {
368 					/* removing last item in the list */
369 					*tail = (*tail)->prev;
370 					(*tail)->next = NULL;
371 					RF_ASSERT((*head)->prev == NULL);
372 					RF_ASSERT((*tail)->next == NULL);
373 				}
374 			} else {
375 				if (w == *head) {
376 					/* removing first item in the list */
377 					*head = (*head)->next;
378 					(*head)->prev = NULL;
379 					RF_ASSERT((*head)->prev == NULL);
380 					RF_ASSERT((*tail)->next == NULL);
381 				} else {
382 					/* removing an item from the middle of
383 					 * the list */
384 					w->prev->next = w->next;
385 					w->next->prev = w->prev;
386 					RF_ASSERT((*head)->prev == NULL);
387 					RF_ASSERT((*tail)->next == NULL);
388 				}
389 			}
390 			w->prev = NULL;
391 			w->next = NULL;
392 			if (rf_parityLogDebug)
393 				printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
394 			return (w);
395 		} else
396 			w = w->prev;
397 	}
398 	if (!ignoreLocks)
399 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
400 	return (NULL);
401 }
402 
403 static RF_ParityLogData_t *
404 DequeueMatchingLogData(
405     RF_Raid_t * raidPtr,
406     RF_ParityLogData_t ** head,
407     RF_ParityLogData_t ** tail)
408 {
409 	RF_ParityLogData_t *logDataList, *logData;
410 	int     regionID;
411 
412 	/* Remove and return an in-core parity log from the tail of a disk
413 	 * queue (*head, *tail).  Then remove all matching (identical
414 	 * regionIDs) logData and return as a linked list.
415 	 *
416 	 * NON-BLOCKING */
417 
418 	logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
419 	if (logDataList) {
420 		regionID = logDataList->regionID;
421 		logData = logDataList;
422 		logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
423 		while (logData->next) {
424 			logData = logData->next;
425 			logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
426 		}
427 	}
428 	return (logDataList);
429 }
430 
431 
432 static RF_ParityLog_t *
433 AcquireParityLog(
434     RF_ParityLogData_t * logData,
435     int finish)
436 {
437 	RF_ParityLog_t *log = NULL;
438 	RF_Raid_t *raidPtr;
439 
440 	/* Grab a log buffer from the pool and return it. If no buffers are
441 	 * available, return NULL. NON-BLOCKING */
442 	raidPtr = logData->common->raidPtr;
443 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
444 	if (raidPtr->parityLogPool.parityLogs) {
445 		log = raidPtr->parityLogPool.parityLogs;
446 		raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
447 		log->regionID = logData->regionID;
448 		log->numRecords = 0;
449 		log->next = NULL;
450 		raidPtr->logsInUse++;
451 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
452 	} else {
453 		/* no logs available, so place ourselves on the queue of work
454 		 * waiting on log buffers this is done while
455 		 * parityLogPool.mutex is held, to ensure synchronization with
456 		 * ReleaseParityLogs. */
457 		if (rf_parityLogDebug)
458 			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
459 		if (finish)
460 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
461 		else
462 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
463 	}
464 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
465 	return (log);
466 }
467 
468 void
469 rf_ReleaseParityLogs(
470     RF_Raid_t * raidPtr,
471     RF_ParityLog_t * firstLog)
472 {
473 	RF_ParityLogData_t *logDataList;
474 	RF_ParityLog_t *log, *lastLog;
475 	int     cnt;
476 
477 	/* Insert a linked list of parity logs (firstLog) to the free list
478 	 * (parityLogPool.parityLogPool)
479 	 *
480 	 * NON-BLOCKING. */
481 
482 	RF_ASSERT(firstLog);
483 
484 	/* Before returning logs to global free list, service all requests
485 	 * which are blocked on logs.  Holding mutexes for parityLogPool and
486 	 * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
487 	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
488 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
489 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
490 	log = firstLog;
491 	if (firstLog)
492 		firstLog = firstLog->next;
493 	log->numRecords = 0;
494 	log->next = NULL;
495 	while (logDataList && log) {
496 		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
497 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
498 		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
499 		if (rf_parityLogDebug)
500 			printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
501 		if (log == NULL) {
502 			log = firstLog;
503 			if (firstLog) {
504 				firstLog = firstLog->next;
505 				log->numRecords = 0;
506 				log->next = NULL;
507 			}
508 		}
509 		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
510 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
511 		if (log)
512 			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
513 	}
514 	/* return remaining logs to pool */
515 	if (log) {
516 		log->next = firstLog;
517 		firstLog = log;
518 	}
519 	if (firstLog) {
520 		lastLog = firstLog;
521 		raidPtr->logsInUse--;
522 		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
523 		while (lastLog->next) {
524 			lastLog = lastLog->next;
525 			raidPtr->logsInUse--;
526 			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
527 		}
528 		lastLog->next = raidPtr->parityLogPool.parityLogs;
529 		raidPtr->parityLogPool.parityLogs = firstLog;
530 		cnt = 0;
531 		log = raidPtr->parityLogPool.parityLogs;
532 		while (log) {
533 			cnt++;
534 			log = log->next;
535 		}
536 		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
537 	}
538 	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
539 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
540 }
541 
542 static void
543 ReintLog(
544     RF_Raid_t * raidPtr,
545     int regionID,
546     RF_ParityLog_t * log)
547 {
548 	RF_ASSERT(log);
549 
550 	/* Insert an in-core parity log (log) into the disk queue of
551 	 * reintegration work.  Set the flag (reintInProgress) for the
552 	 * specified region (regionID) to indicate that reintegration is in
553 	 * progress for this region. NON-BLOCKING */
554 
555 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
556 	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;	/* cleared when reint
557 									 * complete */
558 
559 	if (rf_parityLogDebug)
560 		printf("[requesting reintegration of region %d]\n", log->regionID);
561 	/* move record to reintegration queue */
562 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
563 	log->next = raidPtr->parityLogDiskQueue.reintQueue;
564 	raidPtr->parityLogDiskQueue.reintQueue = log;
565 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
566 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
567 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
568 }
569 
570 static void
571 FlushLog(
572     RF_Raid_t * raidPtr,
573     RF_ParityLog_t * log)
574 {
575 	/* insert a core log (log) into a list of logs
576 	 * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
577 	 * NON-BLOCKING */
578 
579 	RF_ASSERT(log);
580 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
581 	RF_ASSERT(log->next == NULL);
582 	/* move log to flush queue */
583 	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
584 	log->next = raidPtr->parityLogDiskQueue.flushQueue;
585 	raidPtr->parityLogDiskQueue.flushQueue = log;
586 	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
587 	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
588 }
589 
590 static int
591 DumpParityLogToDisk(
592     int finish,
593     RF_ParityLogData_t * logData)
594 {
595 	int     i, diskCount, regionID = logData->regionID;
596 	RF_ParityLog_t *log;
597 	RF_Raid_t *raidPtr;
598 
599 	raidPtr = logData->common->raidPtr;
600 
601 	/* Move a core log to disk.  If the log disk is full, initiate
602 	 * reintegration.
603 	 *
604 	 * Return (0) if we can enqueue the dump immediately, otherwise return
605 	 * (1) to indicate we are blocked on reintegration and control of the
606 	 * thread should be relinquished.
607 	 *
608 	 * Caller must hold regionInfo[regionID].mutex
609 	 *
610 	 * NON-BLOCKING */
611 
612 	if (rf_parityLogDebug)
613 		printf("[dumping parity log to disk, region %d]\n", regionID);
614 	log = raidPtr->regionInfo[regionID].coreLog;
615 	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
616 	RF_ASSERT(log->next == NULL);
617 
618 	/* if reintegration is in progress, must queue work */
619 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620 	if (raidPtr->regionInfo[regionID].reintInProgress) {
621 		/* Can not proceed since this region is currently being
622 		 * reintegrated. We can not block, so queue remaining work and
623 		 * return */
624 		if (rf_parityLogDebug)
625 			printf("[region %d waiting on reintegration]\n", regionID);
626 		/* XXX not sure about the use of finish - shouldn't this
627 		 * always be "Enqueue"? */
628 		if (finish)
629 			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
630 		else
631 			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
632 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
633 		return (1);	/* relenquish control of this thread */
634 	}
635 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
636 	raidPtr->regionInfo[regionID].coreLog = NULL;
637 	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
638 		/* IMPORTANT!! this loop bound assumes region disk holds an
639 		 * integral number of core logs */
640 	{
641 		/* update disk map for this region */
642 		diskCount = raidPtr->regionInfo[regionID].diskCount;
643 		for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
644 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
645 			raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
646 		}
647 		log->diskOffset = diskCount;
648 		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
649 		FlushLog(raidPtr, log);
650 	} else {
651 		/* no room for log on disk, send it to disk manager and
652 		 * request reintegration */
653 		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
654 		ReintLog(raidPtr, regionID, log);
655 	}
656 	if (rf_parityLogDebug)
657 		printf("[finished dumping parity log to disk, region %d]\n", regionID);
658 	return (0);
659 }
660 
661 int
662 rf_ParityLogAppend(
663     RF_ParityLogData_t * logData,
664     int finish,
665     RF_ParityLog_t ** incomingLog,
666     int clearReintFlag)
667 {
668 	int     regionID, logItem, itemDone;
669 	RF_ParityLogData_t *item;
670 	int     punt, done = RF_FALSE;
671 	RF_ParityLog_t *log;
672 	RF_Raid_t *raidPtr;
673 	RF_Etimer_t timer;
674 	int     (*wakeFunc) (RF_DagNode_t * node, int status);
675 	void   *wakeArg;
676 
677 	/* Add parity to the appropriate log, one sector at a time. This
678 	 * routine is called is called by dag functions ParityLogUpdateFunc
679 	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
680 	 *
681 	 * Parity to be logged is contained in a linked-list (logData).  When
682 	 * this routine returns, every sector in the list will be in one of
683 	 * three places: 1) entered into the parity log 2) queued, waiting on
684 	 * reintegration 3) queued, waiting on a core log
685 	 *
686 	 * Blocked work is passed to the ParityLoggingDiskManager for completion.
687 	 * Later, as conditions which required the block are removed, the work
688 	 * reenters this routine with the "finish" parameter set to "RF_TRUE."
689 	 *
690 	 * NON-BLOCKING */
691 
692 	raidPtr = logData->common->raidPtr;
693 	/* lock the region for the first item in logData */
694 	RF_ASSERT(logData != NULL);
695 	regionID = logData->regionID;
696 	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
697 	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
698 
699 	if (clearReintFlag) {
700 		/* Enable flushing for this region.  Holding both locks
701 		 * provides a synchronization barrier with DumpParityLogToDisk */
702 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
703 		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
704 		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
705 		raidPtr->regionInfo[regionID].diskCount = 0;
706 		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
707 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);	/* flushing is now
708 										 * enabled */
709 		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
710 	}
711 	/* process each item in logData */
712 	while (logData) {
713 		/* remove an item from logData */
714 		item = logData;
715 		logData = logData->next;
716 		item->next = NULL;
717 		item->prev = NULL;
718 
719 		if (rf_parityLogDebug)
720 			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
721 
722 		/* see if we moved to a new region */
723 		if (regionID != item->regionID) {
724 			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
725 			regionID = item->regionID;
726 			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
727 			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
728 		}
729 		punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
730 				 * can happen in one of two ways: 1) no core
731 				 * log (AcquireParityLog) 2) waiting on
732 				 * reintegration (DumpParityLogToDisk) If punt
733 				 * is RF_TRUE, the dataItem was queued, so
734 				 * skip to next item. */
735 
736 		/* process item, one sector at a time, until all sectors
737 		 * processed or we punt */
738 		if (item->diskAddress.numSector > 0)
739 			done = RF_FALSE;
740 		else
741 			RF_ASSERT(0);
742 		while (!punt && !done) {
743 			/* verify that a core log exists for this region */
744 			if (!raidPtr->regionInfo[regionID].coreLog) {
745 				/* Attempt to acquire a parity log. If
746 				 * acquisition fails, queue remaining work in
747 				 * data item and move to nextItem. */
748 				if (incomingLog)
749 					if (*incomingLog) {
750 						RF_ASSERT((*incomingLog)->next == NULL);
751 						raidPtr->regionInfo[regionID].coreLog = *incomingLog;
752 						raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
753 						*incomingLog = NULL;
754 					} else
755 						raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
756 				else
757 					raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
758 				/* Note: AcquireParityLog either returns a log
759 				 * or enqueues currentItem */
760 			}
761 			if (!raidPtr->regionInfo[regionID].coreLog)
762 				punt = RF_TRUE;	/* failed to find a core log */
763 			else {
764 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
765 				/* verify that the log has room for new
766 				 * entries */
767 				/* if log is full, dump it to disk and grab a
768 				 * new log */
769 				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
770 					/* log is full, dump it to disk */
771 					if (DumpParityLogToDisk(finish, item))
772 						punt = RF_TRUE;	/* dump unsuccessful,
773 								 * blocked on
774 								 * reintegration */
775 					else {
776 						/* dump was successful */
777 						if (incomingLog)
778 							if (*incomingLog) {
779 								RF_ASSERT((*incomingLog)->next == NULL);
780 								raidPtr->regionInfo[regionID].coreLog = *incomingLog;
781 								raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
782 								*incomingLog = NULL;
783 							} else
784 								raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
785 						else
786 							raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
787 						/* if a core log is not
788 						 * available, must queue work
789 						 * and return */
790 						if (!raidPtr->regionInfo[regionID].coreLog)
791 							punt = RF_TRUE;	/* blocked on log
792 									 * availability */
793 					}
794 				}
795 			}
796 			/* if we didn't punt on this item, attempt to add a
797 			 * sector to the core log */
798 			if (!punt) {
799 				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
800 				/* at this point, we have a core log with
801 				 * enough room for a sector */
802 				/* copy a sector into the log */
803 				log = raidPtr->regionInfo[regionID].coreLog;
804 				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
805 				logItem = log->numRecords++;
806 				log->records[logItem].parityAddr = item->diskAddress;
807 				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
808 				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
809 				log->records[logItem].parityAddr.numSector = 1;
810 				log->records[logItem].operation = item->common->operation;
811 				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
812 				item->diskAddress.numSector--;
813 				item->diskAddress.startSector++;
814 				if (item->diskAddress.numSector == 0)
815 					done = RF_TRUE;
816 			}
817 		}
818 
819 		if (!punt) {
820 			/* Processed this item completely, decrement count of
821 			 * items to be processed. */
822 			RF_ASSERT(item->diskAddress.numSector == 0);
823 			RF_LOCK_MUTEX(item->common->mutex);
824 			item->common->cnt--;
825 			if (item->common->cnt == 0)
826 				itemDone = RF_TRUE;
827 			else
828 				itemDone = RF_FALSE;
829 			RF_UNLOCK_MUTEX(item->common->mutex);
830 			if (itemDone) {
831 				/* Finished processing all log data for this
832 				 * IO Return structs to free list and invoke
833 				 * wakeup function. */
834 				timer = item->common->startTime;	/* grab initial value of
835 									 * timer */
836 				RF_ETIMER_STOP(timer);
837 				RF_ETIMER_EVAL(timer);
838 				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
839 				if (rf_parityLogDebug)
840 					printf("[waking process for region %d]\n", item->regionID);
841 				wakeFunc = item->common->wakeFunc;
842 				wakeArg = item->common->wakeArg;
843 				FreeParityLogCommonData(item->common);
844 				FreeParityLogData(item);
845 				(wakeFunc) (wakeArg, 0);
846 			} else
847 				FreeParityLogData(item);
848 		}
849 	}
850 	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
851 	if (rf_parityLogDebug)
852 		printf("[exiting ParityLogAppend]\n");
853 	return (0);
854 }
855 
856 
857 void
858 rf_EnableParityLogging(RF_Raid_t * raidPtr)
859 {
860 	int     regionID;
861 
862 	for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
863 		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
864 		raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
865 		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
866 	}
867 	if (rf_parityLogDebug)
868 		printf("[parity logging enabled]\n");
869 }
870 #endif				/* RF_INCLUDE_PARITYLOGGING > 0 */
871