xref: /netbsd-src/sys/dev/raidframe/rf_paritylog.c (revision dc306354b0b29af51801a7632f1e95265a68cd81)
1 /*	$NetBSD: rf_paritylog.c,v 1.1 1998/11/13 04:20:31 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /* Code for manipulating in-core parity logs
30  *
31  * :
32  * Log: rf_paritylog.c,v
33  * Revision 1.27  1996/07/28 20:31:39  jimz
34  * i386netbsd port
35  * true/false fixup
36  *
37  * Revision 1.26  1996/07/27  23:36:08  jimz
38  * Solaris port of simulator
39  *
40  * Revision 1.25  1996/07/17  21:00:58  jimz
41  * clean up timer interface, tracing
42  *
43  * Revision 1.24  1996/06/11  10:18:59  jimz
44  * AllocParityLogCommonData() was freeing the common pointer immediately
45  * after allocating this. It appeared that this free really belonged
46  * inside one of the failure cases (for backing out), so I moved it
47  * in there.
48  *
49  * Revision 1.23  1996/06/05  18:06:02  jimz
50  * Major code cleanup. The Great Renaming is now done.
51  * Better modularity. Better typing. Fixed a bunch of
52  * synchronization bugs. Made a lot of global stuff
53  * per-desc or per-array. Removed dead code.
54  *
55  * Revision 1.22  1996/06/02  17:31:48  jimz
56  * Moved a lot of global stuff into array structure, where it belongs.
57  * Fixed up paritylogging, pss modules in this manner. Some general
58  * code cleanup. Removed lots of dead code, some dead files.
59  *
60  * Revision 1.21  1996/05/31  22:26:54  jimz
61  * fix a lot of mapping problems, memory allocation problems
62  * found some weird lock issues, fixed 'em
63  * more code cleanup
64  *
65  * Revision 1.20  1996/05/30  23:22:16  jimz
66  * bugfixes of serialization, timing problems
67  * more cleanup
68  *
69  * Revision 1.19  1996/05/30  12:59:18  jimz
70  * make etimer happier, more portable
71  *
72  * Revision 1.18  1996/05/27  18:56:37  jimz
73  * more code cleanup
74  * better typing
75  * compiles in all 3 environments
76  *
77  * Revision 1.17  1996/05/24  04:28:55  jimz
78  * release cleanup ckpt
79  *
80  * Revision 1.16  1996/05/23  21:46:35  jimz
81  * checkpoint in code cleanup (release prep)
82  * lots of types, function names have been fixed
83  *
84  * Revision 1.15  1996/05/23  00:33:23  jimz
85  * code cleanup: move all debug decls to rf_options.c, all extern
86  * debug decls to rf_options.h, all debug vars preceded by rf_
87  *
88  * Revision 1.14  1996/05/20  16:16:59  jimz
89  * switch to rf_{mutex,cond}_{init,destroy}
90  *
91  * Revision 1.13  1996/05/18  19:51:34  jimz
92  * major code cleanup- fix syntax, make some types consistent,
93  * add prototypes, clean out dead code, et cetera
94  *
95  * Revision 1.12  1995/12/12  18:10:06  jimz
96  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
97  * fix 80-column brain damage in comments
98  *
99  * Revision 1.11  1995/12/06  20:54:44  wvcii
100  * added prototyping
101  *
102  * Revision 1.10  1995/11/30  16:05:37  wvcii
103  * added copyright info
104  *
105  * Revision 1.9  1995/10/08  20:41:28  wvcii
106  * fixed bug in allocation of CommonLogData (was allocating incorrect size)
107  *
108  * Revision 1.8  1995/09/07  15:52:12  jimz
109  * noop compile when INCLUDE_PARITYLOGGING not defined
110  *
111  * Revision 1.7  1995/09/06  19:17:36  wvcii
112  * moved code for reintegration to rf_paritylogDiskMgr.c
113  *
114  * Revision 1.6  95/07/07  00:16:06  wvcii
115  * this version free from deadlock, fails parity verification
116  *
117  * Revision 1.5  1995/06/09  13:14:24  wvcii
118  * code is now nonblocking
119  *
120  * Revision 1.4  95/06/01  17:01:59  wvcii
121  * code debug
122  *
123  * Revision 1.3  95/05/31  13:08:23  wvcii
124  * code debug
125  *
126  * Revision 1.2  95/05/21  15:42:15  wvcii
127  * code debug
128  *
129  * Revision 1.1  95/05/18  10:43:54  wvcii
130  * Initial revision
131  *
132  */
133 
134 #include "rf_archs.h"
135 
136 #if RF_INCLUDE_PARITYLOGGING > 0
137 
138 /*
139  * Append-only log for recording parity "update" and "overwrite" records
140  */
141 
142 #include "rf_types.h"
143 #include "rf_threadstuff.h"
144 #include "rf_mcpair.h"
145 #include "rf_raid.h"
146 #include "rf_dag.h"
147 #include "rf_dagfuncs.h"
148 #include "rf_desc.h"
149 #include "rf_layout.h"
150 #include "rf_diskqueue.h"
151 #include "rf_etimer.h"
152 #include "rf_paritylog.h"
153 #include "rf_general.h"
154 #include "rf_threadid.h"
155 #include "rf_map.h"
156 #include "rf_paritylogging.h"
157 #include "rf_paritylogDiskMgr.h"
158 #include "rf_sys.h"
159 
160 static RF_CommonLogData_t *AllocParityLogCommonData(RF_Raid_t *raidPtr)
161 {
162   RF_CommonLogData_t *common = NULL;
163   int rc;
164 
165   /* Return a struct for holding common parity log information from the free
166      list (rf_parityLogDiskQueue.freeCommonList).  If the free list is empty, call
167      RF_Malloc to create a new structure.
168      NON-BLOCKING */
169 
170   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
171   if (raidPtr->parityLogDiskQueue.freeCommonList)
172     {
173       common = raidPtr->parityLogDiskQueue.freeCommonList;
174       raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
175       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
176     }
177   else
178     {
179       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
180       RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
181       rc = rf_mutex_init(&common->mutex);
182       if (rc) {
183         RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
184           __LINE__, rc);
185         RF_Free(common, sizeof(RF_CommonLogData_t));
186         common = NULL;
187       }
188     }
189   common->next = NULL;
190   return(common);
191 }
192 
193 static void FreeParityLogCommonData(RF_CommonLogData_t *common)
194 {
195   RF_Raid_t *raidPtr;
196 
197   /* Insert a single struct for holding parity log information
198      (data) into the free list (rf_parityLogDiskQueue.freeCommonList).
199      NON-BLOCKING */
200 
201   raidPtr = common->raidPtr;
202   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
203   common->next = raidPtr->parityLogDiskQueue.freeCommonList;
204   raidPtr->parityLogDiskQueue.freeCommonList = common;
205   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
206 }
207 
208 static RF_ParityLogData_t *AllocParityLogData(RF_Raid_t *raidPtr)
209 {
210   RF_ParityLogData_t *data = NULL;
211 
212   /* Return a struct for holding parity log information from the free
213      list (rf_parityLogDiskQueue.freeList).  If the free list is empty, call
214      RF_Malloc to create a new structure.
215      NON-BLOCKING */
216 
217   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
218   if (raidPtr->parityLogDiskQueue.freeDataList)
219     {
220       data = raidPtr->parityLogDiskQueue.freeDataList;
221       raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
222       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
223     }
224   else
225     {
226       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
227       RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
228     }
229   data->next = NULL;
230   data->prev = NULL;
231   return(data);
232 }
233 
234 
235 static void FreeParityLogData(RF_ParityLogData_t *data)
236 {
237   RF_ParityLogData_t *nextItem;
238   RF_Raid_t *raidPtr;
239 
240   /* Insert a linked list of structs for holding parity log
241      information (data) into the free list (parityLogDiskQueue.freeList).
242      NON-BLOCKING */
243 
244   raidPtr = data->common->raidPtr;
245   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
246   while (data)
247     {
248       nextItem = data->next;
249       data->next = raidPtr->parityLogDiskQueue.freeDataList;
250       raidPtr->parityLogDiskQueue.freeDataList = data;
251       data = nextItem;
252     }
253   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
254 }
255 
256 
257 static void EnqueueParityLogData(
258   RF_ParityLogData_t   *data,
259   RF_ParityLogData_t  **head,
260   RF_ParityLogData_t  **tail)
261 {
262   RF_Raid_t *raidPtr;
263 
264   /* Insert an in-core parity log (*data) into the head of
265      a disk queue (*head, *tail).
266      NON-BLOCKING */
267 
268   raidPtr = data->common->raidPtr;
269   if (rf_parityLogDebug)
270     printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
271   RF_ASSERT(data->prev == NULL);
272   RF_ASSERT(data->next == NULL);
273   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
274   if (*head)
275     {
276       /* insert into head of queue */
277       RF_ASSERT((*head)->prev == NULL);
278       RF_ASSERT((*tail)->next == NULL);
279       data->next = *head;
280       (*head)->prev = data;
281       *head = data;
282     }
283   else
284     {
285       /* insert into empty list */
286       RF_ASSERT(*head == NULL);
287       RF_ASSERT(*tail == NULL);
288       *head = data;
289       *tail = data;
290     }
291   RF_ASSERT((*head)->prev == NULL);
292   RF_ASSERT((*tail)->next == NULL);
293   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
294 }
295 
296 static RF_ParityLogData_t *DequeueParityLogData(
297   RF_Raid_t            *raidPtr,
298   RF_ParityLogData_t  **head,
299   RF_ParityLogData_t  **tail,
300   int                   ignoreLocks)
301 {
302   RF_ParityLogData_t *data;
303 
304   /* Remove and return an in-core parity log from the tail of
305      a disk queue (*head, *tail).
306      NON-BLOCKING */
307 
308   /* remove from tail, preserving FIFO order */
309   if (!ignoreLocks)
310     RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
311   data = *tail;
312   if (data)
313     {
314       if (*head == *tail)
315 	{
316 	  /* removing last item from queue */
317 	  *head = NULL;
318 	  *tail = NULL;
319 	}
320       else
321 	{
322 	  *tail = (*tail)->prev;
323 	  (*tail)->next = NULL;
324 	  RF_ASSERT((*head)->prev == NULL);
325 	  RF_ASSERT((*tail)->next == NULL);
326 	}
327       data->next = NULL;
328       data->prev = NULL;
329       if (rf_parityLogDebug)
330 	printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int)data->diskAddress.numSector);
331     }
332   if (*head)
333     {
334       RF_ASSERT((*head)->prev == NULL);
335       RF_ASSERT((*tail)->next == NULL);
336     }
337   if (!ignoreLocks)
338     RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
339   return(data);
340 }
341 
342 
343 static void RequeueParityLogData(
344   RF_ParityLogData_t   *data,
345   RF_ParityLogData_t  **head,
346   RF_ParityLogData_t  **tail)
347 {
348   RF_Raid_t *raidPtr;
349 
350   /* Insert an in-core parity log (*data) into the tail of
351      a disk queue (*head, *tail).
352      NON-BLOCKING */
353 
354   raidPtr = data->common->raidPtr;
355   RF_ASSERT(data);
356   if (rf_parityLogDebug)
357     printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n",data->regionID,(int)data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
358   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
359   if (*tail)
360     {
361       /* append to tail of list */
362       data->prev = *tail;
363       data->next = NULL;
364       (*tail)->next = data;
365       *tail = data;
366     }
367   else
368     {
369       /* inserting into an empty list */
370       *head = data;
371       *tail = data;
372       (*head)->prev = NULL;
373       (*tail)->next = NULL;
374     }
375   RF_ASSERT((*head)->prev == NULL);
376   RF_ASSERT((*tail)->next == NULL);
377   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
378 }
379 
380 RF_ParityLogData_t *rf_CreateParityLogData(
381   RF_ParityRecordType_t    operation,
382   RF_PhysDiskAddr_t       *pda,
383   caddr_t                  bufPtr,
384   RF_Raid_t               *raidPtr,
385   int                    (*wakeFunc)(RF_DagNode_t *node, int status),
386   void                    *wakeArg,
387   RF_AccTraceEntry_t      *tracerec,
388   RF_Etimer_t              startTime)
389 {
390   RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
391   RF_CommonLogData_t *common;
392   RF_PhysDiskAddr_t *diskAddress;
393   int boundary, offset = 0;
394 
395   /* Return an initialized struct of info to be logged.
396      Build one item per physical disk address, one item per region.
397 
398      NON-BLOCKING */
399 
400   diskAddress = pda;
401   common = AllocParityLogCommonData(raidPtr);
402   RF_ASSERT(common);
403 
404   common->operation = operation;
405   common->bufPtr = bufPtr;
406   common->raidPtr = raidPtr;
407   common->wakeFunc = wakeFunc;
408   common->wakeArg = wakeArg;
409   common->tracerec = tracerec;
410   common->startTime = startTime;
411   common->cnt = 0;
412 
413   if (rf_parityLogDebug)
414     printf("[entering CreateParityLogData]\n");
415   while (diskAddress)
416     {
417       common->cnt++;
418       data = AllocParityLogData(raidPtr);
419       RF_ASSERT(data);
420       data->common = common;
421       data->next = NULL;
422       data->prev = NULL;
423       data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
424       if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1))
425 	{
426 	  /* disk address does not cross a region boundary */
427 	  data->diskAddress = *diskAddress;
428 	  data->bufOffset = offset;
429 	  offset = offset + diskAddress->numSector;
430 	  EnqueueParityLogData(data, &resultHead, &resultTail);
431 	  /* adjust disk address */
432 	  diskAddress = diskAddress->next;
433 	}
434       else
435 	{
436 	  /* disk address crosses a region boundary */
437 	  /* find address where region is crossed */
438 	  boundary = 0;
439 	  while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
440 	    boundary++;
441 
442 	  /* enter data before the boundary */
443 	  data->diskAddress = *diskAddress;
444 	  data->diskAddress.numSector = boundary;
445 	  data->bufOffset = offset;
446 	  offset += boundary;
447 	  EnqueueParityLogData(data, &resultHead, &resultTail);
448 	  /* adjust disk address */
449 	  diskAddress->startSector += boundary;
450 	  diskAddress->numSector -= boundary;
451 	}
452     }
453   if (rf_parityLogDebug)
454     printf("[leaving CreateParityLogData]\n");
455   return(resultHead);
456 }
457 
458 
459 RF_ParityLogData_t *rf_SearchAndDequeueParityLogData(
460   RF_Raid_t            *raidPtr,
461   int                   regionID,
462   RF_ParityLogData_t  **head,
463   RF_ParityLogData_t  **tail,
464   int                   ignoreLocks)
465 {
466   RF_ParityLogData_t *w;
467 
468   /* Remove and return an in-core parity log from a specified region (regionID).
469      If a matching log is not found, return NULL.
470 
471      NON-BLOCKING.
472      */
473 
474   /* walk backward through a list, looking for an entry with a matching region ID */
475   if (!ignoreLocks)
476     RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
477   w = (*tail);
478   while (w)
479     {
480       if (w->regionID == regionID)
481 	{
482 	  /* remove an element from the list */
483 	  if (w == *tail)
484 	    {
485 	      if (*head == *tail)
486 		{
487 		  /* removing only element in the list */
488 		  *head = NULL;
489 		  *tail = NULL;
490 		}
491 	      else
492 		{
493 		  /* removing last item in the list */
494 		  *tail = (*tail)->prev;
495 		  (*tail)->next = NULL;
496 		  RF_ASSERT((*head)->prev == NULL);
497 		  RF_ASSERT((*tail)->next == NULL);
498 		}
499 	    }
500 	  else
501 	    {
502 	      if (w == *head)
503 		{
504 		  /* removing first item in the list */
505 		  *head = (*head)->next;
506 		  (*head)->prev = NULL;
507 		  RF_ASSERT((*head)->prev == NULL);
508 		  RF_ASSERT((*tail)->next == NULL);
509 		}
510 	      else
511 		{
512 		  /* removing an item from the middle of the list */
513 		  w->prev->next = w->next;
514 		  w->next->prev = w->prev;
515 		  RF_ASSERT((*head)->prev == NULL);
516 		  RF_ASSERT((*tail)->next == NULL);
517 		}
518 	    }
519 	  w->prev = NULL;
520 	  w->next = NULL;
521 	  if (rf_parityLogDebug)
522 	    printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n",w->regionID,(int)w->diskAddress.raidAddress,(int) w->diskAddress.numSector);
523 	  return(w);
524 	}
525       else
526 	w = w->prev;
527     }
528   if (!ignoreLocks)
529     RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
530   return(NULL);
531 }
532 
533 static RF_ParityLogData_t *DequeueMatchingLogData(
534   RF_Raid_t            *raidPtr,
535   RF_ParityLogData_t  **head,
536   RF_ParityLogData_t  **tail)
537 {
538   RF_ParityLogData_t *logDataList, *logData;
539   int regionID;
540 
541   /* Remove and return an in-core parity log from the tail of
542      a disk queue (*head, *tail).  Then remove all matching
543      (identical regionIDs) logData and return as a linked list.
544 
545      NON-BLOCKING
546      */
547 
548   logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
549   if (logDataList)
550     {
551       regionID = logDataList->regionID;
552       logData = logDataList;
553       logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
554       while (logData->next)
555 	{
556 	  logData = logData->next;
557 	  logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
558 	}
559     }
560   return(logDataList);
561 }
562 
563 
564 static RF_ParityLog_t *AcquireParityLog(
565   RF_ParityLogData_t  *logData,
566   int                  finish)
567 {
568   RF_ParityLog_t *log = NULL;
569   RF_Raid_t *raidPtr;
570 
571   /* Grab a log buffer from the pool and return it.
572      If no buffers are available, return NULL.
573      NON-BLOCKING
574      */
575   raidPtr = logData->common->raidPtr;
576   RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
577   if (raidPtr->parityLogPool.parityLogs)
578     {
579       log = raidPtr->parityLogPool.parityLogs;
580       raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
581       log->regionID = logData->regionID;
582       log->numRecords = 0;
583       log->next = NULL;
584       raidPtr->logsInUse++;
585       RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
586     }
587   else
588     {
589       /* no logs available, so place ourselves on the queue of work waiting on log buffers
590 	 this is done while parityLogPool.mutex is held, to ensure synchronization
591 	 with ReleaseParityLogs.
592 	 */
593       if (rf_parityLogDebug)
594 	printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
595       if (finish)
596 	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
597       else
598 	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
599     }
600   RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
601   return(log);
602 }
603 
604 void rf_ReleaseParityLogs(
605   RF_Raid_t       *raidPtr,
606   RF_ParityLog_t  *firstLog)
607 {
608   RF_ParityLogData_t *logDataList;
609   RF_ParityLog_t *log, *lastLog;
610   int cnt;
611 
612   /* Insert a linked list of parity logs (firstLog) to
613      the free list (parityLogPool.parityLogPool)
614 
615      NON-BLOCKING.
616      */
617 
618   RF_ASSERT(firstLog);
619 
620   /* Before returning logs to global free list, service all
621      requests which are blocked on logs.  Holding mutexes for parityLogPool and parityLogDiskQueue
622      forces synchronization with AcquireParityLog().
623      */
624   RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
625   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
626   logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
627   log = firstLog;
628   if (firstLog)
629     firstLog = firstLog->next;
630   log->numRecords = 0;
631   log->next = NULL;
632   while (logDataList && log)
633     {
634       RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
635       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
636       rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
637       if (rf_parityLogDebug)
638 	printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
639       if (log == NULL)
640 	{
641 	  log = firstLog;
642 	  if (firstLog)
643 	    {
644 	      firstLog = firstLog->next;
645 	      log->numRecords = 0;
646 	      log->next = NULL;
647 	    }
648 	}
649       RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
650       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
651       if (log)
652 	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
653     }
654   /* return remaining logs to pool */
655   if (log)
656     {
657       log->next = firstLog;
658       firstLog = log;
659     }
660   if (firstLog)
661     {
662       lastLog = firstLog;
663       raidPtr->logsInUse--;
664       RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
665       while (lastLog->next)
666 	{
667 	  lastLog = lastLog->next;
668 	  raidPtr->logsInUse--;
669 	  RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
670 	}
671       lastLog->next = raidPtr->parityLogPool.parityLogs;
672       raidPtr->parityLogPool.parityLogs = firstLog;
673       cnt = 0;
674       log = raidPtr->parityLogPool.parityLogs;
675       while (log)
676 	{
677 	  cnt++;
678 	  log = log->next;
679 	}
680       RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
681     }
682   RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
683   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
684 }
685 
686 static void ReintLog(
687   RF_Raid_t       *raidPtr,
688   int              regionID,
689   RF_ParityLog_t  *log)
690 {
691   RF_ASSERT(log);
692 
693   /* Insert an in-core parity log (log) into the disk queue of reintegration
694      work.  Set the flag (reintInProgress) for the specified region (regionID)
695      to indicate that reintegration is in progress for this region.
696      NON-BLOCKING
697      */
698 
699   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
700   raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;  /* cleared when reint complete */
701 
702   if (rf_parityLogDebug)
703     printf("[requesting reintegration of region %d]\n", log->regionID);
704   /* move record to reintegration queue */
705   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
706   log->next = raidPtr->parityLogDiskQueue.reintQueue;
707   raidPtr->parityLogDiskQueue.reintQueue = log;
708   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
709   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
710   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
711 }
712 
713 static void FlushLog(
714   RF_Raid_t       *raidPtr,
715   RF_ParityLog_t  *log)
716 {
717   /* insert a core log (log) into a list of logs (parityLogDiskQueue.flushQueue)
718      waiting to be written to disk.
719      NON-BLOCKING
720      */
721 
722   RF_ASSERT(log);
723   RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
724   RF_ASSERT(log->next == NULL);
725   /* move log to flush queue */
726   RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
727   log->next = raidPtr->parityLogDiskQueue.flushQueue;
728   raidPtr->parityLogDiskQueue.flushQueue = log;
729   RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
730   RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
731 }
732 
733 static int DumpParityLogToDisk(
734   int                  finish,
735   RF_ParityLogData_t  *logData)
736 {
737   int i, diskCount, regionID = logData->regionID;
738   RF_ParityLog_t *log;
739   RF_Raid_t *raidPtr;
740 
741   raidPtr = logData->common->raidPtr;
742 
743   /* Move a core log to disk.  If the log disk is full, initiate
744      reintegration.
745 
746      Return (0) if we can enqueue the dump immediately, otherwise
747      return (1) to indicate we are blocked on reintegration and
748      control of the thread should be relinquished.
749 
750      Caller must hold regionInfo[regionID].mutex
751 
752      NON-BLOCKING
753      */
754 
755   if (rf_parityLogDebug)
756     printf("[dumping parity log to disk, region %d]\n", regionID);
757   log = raidPtr->regionInfo[regionID].coreLog;
758   RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
759   RF_ASSERT(log->next == NULL);
760 
761   /* if reintegration is in progress, must queue work */
762   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
763   if (raidPtr->regionInfo[regionID].reintInProgress)
764     {
765       /* Can not proceed since this region is currently being reintegrated.
766 	 We can not block, so queue remaining work and return */
767       if (rf_parityLogDebug)
768 	printf("[region %d waiting on reintegration]\n",regionID);
769       /* XXX not sure about the use of finish - shouldn't this always be "Enqueue"? */
770       if (finish)
771 	RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
772       else
773 	EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
774       RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
775       return(1);  /* relenquish control of this thread */
776     }
777   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
778   raidPtr->regionInfo[regionID].coreLog = NULL;
779   if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
780     /* IMPORTANT!! this loop bound assumes region disk holds an integral number of core logs */
781     {
782       /* update disk map for this region */
783       diskCount = raidPtr->regionInfo[regionID].diskCount;
784       for (i = 0; i < raidPtr->numSectorsPerLog; i++)
785 	{
786 	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
787 	  raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
788 	}
789       log->diskOffset = diskCount;
790       raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
791       FlushLog(raidPtr, log);
792     }
793   else
794     {
795       /* no room for log on disk, send it to disk manager and request reintegration */
796       RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
797       ReintLog(raidPtr, regionID, log);
798     }
799   if (rf_parityLogDebug)
800     printf("[finished dumping parity log to disk, region %d]\n", regionID);
801   return(0);
802 }
803 
804 int rf_ParityLogAppend(
805   RF_ParityLogData_t   *logData,
806   int                   finish,
807   RF_ParityLog_t      **incomingLog,
808   int                   clearReintFlag)
809 {
810   int regionID, logItem, itemDone;
811   RF_ParityLogData_t *item;
812   int punt, done = RF_FALSE;
813   RF_ParityLog_t *log;
814   RF_Raid_t *raidPtr;
815   RF_Etimer_t timer;
816   int (*wakeFunc)(RF_DagNode_t *node, int status);
817   void *wakeArg;
818 
819   /* Add parity to the appropriate log, one sector at a time.
820      This routine is called is called by dag functions ParityLogUpdateFunc
821      and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
822 
823      Parity to be logged is contained in a linked-list (logData).  When
824      this routine returns, every sector in the list will be in one of
825      three places:
826        1) entered into the parity log
827        2) queued, waiting on reintegration
828        3) queued, waiting on a core log
829 
830      Blocked work is passed to the ParityLoggingDiskManager for completion.
831      Later, as conditions which required the block are removed, the work
832      reenters this routine with the "finish" parameter set to "RF_TRUE."
833 
834      NON-BLOCKING
835      */
836 
837   raidPtr = logData->common->raidPtr;
838   /* lock the region for the first item in logData */
839   RF_ASSERT(logData != NULL);
840   regionID = logData->regionID;
841   RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
842   RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
843 
844   if (clearReintFlag)
845     {
846       /* Enable flushing for this region.  Holding both locks provides
847 	 a synchronization barrier with DumpParityLogToDisk
848 	 */
849       RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
850       RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
851       RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
852       raidPtr->regionInfo[regionID].diskCount = 0;
853       raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
854       RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now enabled */
855       RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
856     }
857 
858   /* process each item in logData */
859   while (logData)
860     {
861       /* remove an item from logData */
862       item = logData;
863       logData = logData->next;
864       item->next = NULL;
865       item->prev = NULL;
866 
867       if (rf_parityLogDebug)
868 	printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n",item->regionID,(int)item->diskAddress.raidAddress, (int)item->diskAddress.numSector);
869 
870       /* see if we moved to a new region */
871       if (regionID != item->regionID)
872 	{
873 	  RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
874 	  regionID = item->regionID;
875 	  RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
876 	  RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
877 	}
878 
879       punt = RF_FALSE;  /* Set to RF_TRUE if work is blocked.  This can happen in one of two ways:
880 		          1) no core log (AcquireParityLog)
881 			  2) waiting on reintegration (DumpParityLogToDisk)
882 			If punt is RF_TRUE, the dataItem was queued, so skip to next item.
883 			*/
884 
885       /* process item, one sector at a time, until all sectors processed or we punt */
886       if (item->diskAddress.numSector > 0)
887 	done = RF_FALSE;
888       else
889 	RF_ASSERT(0);
890       while (!punt && !done)
891 	{
892 	  /* verify that a core log exists for this region */
893 	  if (!raidPtr->regionInfo[regionID].coreLog)
894 	    {
895 	      /* Attempt to acquire a parity log.
896 		 If acquisition fails, queue remaining work in data item and move to nextItem.
897 		 */
898 	      if (incomingLog)
899 		if (*incomingLog)
900 		  {
901 		    RF_ASSERT((*incomingLog)->next == NULL);
902 		    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
903 		    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
904 		    *incomingLog = NULL;
905 		  }
906 		else
907 		  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
908 	      else
909 		raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
910 	      /* Note: AcquireParityLog either returns a log or enqueues currentItem */
911 	    }
912 	  if (!raidPtr->regionInfo[regionID].coreLog)
913 	    punt = RF_TRUE; /* failed to find a core log */
914 	  else
915 	    {
916 	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
917 	      /* verify that the log has room for new entries */
918 	      /* if log is full, dump it to disk and grab a new log */
919 	      if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog)
920 		{
921 		  /* log is full, dump it to disk */
922 		  if (DumpParityLogToDisk(finish, item))
923 		    punt = RF_TRUE; /* dump unsuccessful, blocked on reintegration */
924 		  else
925 		    {
926 		      /* dump was successful */
927 		      if (incomingLog)
928 			if (*incomingLog)
929 			  {
930 			    RF_ASSERT((*incomingLog)->next == NULL);
931 			    raidPtr->regionInfo[regionID].coreLog = *incomingLog;
932 			    raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
933 			    *incomingLog = NULL;
934 			  }
935 			else
936 			  raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
937 		      else
938 			raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
939 		      /* if a core log is not available, must queue work and return */
940 		      if (!raidPtr->regionInfo[regionID].coreLog)
941 			punt = RF_TRUE; /* blocked on log availability */
942 		    }
943 		}
944 	    }
945 	  /* if we didn't punt on this item, attempt to add a sector to the core log */
946 	  if (!punt)
947 	    {
948 	      RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
949 	      /* at this point, we have a core log with enough room for a sector */
950 	      /* copy a sector into the log */
951 	      log = raidPtr->regionInfo[regionID].coreLog;
952 	      RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
953 	      logItem = log->numRecords++;
954 	      log->records[logItem].parityAddr = item->diskAddress;
955 	      RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
956 	      RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
957 	      log->records[logItem].parityAddr.numSector = 1;
958 	      log->records[logItem].operation = item->common->operation;
959 	      bcopy((item->common->bufPtr + (item->bufOffset++ * (1<<item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1<<item->common->raidPtr->logBytesPerSector)), (1<<item->common->raidPtr->logBytesPerSector));
960 	      item->diskAddress.numSector--;
961 	      item->diskAddress.startSector++;
962 	      if (item->diskAddress.numSector == 0)
963 		done = RF_TRUE;
964 	    }
965 	}
966 
967       if (!punt)
968 	{
969 	  /* Processed this item completely, decrement count of items
970 	     to be processed.
971 	     */
972 	  RF_ASSERT(item->diskAddress.numSector == 0);
973 	  RF_LOCK_MUTEX(item->common->mutex);
974 	  item->common->cnt--;
975 	  if (item->common->cnt == 0)
976 	    itemDone = RF_TRUE;
977 	  else
978 	    itemDone = RF_FALSE;
979 	  RF_UNLOCK_MUTEX(item->common->mutex);
980 	  if (itemDone)
981 	    {
982 	      /* Finished processing all log data for this IO
983 		 Return structs to free list and invoke wakeup function.
984 		 */
985 	      timer = item->common->startTime;  /* grab initial value of timer */
986 	      RF_ETIMER_STOP(timer);
987 	      RF_ETIMER_EVAL(timer);
988 	      item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
989 	      if (rf_parityLogDebug)
990 		printf("[waking process for region %d]\n", item->regionID);
991 	      wakeFunc = item->common->wakeFunc;
992 	      wakeArg = item->common->wakeArg;
993 	      FreeParityLogCommonData(item->common);
994 	      FreeParityLogData(item);
995 	      (wakeFunc)(wakeArg, 0);
996 	    }
997 	  else
998 	    FreeParityLogData(item);
999 	}
1000     }
1001   RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1002   if (rf_parityLogDebug)
1003     printf("[exiting ParityLogAppend]\n");
1004   return(0);
1005 }
1006 
1007 
1008 void rf_EnableParityLogging(RF_Raid_t *raidPtr)
1009 {
1010   int regionID;
1011 
1012   for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
1013     RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1014     raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
1015     RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1016   }
1017   if (rf_parityLogDebug)
1018     printf("[parity logging enabled]\n");
1019 }
1020 
1021 #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
1022