xref: /netbsd-src/sys/dev/raidframe/rf_pq.c (revision dc306354b0b29af51801a7632f1e95265a68cd81)
1 /*	$NetBSD: rf_pq.c,v 1.1 1998/11/13 04:20:32 oster Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Daniel Stodolsky
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * Code for RAID level 6 (P + Q) disk array architecture.
31  *
32  * :
33  * Log: rf_pq.c,v
34  * Revision 1.33  1996/11/05 21:10:40  jimz
35  * failed pda generalization
36  *
37  * Revision 1.32  1996/07/31  16:29:50  jimz
38  * "fix" math on 32-bit machines using RF_LONGSHIFT
39  * (may be incorrect)
40  *
41  * Revision 1.31  1996/07/31  15:35:01  jimz
42  * evenodd changes; bugfixes for double-degraded archs, generalize
43  * some formerly PQ-only functions
44  *
45  * Revision 1.30  1996/07/27  23:36:08  jimz
46  * Solaris port of simulator
47  *
48  * Revision 1.29  1996/07/22  19:52:16  jimz
49  * switched node params to RF_DagParam_t, a union of
50  * a 64-bit int and a void *, for better portability
51  * attempted hpux port, but failed partway through for
52  * lack of a single C compiler capable of compiling all
53  * source files
54  *
55  * Revision 1.28  1996/06/09  02:36:46  jimz
56  * lots of little crufty cleanup- fixup whitespace
57  * issues, comment #ifdefs, improve typing in some
58  * places (esp size-related)
59  *
60  * Revision 1.27  1996/06/07  21:33:04  jimz
61  * begin using consistent types for sector numbers,
62  * stripe numbers, row+col numbers, recon unit numbers
63  *
64  * Revision 1.26  1996/06/02  17:31:48  jimz
65  * Moved a lot of global stuff into array structure, where it belongs.
66  * Fixed up paritylogging, pss modules in this manner. Some general
67  * code cleanup. Removed lots of dead code, some dead files.
68  *
69  * Revision 1.25  1996/05/31  22:26:54  jimz
70  * fix a lot of mapping problems, memory allocation problems
71  * found some weird lock issues, fixed 'em
72  * more code cleanup
73  *
74  * Revision 1.24  1996/05/30  23:22:16  jimz
75  * bugfixes of serialization, timing problems
76  * more cleanup
77  *
78  * Revision 1.23  1996/05/30  12:59:18  jimz
79  * make etimer happier, more portable
80  *
81  * Revision 1.22  1996/05/27  18:56:37  jimz
82  * more code cleanup
83  * better typing
84  * compiles in all 3 environments
85  *
86  * Revision 1.21  1996/05/24  22:17:04  jimz
87  * continue code + namespace cleanup
88  * typed a bunch of flags
89  *
90  * Revision 1.20  1996/05/24  04:28:55  jimz
91  * release cleanup ckpt
92  *
93  * Revision 1.19  1996/05/23  21:46:35  jimz
94  * checkpoint in code cleanup (release prep)
95  * lots of types, function names have been fixed
96  *
97  * Revision 1.18  1996/05/23  00:33:23  jimz
98  * code cleanup: move all debug decls to rf_options.c, all extern
99  * debug decls to rf_options.h, all debug vars preceded by rf_
100  *
101  * Revision 1.17  1996/05/18  19:51:34  jimz
102  * major code cleanup- fix syntax, make some types consistent,
103  * add prototypes, clean out dead code, et cetera
104  *
105  * Revision 1.16  1996/05/17  14:52:04  wvcii
106  * added prototyping to QDelta()
107  *   - changed buf params from volatile unsigned long * to char *
108  * changed QDelta for kernel
109  *   - just bzero the buf since kernel doesn't include pq decode table
110  *
111  * Revision 1.15  1996/05/03  19:40:20  wvcii
112  * added includes for dag library
113  *
114  * Revision 1.14  1995/12/12  18:10:06  jimz
115  * MIN -> RF_MIN, MAX -> RF_MAX, ASSERT -> RF_ASSERT
116  * fix 80-column brain damage in comments
117  *
118  * Revision 1.13  1995/11/30  16:19:55  wvcii
119  * added copyright info
120  *
121  * Revision 1.12  1995/11/07  16:13:47  wvcii
122  * changed PQDagSelect prototype
123  * function no longer returns numHdrSucc, numTermAnt
124  * note:  this file contains node functions which should be
125  * moved to rf_dagfuncs.c so that all node funcs are bundled together
126  *
127  * Revision 1.11  1995/10/04  03:50:33  wvcii
128  * removed panics, minor code cleanup in dag selection
129  *
130  *
131  */
132 
133 #include "rf_archs.h"
134 #include "rf_types.h"
135 #include "rf_raid.h"
136 #include "rf_dag.h"
137 #include "rf_dagffrd.h"
138 #include "rf_dagffwr.h"
139 #include "rf_dagdegrd.h"
140 #include "rf_dagdegwr.h"
141 #include "rf_dagutils.h"
142 #include "rf_dagfuncs.h"
143 #include "rf_threadid.h"
144 #include "rf_etimer.h"
145 #include "rf_pqdeg.h"
146 #include "rf_general.h"
147 #include "rf_map.h"
148 #include "rf_pq.h"
149 #include "rf_sys.h"
150 
151 RF_RedFuncs_t rf_pFuncs = { rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P" };
152 RF_RedFuncs_t rf_pRecoveryFuncs = { rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func" };
153 
154 int rf_RegularONPFunc(node)
155   RF_DagNode_t  *node;
156 {
157   return(rf_RegularXorFunc(node));
158 }
159 
160 /*
161    same as simpleONQ func, but the coefficient is always 1
162 */
163 
164 int rf_SimpleONPFunc(node)
165   RF_DagNode_t  *node;
166 {
167   return(rf_SimpleXorFunc(node));
168 }
169 
170 int rf_RecoveryPFunc(node)
171 RF_DagNode_t *node;
172 {
173   return(rf_RecoveryXorFunc(node));
174 }
175 
176 int rf_RegularPFunc(node)
177   RF_DagNode_t  *node;
178 {
179   return(rf_RegularXorFunc(node));
180 }
181 
182 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
183 
184 static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
185 	unsigned char coeff);
186 static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
187 	unsigned length, unsigned coeff);
188 
189 RF_RedFuncs_t rf_qFuncs = { rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q" };
190 RF_RedFuncs_t rf_qRecoveryFuncs = { rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func" };
191 RF_RedFuncs_t rf_pqRecoveryFuncs = { rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func" };
192 
193 void rf_PQDagSelect(
194   RF_Raid_t             *raidPtr,
195   RF_IoType_t            type,
196   RF_AccessStripeMap_t  *asmap,
197   RF_VoidFuncPtr        *createFunc)
198 {
199   RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
200   unsigned ndfail = asmap->numDataFailed;
201   unsigned npfail = asmap->numParityFailed;
202   unsigned ntfail = npfail + ndfail;
203 
204   RF_ASSERT(RF_IO_IS_R_OR_W(type));
205   if (ntfail > 2)
206     {
207       RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
208       /* *infoFunc = */ *createFunc = NULL;
209       return;
210     }
211 
212   /* ok, we can do this I/O */
213   if (type == RF_IO_TYPE_READ)
214     {
215       switch (ndfail)
216 	{
217 	case 0:
218 	  /* fault free read */
219 	  *createFunc = rf_CreateFaultFreeReadDAG;   /* same as raid 5 */
220 	  break;
221 	case 1:
222 	  /* lost a single data unit */
223 	  /* two cases:
224 	        (1) parity is not lost.
225 		    do a normal raid 5 reconstruct read.
226 		(2) parity is lost.
227 		    do a reconstruct read using "q".
228           */
229 	  if (ntfail == 2) /* also lost redundancy */
230 	    {
231 	      if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
232 		*createFunc = rf_PQ_110_CreateReadDAG;
233 	      else
234 		*createFunc = rf_PQ_101_CreateReadDAG;
235 	    }
236 	  else
237 	    {
238 	      /* P and Q are ok. But is there a failure
239 		 in some unaccessed data unit?
240               */
241 	      if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
242 		*createFunc = rf_PQ_200_CreateReadDAG;
243 	      else
244 		  *createFunc = rf_PQ_100_CreateReadDAG;
245 	    }
246 	  break;
247 	case 2:
248 	  /* lost two data units */
249 	  /* *infoFunc = PQOneTwo; */
250 	  *createFunc = rf_PQ_200_CreateReadDAG;
251 	  break;
252 	}
253       return;
254     }
255 
256   /* a write */
257   switch (ntfail)
258     {
259     case 0: /* fault free */
260       if (rf_suppressLocksAndLargeWrites ||
261 	  (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
262 	   (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
263 
264 	*createFunc = rf_PQCreateSmallWriteDAG;
265       }
266       else {
267 	*createFunc = rf_PQCreateLargeWriteDAG;
268       }
269       break;
270 
271     case 1: /* single disk fault */
272       if (npfail==1)
273 	{
274 	  RF_ASSERT ((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) ||  (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
275 	  if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q)
276 	    { /* q died, treat like normal mode raid5 write.*/
277 	      if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
278 		  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
279 		*createFunc = rf_PQ_001_CreateSmallWriteDAG;
280 	      else
281 		*createFunc = rf_PQ_001_CreateLargeWriteDAG;
282 	    }
283 	  else
284 	    { /* parity died, small write only updating Q */
285 	      if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
286 		  || rf_NumFailedDataUnitsInStripe(raidPtr,asmap))
287 		*createFunc = rf_PQ_010_CreateSmallWriteDAG;
288 	      else
289 		*createFunc = rf_PQ_010_CreateLargeWriteDAG;
290 	    }
291 	}
292       else
293 	{ /* data missing.
294 	     Do a P reconstruct write if only a single data unit
295 	     is lost in the stripe, otherwise a PQ reconstruct
296 	     write. */
297 	  if (rf_NumFailedDataUnitsInStripe(raidPtr,asmap)==2)
298 	    *createFunc = rf_PQ_200_CreateWriteDAG;
299 	  else
300 	    *createFunc = rf_PQ_100_CreateWriteDAG;
301 	}
302       break;
303 
304     case 2: /* two disk faults */
305       switch (npfail)
306 	{
307 	case 2: /* both p and q dead */
308 	  *createFunc = rf_PQ_011_CreateWriteDAG;
309 	  break;
310 	case 1: /* either p or q and dead data */
311 	  RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
312 	  RF_ASSERT ((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) ||  (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
313 	  if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
314 	    *createFunc = rf_PQ_101_CreateWriteDAG;
315 	  else
316 	    *createFunc = rf_PQ_110_CreateWriteDAG;
317 	  break;
318 	case 0: /* double data loss */
319 	  *createFunc = rf_PQ_200_CreateWriteDAG;
320 	  break;
321 	}
322       break;
323 
324     default:  /* more than 2 disk faults */
325       *createFunc = NULL;
326       RF_PANIC();
327     }
328   return;
329 }
330 
331 /*
332    Used as a stop gap info function
333 */
334 static void PQOne(raidPtr, nSucc, nAnte, asmap)
335   RF_Raid_t             *raidPtr;
336   int                   *nSucc;
337   int                   *nAnte;
338   RF_AccessStripeMap_t  *asmap;
339 {
340   *nSucc = *nAnte = 1;
341 }
342 
343 static void PQOneTwo(raidPtr, nSucc, nAnte, asmap)
344   RF_Raid_t             *raidPtr;
345   int                   *nSucc;
346   int                   *nAnte;
347   RF_AccessStripeMap_t  *asmap;
348 {
349   *nSucc = 1;
350   *nAnte = 2;
351 }
352 
353 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
354 {
355   rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
356     rf_RegularPQFunc, RF_FALSE);
357 }
358 
359 int rf_RegularONQFunc(node)
360   RF_DagNode_t  *node;
361 {
362   int np = node->numParams;
363   int d;
364   RF_Raid_t *raidPtr = (RF_Raid_t *)node->params[np-1].p;
365   int i;
366   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
367   RF_Etimer_t timer;
368   char *qbuf, *qpbuf;
369   char *obuf, *nbuf;
370   RF_PhysDiskAddr_t *old, *new;
371   unsigned long coeff;
372   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
373 
374   RF_ETIMER_START(timer);
375 
376   d = (np-3)/4;
377   RF_ASSERT (4*d+3 == np);
378   qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
379   for (i=0; i < d; i++)
380     {
381       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
382       obuf = (char *) node->params[2*i+1].p;
383       new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
384       nbuf = (char *) node->params[2*(d+1+i)+1].p;
385       RF_ASSERT (new->numSector == old->numSector);
386       RF_ASSERT (new->raidAddress == old->raidAddress);
387       /* the stripe unit within the stripe tells us the coefficient to use
388 	 for the multiply. */
389       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
390       /* compute the data unit offset within the column, then add one */
391       coeff = (coeff % raidPtr->Layout.numDataCol);
392       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
393       QDelta(qpbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
394     }
395 
396   RF_ETIMER_STOP(timer);
397   RF_ETIMER_EVAL(timer);
398   tracerec->q_us += RF_ETIMER_VAL_US(timer);
399   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
400   return(0);
401 }
402 
403 /*
404    See the SimpleXORFunc for the difference between a simple and regular func.
405    These Q functions should be used for
406 
407          new q = Q(data,old data,old q)
408 
409    style updates and not for
410 
411          q = ( new data, new data, .... )
412 
413    computations.
414 
415    The simple q takes 2(2d+1)+1 params, where d is the number
416    of stripes written. The order of params is
417    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
418    [2d] old q pda_0, old q buffer
419    [2d_2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
420    raidPtr
421 */
422 
423 int rf_SimpleONQFunc(node)
424   RF_DagNode_t  *node;
425 {
426   int np = node->numParams;
427   int d;
428   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
429   int i;
430   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
431   RF_Etimer_t timer;
432   char *qbuf;
433   char *obuf, *nbuf;
434   RF_PhysDiskAddr_t *old, *new;
435   unsigned long coeff;
436 
437   RF_ETIMER_START(timer);
438 
439   d = (np-3)/4;
440   RF_ASSERT (4*d+3 == np);
441   qbuf = (char *) node->params[2*d+1].p; /* q buffer*/
442   for (i=0; i < d; i++)
443     {
444       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
445       obuf = (char *) node->params[2*i+1].p;
446       new  = (RF_PhysDiskAddr_t *) node->params[2*(d+1+i)].p;
447       nbuf = (char *) node->params[2*(d+1+i)+1].p;
448       RF_ASSERT (new->numSector == old->numSector);
449       RF_ASSERT (new->raidAddress == old->raidAddress);
450       /* the stripe unit within the stripe tells us the coefficient to use
451 	 for the multiply. */
452       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),new->raidAddress);
453       /* compute the data unit offset within the column, then add one */
454       coeff = (coeff % raidPtr->Layout.numDataCol);
455       QDelta(qbuf,obuf,nbuf, rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
456     }
457 
458   RF_ETIMER_STOP(timer);
459   RF_ETIMER_EVAL(timer);
460   tracerec->q_us += RF_ETIMER_VAL_US(timer);
461   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
462   return(0);
463 }
464 
465 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
466 {
467   rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
468 }
469 
470 static void RegularQSubr(node,qbuf)
471   RF_DagNode_t  *node;
472   char          *qbuf;
473 {
474   int np = node->numParams;
475   int d;
476   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
477   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
478   int i;
479   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
480   RF_Etimer_t timer;
481   char *obuf, *qpbuf;
482   RF_PhysDiskAddr_t *old;
483   unsigned long coeff;
484 
485   RF_ETIMER_START(timer);
486 
487   d = (np-1)/2;
488   RF_ASSERT (2*d+1 == np);
489   for (i=0; i < d; i++)
490     {
491       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
492       obuf = (char *) node->params[2*i+1].p;
493       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
494       /* compute the data unit offset within the column, then add one */
495       coeff = (coeff % raidPtr->Layout.numDataCol);
496       /* the input buffers may not all be aligned with the start of the
497 	 stripe. so shift by their sector offset within the stripe unit */
498       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,old->startSector % secPerSU);
499       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
500     }
501 
502   RF_ETIMER_STOP(timer);
503   RF_ETIMER_EVAL(timer);
504   tracerec->q_us += RF_ETIMER_VAL_US(timer);
505 }
506 
507 /*
508    used in degraded writes.
509 */
510 
511 static void DegrQSubr(node)
512   RF_DagNode_t  *node;
513 {
514   int np = node->numParams;
515   int d;
516   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
517   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
518   int i;
519   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
520   RF_Etimer_t timer;
521   char *qbuf = node->results[1];
522   char *obuf, *qpbuf;
523   RF_PhysDiskAddr_t *old;
524   unsigned long coeff;
525   unsigned fail_start;
526   int j;
527 
528   old = (RF_PhysDiskAddr_t *)node->params[np-2].p;
529   fail_start = old->startSector % secPerSU;
530 
531   RF_ETIMER_START(timer);
532 
533   d = (np-2)/2;
534   RF_ASSERT (2*d+2 == np);
535   for (i=0; i < d; i++)
536     {
537       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
538       obuf = (char *) node->params[2*i+1].p;
539       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
540       /* compute the data unit offset within the column, then add one */
541       coeff = (coeff % raidPtr->Layout.numDataCol);
542       /* the input buffers may not all be aligned with the start of the
543 	 stripe. so shift by their sector offset within the stripe unit */
544       j = old->startSector % secPerSU;
545       RF_ASSERT(j >= fail_start);
546       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start);
547       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
548     }
549 
550   RF_ETIMER_STOP(timer);
551   RF_ETIMER_EVAL(timer);
552   tracerec->q_us += RF_ETIMER_VAL_US(timer);
553 }
554 
555 /*
556    Called by large write code to compute the new parity and the new q.
557 
558    structure of the params:
559 
560    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
561    raidPtr
562 
563    for a total of 2d+1 arguments.
564    The result buffers results[0], results[1] are the buffers for the p and q,
565    respectively.
566 
567    We compute Q first, then compute P. The P calculation may try to reuse
568    one of the input buffers for its output, so if we computed P first, we would
569    corrupt the input for the q calculation.
570 */
571 
572 int rf_RegularPQFunc(node)
573   RF_DagNode_t  *node;
574 {
575   RegularQSubr(node,node->results[1]);
576   return(rf_RegularXorFunc(node)); /* does the wakeup */
577 }
578 
579 int rf_RegularQFunc(node)
580   RF_DagNode_t  *node;
581 {
582   /* Almost ... adjust Qsubr args */
583   RegularQSubr(node, node->results[0]);
584   rf_GenericWakeupFunc(node, 0);     /* call wake func explicitly since no I/O in this node */
585   return(0);
586 }
587 
588 /*
589    Called by singly degraded write code to compute the new parity and the new q.
590 
591    structure of the params:
592 
593    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
594    failedPDA raidPtr
595 
596    for a total of 2d+2 arguments.
597    The result buffers results[0], results[1] are the buffers for the parity and q,
598    respectively.
599 
600    We compute Q first, then compute parity. The parity calculation may try to reuse
601    one of the input buffers for its output, so if we computed parity first, we would
602    corrupt the input for the q calculation.
603 
604    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
605 */
606 
607 void rf_Degraded_100_PQFunc(node)
608   RF_DagNode_t  *node;
609 {
610   int np = node->numParams;
611 
612   RF_ASSERT (np >= 2);
613   DegrQSubr(node);
614   rf_RecoveryXorFunc(node);
615 }
616 
617 
/*
   The two functions below are used when reading a stripe with a single
   lost data unit.  The parameters are

   pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr

   and results[0] contains the data buffer, which is originally zero-filled.

*/
627 
628 /* this Q func is used by the degraded-mode dag functions to recover lost data.
629  * the second-to-last parameter is the PDA for the failed portion of the access.
630  * the code here looks at this PDA and assumes that the xor target buffer is
631  * equal in size to the number of sectors in the failed PDA.  It then uses
632  * the other PDAs in the parameter list to determine where within the target
633  * buffer the corresponding data should be xored.
634  *
635  * Recall the basic equation is
636  *
637  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
638  *
639  * so to recover data_j we need
640  *
641  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
642  *
643  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
644  * copying Q into it. Then we need to do a table lookup to convert to solve
645  *   data_j /= J
646  *
647  *
648  */
649 int rf_RecoveryQFunc(node)
650   RF_DagNode_t  *node;
651 {
652   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams-1].p;
653   RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &raidPtr->Layout;
654   RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams-2].p;
655   int i;
656   RF_PhysDiskAddr_t *pda;
657   RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr,failedPDA->startSector);
658   char *srcbuf, *destbuf;
659   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
660   RF_Etimer_t timer;
661   unsigned long coeff;
662 
663   RF_ETIMER_START(timer);
664   /* start by copying Q into the buffer */
665   bcopy(node->params[node->numParams-3].p,node->results[0],
666     rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
667   for (i=0; i<node->numParams-4; i+=2)
668     {
669       RF_ASSERT (node->params[i+1].p != node->results[0]);
670       pda = (RF_PhysDiskAddr_t *) node->params[i].p;
671       srcbuf = (char *) node->params[i+1].p;
672       suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
673       destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr,suoffset-failedSUOffset);
674       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),pda->raidAddress);
675       /* compute the data unit offset within the column */
676       coeff = (coeff % raidPtr->Layout.numDataCol);
677       rf_IncQ((unsigned long *)destbuf, (unsigned long *)srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
678   }
679   /* Do the nasty inversion now */
680   coeff =  (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),failedPDA->startSector) % raidPtr->Layout.numDataCol);
681   rf_InvertQ(node->results[0],node->results[0],rf_RaidAddressToByte(raidPtr,pda->numSector),coeff);
682   RF_ETIMER_STOP(timer);
683   RF_ETIMER_EVAL(timer);
684   tracerec->q_us += RF_ETIMER_VAL_US(timer);
685   rf_GenericWakeupFunc(node, 0);
686   return(0);
687 }
688 
689 int rf_RecoveryPQFunc(node)
690   RF_DagNode_t  *node;
691 {
692   RF_PANIC();
693   return(1);
694 }
695 
696 /*
697    Degraded write Q subroutine.
698    Used when P is dead.
699    Large-write style Q computation.
700    Parameters
701 
702    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
703 
704    We ignore failedPDA.
705 
706    This is a "simple style" recovery func.
707 */
708 
709 void rf_PQ_DegradedWriteQFunc(node)
710   RF_DagNode_t  *node;
711 {
712   int np = node->numParams;
713   int d;
714   RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np-1].p;
715   unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
716   int i;
717   RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
718   RF_Etimer_t timer;
719   char *qbuf = node->results[0];
720   char *obuf, *qpbuf;
721   RF_PhysDiskAddr_t *old;
722   unsigned long coeff;
723   int fail_start,j;
724 
725   old = (RF_PhysDiskAddr_t *) node->params[np-2].p;
726   fail_start = old->startSector % secPerSU;
727 
728   RF_ETIMER_START(timer);
729 
730   d = (np-2)/2;
731   RF_ASSERT (2*d+2 == np);
732 
733   for (i=0; i < d; i++)
734     {
735       old  = (RF_PhysDiskAddr_t *) node->params[2*i].p;
736       obuf = (char *) node->params[2*i+1].p;
737       coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),old->raidAddress);
738       /* compute the data unit offset within the column, then add one */
739       coeff = (coeff % raidPtr->Layout.numDataCol);
740       j = old->startSector % secPerSU;
741       RF_ASSERT(j >= fail_start);
742       qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,j - fail_start);
743       rf_IncQ((unsigned long *)qpbuf,(unsigned long *)obuf,rf_RaidAddressToByte(raidPtr, old->numSector),coeff);
744     }
745 
746   RF_ETIMER_STOP(timer);
747   RF_ETIMER_EVAL(timer);
748   tracerec->q_us += RF_ETIMER_VAL_US(timer);
749   rf_GenericWakeupFunc(node, 0);
750 }
751 
752 
753 
754 
755 /* Q computations */
756 
757 /*
758    coeff - colummn;
759 
760    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
761 
762    on 5-bit basis;
763    length in bytes;
764 */
765 
766 void rf_IncQ(dest,buf,length,coeff)
767   unsigned long   *dest;
768   unsigned long   *buf;
769   unsigned         length;
770   unsigned         coeff;
771 {
772   unsigned long a, d, new;
773   unsigned long a1, a2;
774   unsigned int *q = &(rf_qfor[28-coeff][0]);
775   unsigned r = rf_rn[coeff+1];
776 
777 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
778 #define INSERT(a,i) (a << (5L*i))
779 
780   length /= 8;
781   /* 13 5 bit quants in a 64 bit word */
782   while (length)
783     {
784       a = *buf++;
785       d = *dest;
786       a1 = EXTRACT(a,0) ^ r;
787       a2 = EXTRACT(a,1) ^ r;
788       new = INSERT(a2,1) | a1 ;
789       a1 = EXTRACT(a,2) ^ r;
790       a2 = EXTRACT(a,3) ^ r;
791       a1 = q[a1];
792       a2 = q[a2];
793       new = new | INSERT(a1,2) | INSERT (a2,3);
794       a1 = EXTRACT(a,4) ^ r;
795       a2 = EXTRACT(a,5) ^ r;
796       a1 = q[a1];
797       a2 = q[a2];
798       new = new | INSERT(a1,4) | INSERT (a2,5);
799       a1 = EXTRACT(a,5) ^ r;
800       a2 = EXTRACT(a,6) ^ r;
801       a1 = q[a1];
802       a2 = q[a2];
803       new = new | INSERT(a1,5) | INSERT (a2,6);
804 #if RF_LONGSHIFT > 2
805       a1 = EXTRACT(a,7) ^ r;
806       a2 = EXTRACT(a,8) ^ r;
807       a1 = q[a1];
808       a2 = q[a2];
809       new = new | INSERT(a1,7) | INSERT (a2,8);
810       a1 = EXTRACT(a,9) ^ r;
811       a2 = EXTRACT(a,10) ^ r;
812       a1 = q[a1];
813       a2 = q[a2];
814       new = new | INSERT(a1,9) | INSERT (a2,10);
815       a1 = EXTRACT(a,11) ^ r;
816       a2 = EXTRACT(a,12) ^ r;
817       a1 = q[a1];
818       a2 = q[a2];
819       new = new | INSERT(a1,11) | INSERT (a2,12);
820 #endif /* RF_LONGSHIFT > 2 */
821       d ^= new;
822       *dest++ = d;
823       length--;
824     }
825 }
826 
827 /*
828    compute
829 
830    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
831 
832    on a five bit basis.
833    optimization: compute old ^ new on 64 bit basis.
834 
835    length in bytes.
836 */
837 
838 static void QDelta(
839   char           *dest,
840   char           *obuf,
841   char           *nbuf,
842   unsigned        length,
843   unsigned char   coeff)
844 {
845   unsigned long a, d, new;
846   unsigned long a1, a2;
847   unsigned int *q = &(rf_qfor[28-coeff][0]);
848   unsigned r = rf_rn[coeff+1];
849 
850 #ifdef KERNEL
851   /* PQ in kernel currently not supported because the encoding/decoding table is not present */
852   bzero(dest, length);
853 #else  /* KERNEL */
854   /* this code probably doesn't work and should be rewritten  -wvcii */
855   /* 13 5 bit quants in a 64 bit word */
856   length /= 8;
857   while (length)
858     {
859       a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
860       a ^= *nbuf++;
861       d = *dest;
862       a1 = EXTRACT(a,0) ^ r;
863       a2 = EXTRACT(a,1) ^ r;
864       a1 = q[a1];
865       a2 = q[a2];
866       new = INSERT(a2,1) | a1 ;
867       a1 = EXTRACT(a,2) ^ r;
868       a2 = EXTRACT(a,3) ^ r;
869       a1 = q[a1];
870       a2 = q[a2];
871       new = new | INSERT(a1,2) | INSERT (a2,3);
872       a1 = EXTRACT(a,4) ^ r;
873       a2 = EXTRACT(a,5) ^ r;
874       a1 = q[a1];
875       a2 = q[a2];
876       new = new | INSERT(a1,4) | INSERT (a2,5);
877       a1 = EXTRACT(a,5) ^ r;
878       a2 = EXTRACT(a,6) ^ r;
879       a1 = q[a1];
880       a2 = q[a2];
881       new = new | INSERT(a1,5) | INSERT (a2,6);
882 #if RF_LONGSHIFT > 2
883       a1 = EXTRACT(a,7) ^ r;
884       a2 = EXTRACT(a,8) ^ r;
885       a1 = q[a1];
886       a2 = q[a2];
887       new = new | INSERT(a1,7) | INSERT (a2,8);
888       a1 = EXTRACT(a,9) ^ r;
889       a2 = EXTRACT(a,10) ^ r;
890       a1 = q[a1];
891       a2 = q[a2];
892       new = new | INSERT(a1,9) | INSERT (a2,10);
893       a1 = EXTRACT(a,11) ^ r;
894       a2 = EXTRACT(a,12) ^ r;
895       a1 = q[a1];
896       a2 = q[a2];
897       new = new | INSERT(a1,11) | INSERT (a2,12);
898 #endif /* RF_LONGSHIFT > 2 */
899       d ^= new;
900       *dest++ = d;
901       length--;
902     }
903 #endif  /* KERNEL */
904 }
905 
906 /*
907    recover columns a and b from the given p and q into
908    bufs abuf and bbuf. All bufs are word aligned.
909    Length is in bytes.
910 */
911 
912 
913 /*
914  * XXX
915  *
916  * Everything about this seems wrong.
917  */
918 void rf_PQ_recover(pbuf,qbuf,abuf,bbuf,length,coeff_a,coeff_b)
919   unsigned long  *pbuf;
920   unsigned long  *qbuf;
921   unsigned long  *abuf;
922   unsigned long  *bbuf;
923   unsigned        length;
924   unsigned        coeff_a;
925   unsigned        coeff_b;
926 {
927   unsigned long p, q, a, a0, a1;
928   int col = (29 * coeff_a) + coeff_b;
929   unsigned char *q0 = & (rf_qinv[col][0]);
930 
931   length /= 8;
932   while (length)
933     {
934       p  = *pbuf++;
935       q  = *qbuf++;
936       a0 = EXTRACT(p,0);
937       a1 = EXTRACT(q,0);
938       a  = q0[a0<<5 | a1];
939 #define MF(i) \
940       a0 = EXTRACT(p,i); \
941       a1 = EXTRACT(q,i); \
942       a  = a | INSERT(q0[a0<<5 | a1],i)
943 
944       MF(1);
945       MF(2);
946       MF(3);
947       MF(4);
948       MF(5);
949       MF(6);
950 #if 0
951       MF(7);
952       MF(8);
953       MF(9);
954       MF(10);
955       MF(11);
956       MF(12);
957 #endif /* 0 */
958       *abuf++ = a;
959       *bbuf++ = a ^ p;
960       length--;
961     }
962 }
963 
964 /*
965    Lost parity and a data column. Recover that data column.
966    Assume col coeff is lost. Let q the contents of Q after
967    all surviving data columns have been q-xored out of it.
968    Then we have the equation
969 
970    q[28-coeff][a_i ^ r_i+1] = q
971 
972    but q is cyclic with period 31.
973    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
974       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
975 
976    so a_i = r_{coeff+1} ^ q[3+coeff][q]
977 
978    The routine is passed q buffer and the buffer
979    the data is to be recoverd into. They can be the same.
980 */
981 
982 
983 
984 static void rf_InvertQ(
985   unsigned long  *qbuf,
986   unsigned long  *abuf,
987   unsigned        length,
988   unsigned        coeff)
989 {
990   unsigned long a, new;
991   unsigned long a1, a2;
992   unsigned int *q = &(rf_qfor[3+coeff][0]);
993   unsigned r = rf_rn[coeff+1];
994 
995   /* 13 5 bit quants in a 64 bit word */
996   length /= 8;
997   while (length)
998     {
999       a = *qbuf++;
1000       a1 = EXTRACT(a,0);
1001       a2 = EXTRACT(a,1);
1002       a1 = r ^ q[a1];
1003       a2 = r ^ q[a2];
1004       new = INSERT(a2,1) | a1;
1005 #define M(i,j) \
1006       a1 = EXTRACT(a,i); \
1007       a2 = EXTRACT(a,j); \
1008       a1 = r ^ q[a1]; \
1009       a2 = r ^ q[a2]; \
1010       new = new | INSERT(a1,i) | INSERT(a2,j)
1011 
1012       M(2,3);
1013       M(4,5);
1014       M(5,6);
1015 #if RF_LONGSHIFT > 2
1016       M(7,8);
1017       M(9,10);
1018       M(11,12);
1019 #endif /* RF_LONGSHIFT > 2 */
1020       *abuf++ = new;
1021       length--;
1022     }
1023 }
1024 
1025 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
1026