xref: /netbsd-src/sys/dev/raidframe/rf_pq.c (revision 3b01aba77a7a698587faaae455bbfe740923c1f5)
1 /*	$NetBSD: rf_pq.c,v 1.9 2001/07/18 06:45:34 thorpej Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Daniel Stodolsky
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * Code for RAID level 6 (P + Q) disk array architecture.
31  */
32 
33 #include "rf_archs.h"
34 
35 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
36 
37 #include "rf_types.h"
38 #include "rf_raid.h"
39 #include "rf_dag.h"
40 #include "rf_dagffrd.h"
41 #include "rf_dagffwr.h"
42 #include "rf_dagdegrd.h"
43 #include "rf_dagdegwr.h"
44 #include "rf_dagutils.h"
45 #include "rf_dagfuncs.h"
46 #include "rf_etimer.h"
47 #include "rf_pqdeg.h"
48 #include "rf_general.h"
49 #include "rf_map.h"
50 #include "rf_pq.h"
51 
52 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
53 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
54 
55 int
56 rf_RegularONPFunc(node)
57 	RF_DagNode_t *node;
58 {
59 	return (rf_RegularXorFunc(node));
60 }
61 /*
62    same as simpleONQ func, but the coefficient is always 1
63 */
64 
65 int
66 rf_SimpleONPFunc(node)
67 	RF_DagNode_t *node;
68 {
69 	return (rf_SimpleXorFunc(node));
70 }
71 
72 int
73 rf_RecoveryPFunc(node)
74 	RF_DagNode_t *node;
75 {
76 	return (rf_RecoveryXorFunc(node));
77 }
78 
79 int
80 rf_RegularPFunc(node)
81 	RF_DagNode_t *node;
82 {
83 	return (rf_RegularXorFunc(node));
84 }
85 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
86 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
87 
88 static void
89 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
90     unsigned char coeff);
91 static void
92 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
93     unsigned length, unsigned coeff);
94 
95 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
96 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
97 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
98 
99 void
100 rf_PQDagSelect(
101     RF_Raid_t * raidPtr,
102     RF_IoType_t type,
103     RF_AccessStripeMap_t * asmap,
104     RF_VoidFuncPtr * createFunc)
105 {
106 	RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
107 	unsigned ndfail = asmap->numDataFailed;
108 	unsigned npfail = asmap->numParityFailed;
109 	unsigned ntfail = npfail + ndfail;
110 
111 	RF_ASSERT(RF_IO_IS_R_OR_W(type));
112 	if (ntfail > 2) {
113 		RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
114 		 /* *infoFunc = */ *createFunc = NULL;
115 		return;
116 	}
117 	/* ok, we can do this I/O */
118 	if (type == RF_IO_TYPE_READ) {
119 		switch (ndfail) {
120 		case 0:
121 			/* fault free read */
122 			*createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;	/* same as raid 5 */
123 			break;
124 		case 1:
125 			/* lost a single data unit */
126 			/* two cases: (1) parity is not lost. do a normal raid
127 			 * 5 reconstruct read. (2) parity is lost. do a
128 			 * reconstruct read using "q". */
129 			if (ntfail == 2) {	/* also lost redundancy */
130 				if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
131 					*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
132 				else
133 					*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
134 			} else {
135 				/* P and Q are ok. But is there a failure in
136 				 * some unaccessed data unit? */
137 				if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
138 					*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
139 				else
140 					*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
141 			}
142 			break;
143 		case 2:
144 			/* lost two data units */
145 			/* *infoFunc = PQOneTwo; */
146 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
147 			break;
148 		}
149 		return;
150 	}
151 	/* a write */
152 	switch (ntfail) {
153 	case 0:		/* fault free */
154 		if (rf_suppressLocksAndLargeWrites ||
155 		    (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
156 			(asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
157 
158 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
159 		} else {
160 			*createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
161 		}
162 		break;
163 
164 	case 1:		/* single disk fault */
165 		if (npfail == 1) {
166 			RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
167 			if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {	/* q died, treat like
168 										 * normal mode raid5
169 										 * write. */
170 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
171 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
172 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
173 				else
174 					*createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
175 			} else {/* parity died, small write only updating Q */
176 				if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
177 				    || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
178 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
179 				else
180 					*createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
181 			}
182 		} else {	/* data missing. Do a P reconstruct write if
183 				 * only a single data unit is lost in the
184 				 * stripe, otherwise a PQ reconstruct write. */
185 			if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
186 				*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
187 			else
188 				*createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
189 		}
190 		break;
191 
192 	case 2:		/* two disk faults */
193 		switch (npfail) {
194 		case 2:	/* both p and q dead */
195 			*createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
196 			break;
197 		case 1:	/* either p or q and dead data */
198 			RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
199 			RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
200 			if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
201 				*createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
202 			else
203 				*createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
204 			break;
205 		case 0:	/* double data loss */
206 			*createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
207 			break;
208 		}
209 		break;
210 
211 	default:		/* more than 2 disk faults */
212 		*createFunc = NULL;
213 		RF_PANIC();
214 	}
215 	return;
216 }
217 /*
218    Used as a stop gap info function
219 */
220 #if 0
221 static void
222 PQOne(raidPtr, nSucc, nAnte, asmap)
223 	RF_Raid_t *raidPtr;
224 	int    *nSucc;
225 	int    *nAnte;
226 	RF_AccessStripeMap_t *asmap;
227 {
228 	*nSucc = *nAnte = 1;
229 }
230 
231 static void
232 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
233 	RF_Raid_t *raidPtr;
234 	int    *nSucc;
235 	int    *nAnte;
236 	RF_AccessStripeMap_t *asmap;
237 {
238 	*nSucc = 1;
239 	*nAnte = 2;
240 }
241 #endif
242 
243 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
244 {
245 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
246 	    rf_RegularPQFunc, RF_FALSE);
247 }
248 
249 int
250 rf_RegularONQFunc(node)
251 	RF_DagNode_t *node;
252 {
253 	int     np = node->numParams;
254 	int     d;
255 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
256 	int     i;
257 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
258 	RF_Etimer_t timer;
259 	char   *qbuf, *qpbuf;
260 	char   *obuf, *nbuf;
261 	RF_PhysDiskAddr_t *old, *new;
262 	unsigned long coeff;
263 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
264 
265 	RF_ETIMER_START(timer);
266 
267 	d = (np - 3) / 4;
268 	RF_ASSERT(4 * d + 3 == np);
269 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
270 	for (i = 0; i < d; i++) {
271 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
272 		obuf = (char *) node->params[2 * i + 1].p;
273 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
274 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
275 		RF_ASSERT(new->numSector == old->numSector);
276 		RF_ASSERT(new->raidAddress == old->raidAddress);
277 		/* the stripe unit within the stripe tells us the coefficient
278 		 * to use for the multiply. */
279 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
280 		/* compute the data unit offset within the column, then add
281 		 * one */
282 		coeff = (coeff % raidPtr->Layout.numDataCol);
283 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
284 		QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
285 	}
286 
287 	RF_ETIMER_STOP(timer);
288 	RF_ETIMER_EVAL(timer);
289 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
290 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
291 					 * I/O in this node */
292 	return (0);
293 }
294 /*
295    See the SimpleXORFunc for the difference between a simple and regular func.
296    These Q functions should be used for
297 
298          new q = Q(data,old data,old q)
299 
300    style updates and not for
301 
302          q = ( new data, new data, .... )
303 
304    computations.
305 
   The simple q takes 2(2d+1)+1 params, where d is the number
   of (pda, buffer) pairs of old data. The order of params is
   [0]    old data pda_0, old data buffer_0, ..., old data pda_{d-1}, old data buffer_{d-1}
   [2d]   old q pda, old q buffer
   [2d+2] new data pda_0, new data buffer_0, ..., new data pda_{d-1}, new data buffer_{d-1}
   [4d+2] raidPtr
312 */
313 
314 int
315 rf_SimpleONQFunc(node)
316 	RF_DagNode_t *node;
317 {
318 	int     np = node->numParams;
319 	int     d;
320 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
321 	int     i;
322 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
323 	RF_Etimer_t timer;
324 	char   *qbuf;
325 	char   *obuf, *nbuf;
326 	RF_PhysDiskAddr_t *old, *new;
327 	unsigned long coeff;
328 
329 	RF_ETIMER_START(timer);
330 
331 	d = (np - 3) / 4;
332 	RF_ASSERT(4 * d + 3 == np);
333 	qbuf = (char *) node->params[2 * d + 1].p;	/* q buffer */
334 	for (i = 0; i < d; i++) {
335 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
336 		obuf = (char *) node->params[2 * i + 1].p;
337 		new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
338 		nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
339 		RF_ASSERT(new->numSector == old->numSector);
340 		RF_ASSERT(new->raidAddress == old->raidAddress);
341 		/* the stripe unit within the stripe tells us the coefficient
342 		 * to use for the multiply. */
343 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
344 		/* compute the data unit offset within the column, then add
345 		 * one */
346 		coeff = (coeff % raidPtr->Layout.numDataCol);
347 		QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
348 	}
349 
350 	RF_ETIMER_STOP(timer);
351 	RF_ETIMER_EVAL(timer);
352 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
353 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
354 					 * I/O in this node */
355 	return (0);
356 }
357 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
358 {
359 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
360 }
361 
362 static void RegularQSubr(RF_DagNode_t *node, char   *qbuf);
363 
364 static void
365 RegularQSubr(node, qbuf)
366 	RF_DagNode_t *node;
367 	char   *qbuf;
368 {
369 	int     np = node->numParams;
370 	int     d;
371 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
372 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
373 	int     i;
374 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
375 	RF_Etimer_t timer;
376 	char   *obuf, *qpbuf;
377 	RF_PhysDiskAddr_t *old;
378 	unsigned long coeff;
379 
380 	RF_ETIMER_START(timer);
381 
382 	d = (np - 1) / 2;
383 	RF_ASSERT(2 * d + 1 == np);
384 	for (i = 0; i < d; i++) {
385 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
386 		obuf = (char *) node->params[2 * i + 1].p;
387 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
388 		/* compute the data unit offset within the column, then add
389 		 * one */
390 		coeff = (coeff % raidPtr->Layout.numDataCol);
391 		/* the input buffers may not all be aligned with the start of
392 		 * the stripe. so shift by their sector offset within the
393 		 * stripe unit */
394 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
395 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
396 	}
397 
398 	RF_ETIMER_STOP(timer);
399 	RF_ETIMER_EVAL(timer);
400 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
401 }
402 /*
403    used in degraded writes.
404 */
405 
406 static void DegrQSubr(RF_DagNode_t *node);
407 
408 static void
409 DegrQSubr(node)
410 	RF_DagNode_t *node;
411 {
412 	int     np = node->numParams;
413 	int     d;
414 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
415 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
416 	int     i;
417 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
418 	RF_Etimer_t timer;
419 	char   *qbuf = node->results[1];
420 	char   *obuf, *qpbuf;
421 	RF_PhysDiskAddr_t *old;
422 	unsigned long coeff;
423 	unsigned fail_start;
424 	int     j;
425 
426 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
427 	fail_start = old->startSector % secPerSU;
428 
429 	RF_ETIMER_START(timer);
430 
431 	d = (np - 2) / 2;
432 	RF_ASSERT(2 * d + 2 == np);
433 	for (i = 0; i < d; i++) {
434 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
435 		obuf = (char *) node->params[2 * i + 1].p;
436 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
437 		/* compute the data unit offset within the column, then add
438 		 * one */
439 		coeff = (coeff % raidPtr->Layout.numDataCol);
440 		/* the input buffers may not all be aligned with the start of
441 		 * the stripe. so shift by their sector offset within the
442 		 * stripe unit */
443 		j = old->startSector % secPerSU;
444 		RF_ASSERT(j >= fail_start);
445 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
446 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
447 	}
448 
449 	RF_ETIMER_STOP(timer);
450 	RF_ETIMER_EVAL(timer);
451 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
452 }
453 /*
454    Called by large write code to compute the new parity and the new q.
455 
456    structure of the params:
457 
   pda_0, buffer_0, pda_1, buffer_1, ..., pda_{d-1}, buffer_{d-1} (d = numDataCol),
   raidPtr
460 
461    for a total of 2d+1 arguments.
462    The result buffers results[0], results[1] are the buffers for the p and q,
463    respectively.
464 
465    We compute Q first, then compute P. The P calculation may try to reuse
466    one of the input buffers for its output, so if we computed P first, we would
467    corrupt the input for the q calculation.
468 */
469 
470 int
471 rf_RegularPQFunc(node)
472 	RF_DagNode_t *node;
473 {
474 	RegularQSubr(node, node->results[1]);
475 	return (rf_RegularXorFunc(node));	/* does the wakeup */
476 }
477 
478 int
479 rf_RegularQFunc(node)
480 	RF_DagNode_t *node;
481 {
482 	/* Almost ... adjust Qsubr args */
483 	RegularQSubr(node, node->results[0]);
484 	rf_GenericWakeupFunc(node, 0);	/* call wake func explicitly since no
485 					 * I/O in this node */
486 	return (0);
487 }
488 /*
489    Called by singly degraded write code to compute the new parity and the new q.
490 
491    structure of the params:
492 
493    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
494    failedPDA raidPtr
495 
496    for a total of 2d+2 arguments.
497    The result buffers results[0], results[1] are the buffers for the parity and q,
498    respectively.
499 
500    We compute Q first, then compute parity. The parity calculation may try to reuse
501    one of the input buffers for its output, so if we computed parity first, we would
502    corrupt the input for the q calculation.
503 
504    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
505 */
506 
507 void
508 rf_Degraded_100_PQFunc(node)
509 	RF_DagNode_t *node;
510 {
511 	int     np = node->numParams;
512 
513 	RF_ASSERT(np >= 2);
514 	DegrQSubr(node);
515 	rf_RecoveryXorFunc(node);
516 }
517 
518 
519 /*
520    The two below are used when reading a stripe with a single lost data unit.
521    The parameters are
522 
523    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
524 
525    and results[0] contains the data buffer. Which is originally zero-filled.
526 
527 */
528 
529 /* this Q func is used by the degraded-mode dag functions to recover lost data.
530  * the second-to-last parameter is the PDA for the failed portion of the access.
531  * the code here looks at this PDA and assumes that the xor target buffer is
532  * equal in size to the number of sectors in the failed PDA.  It then uses
533  * the other PDAs in the parameter list to determine where within the target
534  * buffer the corresponding data should be xored.
535  *
536  * Recall the basic equation is
537  *
538  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
539  *
540  * so to recover data_j we need
541  *
542  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
543  *
544  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
545  * copying Q into it. Then we need to do a table lookup to convert to solve
546  *   data_j /= J
547  *
548  *
549  */
550 int
551 rf_RecoveryQFunc(node)
552 	RF_DagNode_t *node;
553 {
554 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
555 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
556 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
557 	int     i;
558 	RF_PhysDiskAddr_t *pda;
559 	RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
560 	char   *srcbuf, *destbuf;
561 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
562 	RF_Etimer_t timer;
563 	unsigned long coeff;
564 
565 	RF_ETIMER_START(timer);
566 	/* start by copying Q into the buffer */
567 	bcopy(node->params[node->numParams - 3].p, node->results[0],
568 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
569 	for (i = 0; i < node->numParams - 4; i += 2) {
570 		RF_ASSERT(node->params[i + 1].p != node->results[0]);
571 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
572 		srcbuf = (char *) node->params[i + 1].p;
573 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
574 		destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
575 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
576 		/* compute the data unit offset within the column */
577 		coeff = (coeff % raidPtr->Layout.numDataCol);
578 		rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
579 	}
580 	/* Do the nasty inversion now */
581 	coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
582 	rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
583 	RF_ETIMER_STOP(timer);
584 	RF_ETIMER_EVAL(timer);
585 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
586 	rf_GenericWakeupFunc(node, 0);
587 	return (0);
588 }
589 
590 int
591 rf_RecoveryPQFunc(node)
592 	RF_DagNode_t *node;
593 {
594 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
595 	printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
596 	return (1);
597 }
598 /*
599    Degraded write Q subroutine.
600    Used when P is dead.
601    Large-write style Q computation.
602    Parameters
603 
604    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
605 
606    We ignore failedPDA.
607 
608    This is a "simple style" recovery func.
609 */
610 
611 void
612 rf_PQ_DegradedWriteQFunc(node)
613 	RF_DagNode_t *node;
614 {
615 	int     np = node->numParams;
616 	int     d;
617 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
618 	unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
619 	int     i;
620 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
621 	RF_Etimer_t timer;
622 	char   *qbuf = node->results[0];
623 	char   *obuf, *qpbuf;
624 	RF_PhysDiskAddr_t *old;
625 	unsigned long coeff;
626 	int     fail_start, j;
627 
628 	old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
629 	fail_start = old->startSector % secPerSU;
630 
631 	RF_ETIMER_START(timer);
632 
633 	d = (np - 2) / 2;
634 	RF_ASSERT(2 * d + 2 == np);
635 
636 	for (i = 0; i < d; i++) {
637 		old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
638 		obuf = (char *) node->params[2 * i + 1].p;
639 		coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
640 		/* compute the data unit offset within the column, then add
641 		 * one */
642 		coeff = (coeff % raidPtr->Layout.numDataCol);
643 		j = old->startSector % secPerSU;
644 		RF_ASSERT(j >= fail_start);
645 		qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
646 		rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
647 	}
648 
649 	RF_ETIMER_STOP(timer);
650 	RF_ETIMER_EVAL(timer);
651 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
652 	rf_GenericWakeupFunc(node, 0);
653 }
654 
655 
656 
657 
658 /* Q computations */
659 
660 /*
   coeff - column;
662 
663    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
664 
665    on 5-bit basis;
666    length in bytes;
667 */
668 
669 void
670 rf_IncQ(dest, buf, length, coeff)
671 	unsigned long *dest;
672 	unsigned long *buf;
673 	unsigned length;
674 	unsigned coeff;
675 {
676 	unsigned long a, d, new;
677 	unsigned long a1, a2;
678 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
679 	unsigned r = rf_rn[coeff + 1];
680 
681 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
682 #define INSERT(a,i) (a << (5L*i))
683 
684 	length /= 8;
685 	/* 13 5 bit quants in a 64 bit word */
686 	while (length) {
687 		a = *buf++;
688 		d = *dest;
689 		a1 = EXTRACT(a, 0) ^ r;
690 		a2 = EXTRACT(a, 1) ^ r;
691 		new = INSERT(a2, 1) | a1;
692 		a1 = EXTRACT(a, 2) ^ r;
693 		a2 = EXTRACT(a, 3) ^ r;
694 		a1 = q[a1];
695 		a2 = q[a2];
696 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
697 		a1 = EXTRACT(a, 4) ^ r;
698 		a2 = EXTRACT(a, 5) ^ r;
699 		a1 = q[a1];
700 		a2 = q[a2];
701 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
702 		a1 = EXTRACT(a, 5) ^ r;
703 		a2 = EXTRACT(a, 6) ^ r;
704 		a1 = q[a1];
705 		a2 = q[a2];
706 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
707 #if RF_LONGSHIFT > 2
708 		a1 = EXTRACT(a, 7) ^ r;
709 		a2 = EXTRACT(a, 8) ^ r;
710 		a1 = q[a1];
711 		a2 = q[a2];
712 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
713 		a1 = EXTRACT(a, 9) ^ r;
714 		a2 = EXTRACT(a, 10) ^ r;
715 		a1 = q[a1];
716 		a2 = q[a2];
717 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
718 		a1 = EXTRACT(a, 11) ^ r;
719 		a2 = EXTRACT(a, 12) ^ r;
720 		a1 = q[a1];
721 		a2 = q[a2];
722 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
723 #endif				/* RF_LONGSHIFT > 2 */
724 		d ^= new;
725 		*dest++ = d;
726 		length--;
727 	}
728 }
729 /*
730    compute
731 
732    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
733 
734    on a five bit basis.
735    optimization: compute old ^ new on 64 bit basis.
736 
737    length in bytes.
738 */
739 
740 static void
741 QDelta(
742     char *dest,
743     char *obuf,
744     char *nbuf,
745     unsigned length,
746     unsigned char coeff)
747 {
748 	unsigned long a, d, new;
749 	unsigned long a1, a2;
750 	unsigned int *q = &(rf_qfor[28 - coeff][0]);
751 	unsigned int r = rf_rn[coeff + 1];
752 
753 	r = a1 = a2 = new = d = a = 0; /* XXX for now... */
754 	q = NULL; /* XXX for now */
755 
756 #ifdef _KERNEL
757 	/* PQ in kernel currently not supported because the encoding/decoding
758 	 * table is not present */
759 	memset(dest, 0, length);
760 #else				/* KERNEL */
761 	/* this code probably doesn't work and should be rewritten  -wvcii */
762 	/* 13 5 bit quants in a 64 bit word */
763 	length /= 8;
764 	while (length) {
765 		a = *obuf++;	/* XXX need to reorg to avoid cache conflicts */
766 		a ^= *nbuf++;
767 		d = *dest;
768 		a1 = EXTRACT(a, 0) ^ r;
769 		a2 = EXTRACT(a, 1) ^ r;
770 		a1 = q[a1];
771 		a2 = q[a2];
772 		new = INSERT(a2, 1) | a1;
773 		a1 = EXTRACT(a, 2) ^ r;
774 		a2 = EXTRACT(a, 3) ^ r;
775 		a1 = q[a1];
776 		a2 = q[a2];
777 		new = new | INSERT(a1, 2) | INSERT(a2, 3);
778 		a1 = EXTRACT(a, 4) ^ r;
779 		a2 = EXTRACT(a, 5) ^ r;
780 		a1 = q[a1];
781 		a2 = q[a2];
782 		new = new | INSERT(a1, 4) | INSERT(a2, 5);
783 		a1 = EXTRACT(a, 5) ^ r;
784 		a2 = EXTRACT(a, 6) ^ r;
785 		a1 = q[a1];
786 		a2 = q[a2];
787 		new = new | INSERT(a1, 5) | INSERT(a2, 6);
788 #if RF_LONGSHIFT > 2
789 		a1 = EXTRACT(a, 7) ^ r;
790 		a2 = EXTRACT(a, 8) ^ r;
791 		a1 = q[a1];
792 		a2 = q[a2];
793 		new = new | INSERT(a1, 7) | INSERT(a2, 8);
794 		a1 = EXTRACT(a, 9) ^ r;
795 		a2 = EXTRACT(a, 10) ^ r;
796 		a1 = q[a1];
797 		a2 = q[a2];
798 		new = new | INSERT(a1, 9) | INSERT(a2, 10);
799 		a1 = EXTRACT(a, 11) ^ r;
800 		a2 = EXTRACT(a, 12) ^ r;
801 		a1 = q[a1];
802 		a2 = q[a2];
803 		new = new | INSERT(a1, 11) | INSERT(a2, 12);
804 #endif				/* RF_LONGSHIFT > 2 */
805 		d ^= new;
806 		*dest++ = d;
807 		length--;
808 	}
809 #endif				/* _KERNEL */
810 }
811 /*
812    recover columns a and b from the given p and q into
813    bufs abuf and bbuf. All bufs are word aligned.
814    Length is in bytes.
815 */
816 
817 
818 /*
819  * XXX
820  *
821  * Everything about this seems wrong.
822  */
823 void
824 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
825 	unsigned long *pbuf;
826 	unsigned long *qbuf;
827 	unsigned long *abuf;
828 	unsigned long *bbuf;
829 	unsigned length;
830 	unsigned coeff_a;
831 	unsigned coeff_b;
832 {
833 	unsigned long p, q, a, a0, a1;
834 	int     col = (29 * coeff_a) + coeff_b;
835 	unsigned char *q0 = &(rf_qinv[col][0]);
836 
837 	length /= 8;
838 	while (length) {
839 		p = *pbuf++;
840 		q = *qbuf++;
841 		a0 = EXTRACT(p, 0);
842 		a1 = EXTRACT(q, 0);
843 		a = q0[a0 << 5 | a1];
844 #define MF(i) \
845       a0 = EXTRACT(p,i); \
846       a1 = EXTRACT(q,i); \
847       a  = a | INSERT(q0[a0<<5 | a1],i)
848 
849 		MF(1);
850 		MF(2);
851 		MF(3);
852 		MF(4);
853 		MF(5);
854 		MF(6);
855 #if 0
856 		MF(7);
857 		MF(8);
858 		MF(9);
859 		MF(10);
860 		MF(11);
861 		MF(12);
862 #endif				/* 0 */
863 		*abuf++ = a;
864 		*bbuf++ = a ^ p;
865 		length--;
866 	}
867 }
868 /*
869    Lost parity and a data column. Recover that data column.
870    Assume col coeff is lost. Let q the contents of Q after
871    all surviving data columns have been q-xored out of it.
872    Then we have the equation
873 
874    q[28-coeff][a_i ^ r_i+1] = q
875 
876    but q is cyclic with period 31.
877    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
878       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
879 
880    so a_i = r_{coeff+1} ^ q[3+coeff][q]
881 
   The routine is passed the q buffer and the buffer into which
   the data is to be recovered. They can be the same.
884 */
885 
886 
887 
888 static void
889 rf_InvertQ(
890     unsigned long *qbuf,
891     unsigned long *abuf,
892     unsigned length,
893     unsigned coeff)
894 {
895 	unsigned long a, new;
896 	unsigned long a1, a2;
897 	unsigned int *q = &(rf_qfor[3 + coeff][0]);
898 	unsigned r = rf_rn[coeff + 1];
899 
900 	/* 13 5 bit quants in a 64 bit word */
901 	length /= 8;
902 	while (length) {
903 		a = *qbuf++;
904 		a1 = EXTRACT(a, 0);
905 		a2 = EXTRACT(a, 1);
906 		a1 = r ^ q[a1];
907 		a2 = r ^ q[a2];
908 		new = INSERT(a2, 1) | a1;
909 #define M(i,j) \
910       a1 = EXTRACT(a,i); \
911       a2 = EXTRACT(a,j); \
912       a1 = r ^ q[a1]; \
913       a2 = r ^ q[a2]; \
914       new = new | INSERT(a1,i) | INSERT(a2,j)
915 
916 		M(2, 3);
917 		M(4, 5);
918 		M(5, 6);
919 #if RF_LONGSHIFT > 2
920 		M(7, 8);
921 		M(9, 10);
922 		M(11, 12);
923 #endif				/* RF_LONGSHIFT > 2 */
924 		*abuf++ = new;
925 		length--;
926 	}
927 }
928 #endif				/* (RF_INCLUDE_DECL_PQ > 0) ||
929 				 * (RF_INCLUDE_RAID6 > 0) */
930