1 /* $NetBSD: rf_pq.c,v 1.18 2023/10/15 18:15:20 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.18 2023/10/15 18:15:20 oster Exp $");
35
36 #include "rf_archs.h"
37
38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
39
40 #include <dev/raidframe/raidframevar.h>
41
42 #include "rf_raid.h"
43 #include "rf_dag.h"
44 #include "rf_dagffrd.h"
45 #include "rf_dagffwr.h"
46 #include "rf_dagdegrd.h"
47 #include "rf_dagdegwr.h"
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_etimer.h"
51 #include "rf_pqdeg.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_pq.h"
55
56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
58
59 void
rf_RegularONPFunc(RF_DagNode_t * node)60 rf_RegularONPFunc(RF_DagNode_t *node)
61 {
62 rf_RegularXorFunc(node);
63 }
64 /*
65 same as simpleONQ func, but the coefficient is always 1
66 */
67
68 void
rf_SimpleONPFunc(RF_DagNode_t * node)69 rf_SimpleONPFunc(RF_DagNode_t *node)
70 {
71 rf_SimpleXorFunc(node);
72 }
73
74 void
rf_RecoveryPFunc(RF_DagNode_t * node)75 rf_RecoveryPFunc(RF_DagNode_t *node)
76 {
77 rf_RecoveryXorFunc(node);
78 }
79
80 void
rf_RegularPFunc(RF_DagNode_t * node)81 rf_RegularPFunc(RF_DagNode_t *node)
82 {
83 rf_RegularXorFunc(node);
84 }
85 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
86 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
87
88 static void
89 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
90 unsigned char coeff);
91 static void
92 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
93 unsigned length, unsigned coeff);
94
95 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
96 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
97 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
98
99 void
rf_PQDagSelect(RF_Raid_t * raidPtr,RF_IoType_t type,RF_AccessStripeMap_t * asmap,RF_VoidFuncPtr * createFunc)100 rf_PQDagSelect(
101 RF_Raid_t * raidPtr,
102 RF_IoType_t type,
103 RF_AccessStripeMap_t * asmap,
104 RF_VoidFuncPtr * createFunc)
105 {
106 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
107 unsigned ndfail = asmap->numDataFailed;
108 unsigned npfail = asmap->numParityFailed;
109 unsigned ntfail = npfail + ndfail;
110
111 RF_ASSERT(RF_IO_IS_R_OR_W(type));
112 if (ntfail > 2) {
113 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
114 *createFunc = NULL;
115 return;
116 }
117 /* ok, we can do this I/O */
118 if (type == RF_IO_TYPE_READ) {
119 switch (ndfail) {
120 case 0:
121 /* fault free read */
122 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
123 break;
124 case 1:
125 /* lost a single data unit */
126 /* two cases: (1) parity is not lost. do a normal raid
127 * 5 reconstruct read. (2) parity is lost. do a
128 * reconstruct read using "q". */
129 if (ntfail == 2) { /* also lost redundancy */
130 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
131 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
132 else
133 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
134 } else {
135 /* P and Q are ok. But is there a failure in
136 * some unaccessed data unit? */
137 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
138 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
139 else
140 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
141 }
142 break;
143 case 2:
144 /* lost two data units */
145 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
146 break;
147 }
148 return;
149 }
150 /* a write */
151 switch (ntfail) {
152 case 0: /* fault free */
153 if (rf_suppressLocksAndLargeWrites ||
154 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
155 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
156
157 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
158 } else {
159 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
160 }
161 break;
162
163 case 1: /* single disk fault */
164 if (npfail == 1) {
165 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
166 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
167 * normal mode raid5
168 * write. */
169 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
170 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
171 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
172 else
173 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
174 } else {/* parity died, small write only updating Q */
175 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
176 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
178 else
179 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
180 }
181 } else { /* data missing. Do a P reconstruct write if
182 * only a single data unit is lost in the
183 * stripe, otherwise a PQ reconstruct write. */
184 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
185 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
186 else
187 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
188 }
189 break;
190
191 case 2: /* two disk faults */
192 switch (npfail) {
193 case 2: /* both p and q dead */
194 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
195 break;
196 case 1: /* either p or q and dead data */
197 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
198 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
199 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
200 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
201 else
202 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
203 break;
204 case 0: /* double data loss */
205 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
206 break;
207 }
208 break;
209
210 default: /* more than 2 disk faults */
211 *createFunc = NULL;
212 RF_PANIC();
213 }
214 return;
215 }
/*
 * Stop-gap "info" callbacks that report fixed successor/antecedent counts.
 * Currently compiled out.
 */
#if 0
/* Reports one successor and one antecedent. */
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = 1;
}

/* Reports one successor and two antecedents. */
static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
233
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)234 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
235 {
236 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
237 rf_RegularPQFunc, RF_FALSE);
238 }
239
240 void
rf_RegularONQFunc(RF_DagNode_t * node)241 rf_RegularONQFunc(RF_DagNode_t *node)
242 {
243 int np = node->numParams;
244 int d;
245 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
246 int i;
247 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
248 RF_Etimer_t timer;
249 char *qbuf, *qpbuf;
250 char *obuf, *nbuf;
251 RF_PhysDiskAddr_t *old, *new;
252 unsigned long coeff;
253 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
254
255 RF_ETIMER_START(timer);
256
257 d = (np - 3) / 4;
258 RF_ASSERT(4 * d + 3 == np);
259 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
260 for (i = 0; i < d; i++) {
261 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
262 obuf = (char *) node->params[2 * i + 1].p;
263 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
264 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
265 RF_ASSERT(new->numSector == old->numSector);
266 RF_ASSERT(new->raidAddress == old->raidAddress);
267 /* the stripe unit within the stripe tells us the coefficient
268 * to use for the multiply. */
269 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
270 /* compute the data unit offset within the column, then add
271 * one */
272 coeff = (coeff % raidPtr->Layout.numDataCol);
273 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
274 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
275 }
276
277 RF_ETIMER_STOP(timer);
278 RF_ETIMER_EVAL(timer);
279 tracerec->q_us += RF_ETIMER_VAL_US(timer);
280 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
281 * I/O in this node */
282 }
283 /*
284 See the SimpleXORFunc for the difference between a simple and regular func.
285 These Q functions should be used for
286
287 new q = Q(data,old data,old q)
288
289 style updates and not for
290
291 q = ( new data, new data, .... )
292
293 computations.
294
295 The simple q takes 2(2d+1)+1 params, where d is the number
296 of stripes written. The order of params is
297 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
298 [2d] old q pda_0, old q buffer
299 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
300 raidPtr
301 */
302
303 void
rf_SimpleONQFunc(RF_DagNode_t * node)304 rf_SimpleONQFunc(RF_DagNode_t *node)
305 {
306 int np = node->numParams;
307 int d;
308 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
309 int i;
310 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
311 RF_Etimer_t timer;
312 char *qbuf;
313 char *obuf, *nbuf;
314 RF_PhysDiskAddr_t *old, *new;
315 unsigned long coeff;
316
317 RF_ETIMER_START(timer);
318
319 d = (np - 3) / 4;
320 RF_ASSERT(4 * d + 3 == np);
321 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
322 for (i = 0; i < d; i++) {
323 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
324 obuf = (char *) node->params[2 * i + 1].p;
325 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
326 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
327 RF_ASSERT(new->numSector == old->numSector);
328 RF_ASSERT(new->raidAddress == old->raidAddress);
329 /* the stripe unit within the stripe tells us the coefficient
330 * to use for the multiply. */
331 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
332 /* compute the data unit offset within the column, then add
333 * one */
334 coeff = (coeff % raidPtr->Layout.numDataCol);
335 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
336 }
337
338 RF_ETIMER_STOP(timer);
339 RF_ETIMER_EVAL(timer);
340 tracerec->q_us += RF_ETIMER_VAL_US(timer);
341 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
342 * I/O in this node */
343 }
RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)344 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
345 {
346 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
347 }
348
349 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
350
351 static void
RegularQSubr(RF_DagNode_t * node,char * qbuf)352 RegularQSubr(RF_DagNode_t *node, char *qbuf)
353 {
354 int np = node->numParams;
355 int d;
356 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
357 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
358 int i;
359 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
360 RF_Etimer_t timer;
361 char *obuf, *qpbuf;
362 RF_PhysDiskAddr_t *old;
363 unsigned long coeff;
364
365 RF_ETIMER_START(timer);
366
367 d = (np - 1) / 2;
368 RF_ASSERT(2 * d + 1 == np);
369 for (i = 0; i < d; i++) {
370 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
371 obuf = (char *) node->params[2 * i + 1].p;
372 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
373 /* compute the data unit offset within the column, then add
374 * one */
375 coeff = (coeff % raidPtr->Layout.numDataCol);
376 /* the input buffers may not all be aligned with the start of
377 * the stripe. so shift by their sector offset within the
378 * stripe unit */
379 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
380 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
381 }
382
383 RF_ETIMER_STOP(timer);
384 RF_ETIMER_EVAL(timer);
385 tracerec->q_us += RF_ETIMER_VAL_US(timer);
386 }
387 /*
388 used in degraded writes.
389 */
390
391 static void DegrQSubr(RF_DagNode_t *node);
392
393 static void
DegrQSubr(RF_DagNode_t * node)394 DegrQSubr(RF_DagNode_t *node)
395 {
396 int np = node->numParams;
397 int d;
398 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
399 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
400 int i;
401 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
402 RF_Etimer_t timer;
403 char *qbuf = node->results[1];
404 char *obuf, *qpbuf;
405 RF_PhysDiskAddr_t *old;
406 unsigned long coeff;
407 unsigned fail_start;
408 int j;
409
410 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
411 fail_start = old->startSector % secPerSU;
412
413 RF_ETIMER_START(timer);
414
415 d = (np - 2) / 2;
416 RF_ASSERT(2 * d + 2 == np);
417 for (i = 0; i < d; i++) {
418 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
419 obuf = (char *) node->params[2 * i + 1].p;
420 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
421 /* compute the data unit offset within the column, then add
422 * one */
423 coeff = (coeff % raidPtr->Layout.numDataCol);
424 /* the input buffers may not all be aligned with the start of
425 * the stripe. so shift by their sector offset within the
426 * stripe unit */
427 j = old->startSector % secPerSU;
428 RF_ASSERT(j >= fail_start);
429 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
430 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
431 }
432
433 RF_ETIMER_STOP(timer);
434 RF_ETIMER_EVAL(timer);
435 tracerec->q_us += RF_ETIMER_VAL_US(timer);
436 }
437 /*
438 Called by large write code to compute the new parity and the new q.
439
440 structure of the params:
441
442 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
443 raidPtr
444
445 for a total of 2d+1 arguments.
446 The result buffers results[0], results[1] are the buffers for the p and q,
447 respectively.
448
449 We compute Q first, then compute P. The P calculation may try to reuse
450 one of the input buffers for its output, so if we computed P first, we would
451 corrupt the input for the q calculation.
452 */
453
454 void
rf_RegularPQFunc(RF_DagNode_t * node)455 rf_RegularPQFunc(RF_DagNode_t *node)
456 {
457 RegularQSubr(node, node->results[1]);
458 rf_RegularXorFunc(node); /* does the wakeup */
459 }
460
461 void
rf_RegularQFunc(RF_DagNode_t * node)462 rf_RegularQFunc(RF_DagNode_t *node)
463 {
464 /* Almost ... adjust Qsubr args */
465 RegularQSubr(node, node->results[0]);
466 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
467 * I/O in this node */
468 }
469 /*
470 Called by singly degraded write code to compute the new parity and the new q.
471
472 structure of the params:
473
474 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
475 failedPDA raidPtr
476
477 for a total of 2d+2 arguments.
478 The result buffers results[0], results[1] are the buffers for the parity and q,
479 respectively.
480
481 We compute Q first, then compute parity. The parity calculation may try to reuse
482 one of the input buffers for its output, so if we computed parity first, we would
483 corrupt the input for the q calculation.
484
485 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
486 */
487
488 void
rf_Degraded_100_PQFunc(RF_DagNode_t * node)489 rf_Degraded_100_PQFunc(RF_DagNode_t *node)
490 {
491 int np = node->numParams;
492
493 RF_ASSERT(np >= 2);
494 DegrQSubr(node);
495 rf_RecoveryXorFunc(node);
496 }
497
498
499 /*
500 The two below are used when reading a stripe with a single lost data unit.
501 The parameters are
502
503 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
504
505 and results[0] contains the data buffer. Which is originally zero-filled.
506
507 */
508
509 /* this Q func is used by the degraded-mode dag functions to recover lost data.
510 * the second-to-last parameter is the PDA for the failed portion of the access.
511 * the code here looks at this PDA and assumes that the xor target buffer is
512 * equal in size to the number of sectors in the failed PDA. It then uses
513 * the other PDAs in the parameter list to determine where within the target
514 * buffer the corresponding data should be xored.
515 *
516 * Recall the basic equation is
517 *
518 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
519 *
520 * so to recover data_j we need
521 *
522 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
523 *
524 * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
525 * copying Q into it. Then we need to do a table lookup to convert to solve
526 * data_j /= J
527 *
528 *
529 */
530 void
rf_RecoveryQFunc(RF_DagNode_t * node)531 rf_RecoveryQFunc(RF_DagNode_t *node)
532 {
533 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
534 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
535 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
536 int i;
537 RF_PhysDiskAddr_t *pda = NULL;
538 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
539 char *srcbuf, *destbuf;
540 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
541 RF_Etimer_t timer;
542 unsigned long coeff;
543
544 RF_ETIMER_START(timer);
545 /* start by copying Q into the buffer */
546 memcpy(node->results[0], node->params[node->numParams - 3].p,
547 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
548 for (i = 0; i < node->numParams - 4; i += 2) {
549 RF_ASSERT(node->params[i + 1].p != node->results[0]);
550 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
551 srcbuf = (char *) node->params[i + 1].p;
552 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
553 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
554 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
555 /* compute the data unit offset within the column */
556 coeff = (coeff % raidPtr->Layout.numDataCol);
557 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
558 }
559 /* Do the nasty inversion now */
560 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
561 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
562 RF_ETIMER_STOP(timer);
563 RF_ETIMER_EVAL(timer);
564 tracerec->q_us += RF_ETIMER_VAL_US(timer);
565 rf_GenericWakeupFunc(node, 0);
566 }
567
568 void
rf_RecoveryPQFunc(RF_DagNode_t * node)569 rf_RecoveryPQFunc(RF_DagNode_t *node)
570 {
571 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
572 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
573 /* XXX: Was: */
574 /* return (1); */
575 }
576 /*
577 Degraded write Q subroutine.
578 Used when P is dead.
579 Large-write style Q computation.
580 Parameters
581
582 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
583
584 We ignore failedPDA.
585
586 This is a "simple style" recovery func.
587 */
588
589 void
rf_PQ_DegradedWriteQFunc(RF_DagNode_t * node)590 rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
591 {
592 int np = node->numParams;
593 int d;
594 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
595 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
596 int i;
597 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
598 RF_Etimer_t timer;
599 char *qbuf = node->results[0];
600 char *obuf, *qpbuf;
601 RF_PhysDiskAddr_t *old;
602 unsigned long coeff;
603 int fail_start, j;
604
605 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
606 fail_start = old->startSector % secPerSU;
607
608 RF_ETIMER_START(timer);
609
610 d = (np - 2) / 2;
611 RF_ASSERT(2 * d + 2 == np);
612
613 for (i = 0; i < d; i++) {
614 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
615 obuf = (char *) node->params[2 * i + 1].p;
616 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
617 /* compute the data unit offset within the column, then add
618 * one */
619 coeff = (coeff % raidPtr->Layout.numDataCol);
620 j = old->startSector % secPerSU;
621 RF_ASSERT(j >= fail_start);
622 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
623 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
624 }
625
626 RF_ETIMER_STOP(timer);
627 RF_ETIMER_EVAL(timer);
628 tracerec->q_us += RF_ETIMER_VAL_US(timer);
629 rf_GenericWakeupFunc(node, 0);
630 }
631
632
633
634
635 /* Q computations */
636
637 /*
638 coeff - colummn;
639
640 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
641
642 on 5-bit basis;
643 length in bytes;
644 */
645
646 void
rf_IncQ(unsigned long * dest,unsigned long * buf,unsigned length,unsigned coeff)647 rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length, unsigned coeff)
648 {
649 unsigned long a, d, new;
650 unsigned long a1, a2;
651 unsigned int *q = &(rf_qfor[28 - coeff][0]);
652 unsigned r = rf_rn[coeff + 1];
653
654 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
655 #define INSERT(a,i) (a << (5L*i))
656
657 length /= 8;
658 /* 13 5 bit quants in a 64 bit word */
659 while (length) {
660 a = *buf++;
661 d = *dest;
662 a1 = EXTRACT(a, 0) ^ r;
663 a2 = EXTRACT(a, 1) ^ r;
664 new = INSERT(a2, 1) | a1;
665 a1 = EXTRACT(a, 2) ^ r;
666 a2 = EXTRACT(a, 3) ^ r;
667 a1 = q[a1];
668 a2 = q[a2];
669 new = new | INSERT(a1, 2) | INSERT(a2, 3);
670 a1 = EXTRACT(a, 4) ^ r;
671 a2 = EXTRACT(a, 5) ^ r;
672 a1 = q[a1];
673 a2 = q[a2];
674 new = new | INSERT(a1, 4) | INSERT(a2, 5);
675 a1 = EXTRACT(a, 5) ^ r;
676 a2 = EXTRACT(a, 6) ^ r;
677 a1 = q[a1];
678 a2 = q[a2];
679 new = new | INSERT(a1, 5) | INSERT(a2, 6);
680 #if RF_LONGSHIFT > 2
681 a1 = EXTRACT(a, 7) ^ r;
682 a2 = EXTRACT(a, 8) ^ r;
683 a1 = q[a1];
684 a2 = q[a2];
685 new = new | INSERT(a1, 7) | INSERT(a2, 8);
686 a1 = EXTRACT(a, 9) ^ r;
687 a2 = EXTRACT(a, 10) ^ r;
688 a1 = q[a1];
689 a2 = q[a2];
690 new = new | INSERT(a1, 9) | INSERT(a2, 10);
691 a1 = EXTRACT(a, 11) ^ r;
692 a2 = EXTRACT(a, 12) ^ r;
693 a1 = q[a1];
694 a2 = q[a2];
695 new = new | INSERT(a1, 11) | INSERT(a2, 12);
696 #endif /* RF_LONGSHIFT > 2 */
697 d ^= new;
698 *dest++ = d;
699 length--;
700 }
701 }
702 /*
703 compute
704
705 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
706
707 on a five bit basis.
708 optimization: compute old ^ new on 64 bit basis.
709
710 length in bytes.
711 */
712
713 static void
QDelta(char * dest,char * obuf,char * nbuf,unsigned length,unsigned char coeff)714 QDelta(
715 char *dest,
716 char *obuf,
717 char *nbuf,
718 unsigned length,
719 unsigned char coeff)
720 {
721 #ifndef _KERNEL
722 unsigned long a, d, new;
723 unsigned long a1, a2;
724 unsigned int *q = &(rf_qfor[28 - coeff][0]);
725 unsigned int r = rf_rn[coeff + 1];
726
727 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
728 q = NULL; /* XXX for now */
729 #endif
730 #ifdef _KERNEL
731 /* PQ in kernel currently not supported because the encoding/decoding
732 * table is not present */
733 memset(dest, 0, length);
734 #else /* KERNEL */
735 /* this code probably doesn't work and should be rewritten -wvcii */
736 /* 13 5 bit quants in a 64 bit word */
737 length /= 8;
738 while (length) {
739 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
740 a ^= *nbuf++;
741 d = *dest;
742 a1 = EXTRACT(a, 0) ^ r;
743 a2 = EXTRACT(a, 1) ^ r;
744 a1 = q[a1];
745 a2 = q[a2];
746 new = INSERT(a2, 1) | a1;
747 a1 = EXTRACT(a, 2) ^ r;
748 a2 = EXTRACT(a, 3) ^ r;
749 a1 = q[a1];
750 a2 = q[a2];
751 new = new | INSERT(a1, 2) | INSERT(a2, 3);
752 a1 = EXTRACT(a, 4) ^ r;
753 a2 = EXTRACT(a, 5) ^ r;
754 a1 = q[a1];
755 a2 = q[a2];
756 new = new | INSERT(a1, 4) | INSERT(a2, 5);
757 a1 = EXTRACT(a, 5) ^ r;
758 a2 = EXTRACT(a, 6) ^ r;
759 a1 = q[a1];
760 a2 = q[a2];
761 new = new | INSERT(a1, 5) | INSERT(a2, 6);
762 #if RF_LONGSHIFT > 2
763 a1 = EXTRACT(a, 7) ^ r;
764 a2 = EXTRACT(a, 8) ^ r;
765 a1 = q[a1];
766 a2 = q[a2];
767 new = new | INSERT(a1, 7) | INSERT(a2, 8);
768 a1 = EXTRACT(a, 9) ^ r;
769 a2 = EXTRACT(a, 10) ^ r;
770 a1 = q[a1];
771 a2 = q[a2];
772 new = new | INSERT(a1, 9) | INSERT(a2, 10);
773 a1 = EXTRACT(a, 11) ^ r;
774 a2 = EXTRACT(a, 12) ^ r;
775 a1 = q[a1];
776 a2 = q[a2];
777 new = new | INSERT(a1, 11) | INSERT(a2, 12);
778 #endif /* RF_LONGSHIFT > 2 */
779 d ^= new;
780 *dest++ = d;
781 length--;
782 }
783 #endif /* _KERNEL */
784 }
785 /*
786 recover columns a and b from the given p and q into
787 bufs abuf and bbuf. All bufs are word aligned.
788 Length is in bytes.
789 */
790
791
792 /*
793 * XXX
794 *
795 * Everything about this seems wrong.
796 */
797 void
rf_PQ_recover(unsigned long * pbuf,unsigned long * qbuf,unsigned long * abuf,unsigned long * bbuf,unsigned length,unsigned coeff_a,unsigned coeff_b)798 rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf, unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
799 {
800 unsigned long p, q, a, a0, a1;
801 int col = (29 * coeff_a) + coeff_b;
802 unsigned char *q0 = &(rf_qinv[col][0]);
803
804 length /= 8;
805 while (length) {
806 p = *pbuf++;
807 q = *qbuf++;
808 a0 = EXTRACT(p, 0);
809 a1 = EXTRACT(q, 0);
810 a = q0[a0 << 5 | a1];
811 #define MF(i) \
812 a0 = EXTRACT(p,i); \
813 a1 = EXTRACT(q,i); \
814 a = a | INSERT(q0[a0<<5 | a1],i)
815
816 MF(1);
817 MF(2);
818 MF(3);
819 MF(4);
820 MF(5);
821 MF(6);
822 #if 0
823 MF(7);
824 MF(8);
825 MF(9);
826 MF(10);
827 MF(11);
828 MF(12);
829 #endif /* 0 */
830 *abuf++ = a;
831 *bbuf++ = a ^ p;
832 length--;
833 }
834 }
835 /*
836 Lost parity and a data column. Recover that data column.
837 Assume col coeff is lost. Let q the contents of Q after
838 all surviving data columns have been q-xored out of it.
839 Then we have the equation
840
841 q[28-coeff][a_i ^ r_i+1] = q
842
843 but q is cyclic with period 31.
844 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
845 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
846
847 so a_i = r_{coeff+1} ^ q[3+coeff][q]
848
849 The routine is passed q buffer and the buffer
850 the data is to be recoverd into. They can be the same.
851 */
852
853
854
855 static void
rf_InvertQ(unsigned long * qbuf,unsigned long * abuf,unsigned length,unsigned coeff)856 rf_InvertQ(
857 unsigned long *qbuf,
858 unsigned long *abuf,
859 unsigned length,
860 unsigned coeff)
861 {
862 unsigned long a, new;
863 unsigned long a1, a2;
864 unsigned int *q = &(rf_qfor[3 + coeff][0]);
865 unsigned r = rf_rn[coeff + 1];
866
867 /* 13 5 bit quants in a 64 bit word */
868 length /= 8;
869 while (length) {
870 a = *qbuf++;
871 a1 = EXTRACT(a, 0);
872 a2 = EXTRACT(a, 1);
873 a1 = r ^ q[a1];
874 a2 = r ^ q[a2];
875 new = INSERT(a2, 1) | a1;
876 #define M(i,j) \
877 a1 = EXTRACT(a,i); \
878 a2 = EXTRACT(a,j); \
879 a1 = r ^ q[a1]; \
880 a2 = r ^ q[a2]; \
881 new = new | INSERT(a1,i) | INSERT(a2,j)
882
883 M(2, 3);
884 M(4, 5);
885 M(5, 6);
886 #if RF_LONGSHIFT > 2
887 M(7, 8);
888 M(9, 10);
889 M(11, 12);
890 #endif /* RF_LONGSHIFT > 2 */
891 *abuf++ = new;
892 length--;
893 }
894 }
895 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
896 * (RF_INCLUDE_RAID6 > 0) */
897