xref: /netbsd-src/sys/dev/raidframe/rf_evenodd_dagfuncs.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: rf_evenodd_dagfuncs.c,v 1.20 2009/03/14 15:36:20 dsl Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: ChangMing Wu
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * Code for RAID-EVENODD  architecture.
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.20 2009/03/14 15:36:20 dsl Exp $");
35 
36 #include "rf_archs.h"
37 
38 #ifdef _KERNEL_OPT
39 #include "opt_raid_diagnostic.h"
40 #endif
41 
42 #if RF_INCLUDE_EVENODD > 0
43 
44 #include <dev/raidframe/raidframevar.h>
45 
46 #include "rf_raid.h"
47 #include "rf_dag.h"
48 #include "rf_dagffrd.h"
49 #include "rf_dagffwr.h"
50 #include "rf_dagdegrd.h"
51 #include "rf_dagdegwr.h"
52 #include "rf_dagutils.h"
53 #include "rf_dagfuncs.h"
54 #include "rf_etimer.h"
55 #include "rf_general.h"
56 #include "rf_parityscan.h"
57 #include "rf_evenodd.h"
58 #include "rf_evenodd_dagfuncs.h"
59 
60 /* These redundant functions are for small write */
61 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
62 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
63 /* These redundant functions are for degraded read */
64 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
65 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
66 /**********************************************************************************************
67  *   the following encoding node functions is used in  EO_000_CreateLargeWriteDAG
68  **********************************************************************************************/
69 int
70 rf_RegularPEFunc(RF_DagNode_t *node)
71 {
72 	rf_RegularESubroutine(node, node->results[1]);
73 	rf_RegularXorFunc(node);/* does the wakeup here! */
74 #if 1
75 	return (0);		/* XXX This was missing... GO */
76 #endif
77 }
78 
79 
/************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG there are two functions to be used: (i) RegularONEFunc() and
 *  (ii) SimpleONEFunc(). The former is used when the write accesses at least a full stripe
 *  unit's worth of sectors. The latter is used when the write accesses two stripe units but
 *  with fewer total sectors than sectors per SU. In this case, the accessed parts of parity
 *  and 'E' are disconnected areas within their stripe units, and the parity write and the 'E'
 *  write are each divided into two distinct writes (four in total). The simple old-new write
 *  and the regular old-new write happen as in RAID-5.
 ************************************************************************************************/
88 
89 /* Algorithm:
90      1. Store the difference of old data and new data in the Rod buffer.
91      2. then encode this buffer into the buffer which already have old 'E' information inside it,
92 	the result can be shown to be the new 'E' information.
93      3. xor the Wnd buffer into the difference buffer to recover the  original old data.
94    Here we have another alternative: to allocate a temporary buffer for storing the difference of
95    old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach
96    take the same speed as the previous, and need more memory.
97 */
98 int
99 rf_RegularONEFunc(RF_DagNode_t *node)
100 {
101 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
102 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
103 	int     EpdaIndex = (node->numParams - 1) / 2 - 1;	/* the parameter of node
104 								 * where you can find
105 								 * e-pda */
106 	int     i, k, retcode = 0;
107 	int     suoffset, length;
108 	RF_RowCol_t scol;
109 	char   *srcbuf, *destbuf;
110 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
111 	RF_Etimer_t timer;
112 	RF_PhysDiskAddr_t *pda;
113 #ifdef RAID_DIAGNOSTIC
114 	RF_PhysDiskAddr_t *EPDA =
115 	    (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
116 	int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
117 #endif /* RAID_DIAGNOSTIC */
118 
119 	RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
120 	RF_ASSERT(ESUOffset == 0);
121 
122 	RF_ETIMER_START(timer);
123 
124 	/* Xor the Wnd buffer into Rod buffer, the difference of old data and
125 	 * new data is stored in Rod buffer */
126 	for (k = 0; k < EpdaIndex; k += 2) {
127 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
128 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
129 	}
130 	/* Start to encoding the buffer storing the difference of old data and
131 	 * new data into 'E' buffer  */
132 	for (i = 0; i < EpdaIndex; i += 2)
133 		if (node->params[i + 1].p != node->results[0]) {	/* results[0] is buf ptr
134 									 * of E */
135 			pda = (RF_PhysDiskAddr_t *) node->params[i].p;
136 			srcbuf = (char *) node->params[i + 1].p;
137 			scol = rf_EUCol(layoutPtr, pda->raidAddress);
138 			suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
139 			destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
140 			rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
141 		}
142 	/* Recover the original old data to be used by parity encoding
143 	 * function in XorNode */
144 	for (k = 0; k < EpdaIndex; k += 2) {
145 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
146 		retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length);
147 	}
148 	RF_ETIMER_STOP(timer);
149 	RF_ETIMER_EVAL(timer);
150 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
151 	rf_GenericWakeupFunc(node, 0);
152 #if 1
153 	return (0);		/* XXX this was missing.. GO */
154 #endif
155 }
156 
157 int
158 rf_SimpleONEFunc(RF_DagNode_t *node)
159 {
160 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
161 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
162 	RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
163 	int     retcode = 0;
164 	char   *srcbuf, *destbuf;
165 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
166 	int     length;
167 	RF_RowCol_t scol;
168 	RF_Etimer_t timer;
169 
170 	RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
171 	if (node->dagHdr->status == rf_enable) {
172 		RF_ETIMER_START(timer);
173 		length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);	/* this is a pda of
174 														 * writeDataNodes */
175 		/* bxor to buffer of readDataNodes */
176 		retcode = rf_bxor(node->params[5].p, node->params[1].p, length);
177 		/* find out the corresponding colume in encoding matrix for
178 		 * write colume to be encoded into redundant disk 'E' */
179 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
180 		srcbuf = node->params[1].p;
181 		destbuf = node->params[3].p;
182 		/* Start encoding process */
183 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
184 		rf_bxor(node->params[5].p, node->params[1].p, length);
185 		RF_ETIMER_STOP(timer);
186 		RF_ETIMER_EVAL(timer);
187 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
188 
189 	}
190 	return (rf_GenericWakeupFunc(node, retcode));	/* call wake func
191 							 * explicitly since no
192 							 * I/O in this node */
193 }
194 
195 
196 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
197 void
198 rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
199 {
200 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
201 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
202 	RF_PhysDiskAddr_t *pda;
203 	int     i, suoffset;
204 	RF_RowCol_t scol;
205 	char   *srcbuf, *destbuf;
206 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
207 	RF_Etimer_t timer;
208 
209 	RF_ETIMER_START(timer);
210 	for (i = 0; i < node->numParams - 2; i += 2) {
211 		RF_ASSERT(node->params[i + 1].p != ebuf);
212 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
213 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
214 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
215 		srcbuf = (char *) node->params[i + 1].p;
216 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
217 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
218 	}
219 	RF_ETIMER_STOP(timer);
220 	RF_ETIMER_EVAL(timer);
221 	tracerec->xor_us += RF_ETIMER_VAL_US(timer);
222 }
223 
224 
225 /*******************************************************************************************
226  *			 Used in  EO_001_CreateLargeWriteDAG
227  ******************************************************************************************/
228 int
229 rf_RegularEFunc(RF_DagNode_t *node)
230 {
231 	rf_RegularESubroutine(node, node->results[0]);
232 	rf_GenericWakeupFunc(node, 0);
233 #if 1
234 	return (0);		/* XXX this was missing?.. GO */
235 #endif
236 }
237 /*******************************************************************************************
238  * This degraded function allow only two case:
239  *  1. when write access the full failed stripe unit, then the access can be more than
240  *     one tripe units.
241  *  2. when write access only part of the failed SU, we assume accesses of more than
242  *     one stripe unit is not allowed so that the write can be dealt with like a
243  *     large write.
244  *  The following function is based on these assumptions. So except in the second case,
245  *  it looks the same as a large write encodeing function. But this is not exactly the
246  *  normal way for doing a degraded write, since raidframe have to break cases of access
247  *  other than the above two into smaller accesses. We may have to change
248  *  DegrESubroutin in the future.
249  *******************************************************************************************/
250 void
251 rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
252 {
253 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
254 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
255 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
256 	RF_PhysDiskAddr_t *pda;
257 	int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
258 	RF_RowCol_t scol;
259 	char   *srcbuf, *destbuf;
260 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
261 	RF_Etimer_t timer;
262 
263 	RF_ETIMER_START(timer);
264 	for (i = 0; i < node->numParams - 2; i += 2) {
265 		RF_ASSERT(node->params[i + 1].p != ebuf);
266 		pda = (RF_PhysDiskAddr_t *) node->params[i].p;
267 		suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
268 		scol = rf_EUCol(layoutPtr, pda->raidAddress);
269 		srcbuf = (char *) node->params[i + 1].p;
270 		destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
271 		rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
272 	}
273 
274 	RF_ETIMER_STOP(timer);
275 	RF_ETIMER_EVAL(timer);
276 	tracerec->q_us += RF_ETIMER_VAL_US(timer);
277 }
278 
279 
280 /**************************************************************************************
281  * This function is used in case where one data disk failed and both redundant disks
282  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
283  * failed in the stripe but not accessed at this time, then we should, instead, use
284  * the rf_EOWriteDoubleRecoveryFunc().
285  **************************************************************************************/
286 int
287 rf_Degraded_100_EOFunc(RF_DagNode_t *node)
288 {
289 	rf_DegrESubroutine(node, node->results[1]);
290 	rf_RecoveryXorFunc(node);	/* does the wakeup here! */
291 #if 1
292 	return (0);		/* XXX this was missing... SHould these be
293 				 * void functions??? GO */
294 #endif
295 }
296 /**************************************************************************************
297  * This function is to encode one sector in one of the data disks to the E disk.
298  * However, in evenodd this function can also be used as decoding function to recover
299  * data from dead disk in the case of parity failure and a single data failure.
300  **************************************************************************************/
301 void
302 rf_e_EncOneSect(
303     RF_RowCol_t srcLogicCol,
304     char *srcSecbuf,
305     RF_RowCol_t destLogicCol,
306     char *destSecbuf,
307     int bytesPerSector)
308 {
309 	int     S_index;	/* index of the EU in the src col which need
310 				 * be Xored into all EUs in a dest sector */
311 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
312 	RF_RowCol_t j, indexInDest,	/* row index of an encoding unit in
313 					 * the destination colume of encoding
314 					 * matrix */
315 	        indexInSrc;	/* row index of an encoding unit in the source
316 				 * colume used for recovery */
317 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
318 
319 #if RF_EO_MATRIX_DIM > 17
320 	int     shortsPerEU = bytesPerEU / sizeof(short);
321 	short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
322 	short temp1;
323 #elif RF_EO_MATRIX_DIM == 17
324 	int     longsPerEU = bytesPerEU / sizeof(long);
325 	long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
326 	long temp1;
327 #endif
328 
329 #if RF_EO_MATRIX_DIM > 17
330 	RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
331 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
332 #elif RF_EO_MATRIX_DIM == 17
333 	RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
334 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
335 #endif
336 
337 	S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
338 #if RF_EO_MATRIX_DIM > 17
339 	srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
340 #elif RF_EO_MATRIX_DIM == 17
341 	srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
342 #endif
343 
344 	for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
345 		indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
346 
347 #if RF_EO_MATRIX_DIM > 17
348 		destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
349 		srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
350 		for (j = 0; j < shortsPerEU; j++) {
351 			temp1 = destShortBuf[j] ^ srcShortBuf1[j];
352 			/* note: S_index won't be at the end row for any src
353 			 * col! */
354 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
355 				destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
356 			/* if indexInSrc is at the end row, ie.
357 			 * RF_EO_MATRIX_DIM -1, then all elements are zero! */
358 			else
359 				destShortBuf[j] = temp1;
360 		}
361 
362 #elif RF_EO_MATRIX_DIM == 17
363 		destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
364 		srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
365 		for (j = 0; j < longsPerEU; j++) {
366 			temp1 = destLongBuf[j] ^ srcLongBuf1[j];
367 			if (indexInSrc != RF_EO_MATRIX_DIM - 1)
368 				destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
369 			else
370 				destLongBuf[j] = temp1;
371 		}
372 #endif
373 	}
374 }
375 
376 void
377 rf_e_encToBuf(
378     RF_Raid_t * raidPtr,
379     RF_RowCol_t srcLogicCol,
380     char *srcbuf,
381     RF_RowCol_t destLogicCol,
382     char *destbuf,
383     int numSector)
384 {
385 	int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
386 
387 	for (i = 0; i < numSector; i++) {
388 		rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
389 		srcbuf += bytesPerSector;
390 		destbuf += bytesPerSector;
391 	}
392 }
393 /**************************************************************************************
394  * when parity die and one data die, We use second redundant information, 'E',
395  * to recover the data in dead disk. This function is used in the recovery node of
396  * for EO_110_CreateReadDAG
397  **************************************************************************************/
398 int
399 rf_RecoveryEFunc(RF_DagNode_t *node)
400 {
401 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
402 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
403 	RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
404 	RF_RowCol_t scol,	/* source logical column */
405 	        fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);	/* logical column of
406 									 * failed SU */
407 	int     i;
408 	RF_PhysDiskAddr_t *pda;
409 	int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
410 	char   *srcbuf, *destbuf;
411 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
412 	RF_Etimer_t timer;
413 
414 	memset((char *) node->results[0], 0,
415 	    rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
416 	if (node->dagHdr->status == rf_enable) {
417 		RF_ETIMER_START(timer);
418 		for (i = 0; i < node->numParams - 2; i += 2)
419 			if (node->params[i + 1].p != node->results[0]) {
420 				pda = (RF_PhysDiskAddr_t *) node->params[i].p;
421 				if (i == node->numParams - 4)
422 					scol = RF_EO_MATRIX_DIM - 2;	/* the colume of
423 									 * redundant E */
424 				else
425 					scol = rf_EUCol(layoutPtr, pda->raidAddress);
426 				srcbuf = (char *) node->params[i + 1].p;
427 				suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
428 				destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
429 				rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
430 			}
431 		RF_ETIMER_STOP(timer);
432 		RF_ETIMER_EVAL(timer);
433 		tracerec->xor_us += RF_ETIMER_VAL_US(timer);
434 	}
435 	return (rf_GenericWakeupFunc(node, 0));	/* node execute successfully */
436 }
437 /**************************************************************************************
438  * This function is used in the case where one data and the parity have filed.
439  * (in EO_110_CreateWriteDAG )
440  **************************************************************************************/
441 int
442 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
443 {
444 	rf_DegrESubroutine(node, node->results[0]);
445 	rf_GenericWakeupFunc(node, 0);
446 #if 1
447 	return (0);		/* XXX Yet another one!! GO */
448 #endif
449 }
450 
451 
452 
453 /**************************************************************************************
454  *  		THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
455  **************************************************************************************/
456 
457 void
458 rf_doubleEOdecode(
459     RF_Raid_t * raidPtr,
460     char **rrdbuf,
461     char **dest,
462     RF_RowCol_t * fcol,
463     char *pbuf,
464     char *ebuf)
465 {
466 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
467 	int     i, j, k, f1, f2, row;
468 	int     rrdrow, erow, count = 0;
469 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
470 	int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
471 #if 0
472 	int     pcol = (RF_EO_MATRIX_DIM) - 1;
473 #endif
474 	int     ecol = (RF_EO_MATRIX_DIM) - 2;
475 	int     bytesPerEU = bytesPerSector / numRowInEncMatix;
476 	int     numDataCol = layoutPtr->numDataCol;
477 #if RF_EO_MATRIX_DIM > 17
478 	int     shortsPerEU = bytesPerEU / sizeof(short);
479 	short  *rrdbuf_current, *pbuf_current, *ebuf_current;
480 	short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
481 	short *temp;
482 	short  *P;
483 
484 	RF_ASSERT(bytesPerEU % sizeof(short) == 0);
485 	RF_Malloc(P, bytesPerEU, (short *));
486 	RF_Malloc(temp, bytesPerEU, (short *));
487 #elif RF_EO_MATRIX_DIM == 17
488 	int     longsPerEU = bytesPerEU / sizeof(long);
489 	long   *rrdbuf_current, *pbuf_current, *ebuf_current;
490 	long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
491 	long *temp;
492 	long   *P;
493 
494 	RF_ASSERT(bytesPerEU % sizeof(long) == 0);
495 	RF_Malloc(P, bytesPerEU, (long *));
496 	RF_Malloc(temp, bytesPerEU, (long *));
497 #endif
498 	RF_ASSERT(*((long *) dest[0]) == 0);
499 	RF_ASSERT(*((long *) dest[1]) == 0);
500 	memset((char *) P, 0, bytesPerEU);
501 	memset((char *) temp, 0, bytesPerEU);
502 	RF_ASSERT(*P == 0);
503 	/* calculate the 'P' parameter, which, not parity, is the Xor of all
504 	 * elements in the last two column, ie. 'E' and 'parity' colume, see
505 	 * the Ref. paper by Blaum, et al 1993  */
506 	for (i = 0; i < numRowInEncMatix; i++)
507 		for (k = 0; k < longsPerEU; k++) {
508 #if RF_EO_MATRIX_DIM > 17
509 			ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
510 			pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
511 #elif RF_EO_MATRIX_DIM == 17
512 			ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
513 			pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
514 #endif
515 			P[k] ^= *ebuf_current;
516 			P[k] ^= *pbuf_current;
517 		}
518 	RF_ASSERT(fcol[0] != fcol[1]);
519 	if (fcol[0] < fcol[1]) {
520 #if RF_EO_MATRIX_DIM > 17
521 		dest_smaller = (short *) (dest[0]);
522 		dest_larger = (short *) (dest[1]);
523 #elif RF_EO_MATRIX_DIM == 17
524 		dest_smaller = (long *) (dest[0]);
525 		dest_larger = (long *) (dest[1]);
526 #endif
527 		f1 = fcol[0];
528 		f2 = fcol[1];
529 	} else {
530 #if RF_EO_MATRIX_DIM > 17
531 		dest_smaller = (short *) (dest[1]);
532 		dest_larger = (short *) (dest[0]);
533 #elif RF_EO_MATRIX_DIM == 17
534 		dest_smaller = (long *) (dest[1]);
535 		dest_larger = (long *) (dest[0]);
536 #endif
537 		f1 = fcol[1];
538 		f2 = fcol[0];
539 	}
540 	row = (RF_EO_MATRIX_DIM) - 1;
541 	while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
542 #if RF_EO_MATRIX_DIM > 17
543 		dest_larger_current = dest_larger + row * shortsPerEU;
544 		dest_smaller_current = dest_smaller + row * shortsPerEU;
545 #elif RF_EO_MATRIX_DIM == 17
546 		dest_larger_current = dest_larger + row * longsPerEU;
547 		dest_smaller_current = dest_smaller + row * longsPerEU;
548 #endif
549 		/**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
550 		       which is the failed data in the colume which has smaller col index. **/
551 		/* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
552 		for (j = 0; j < numDataCol; j++) {
553 			if (j == f1 || j == f2)
554 				continue;
555 			rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
556 			if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
557 #if RF_EO_MATRIX_DIM > 17
558 				rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
559 				for (k = 0; k < shortsPerEU; k++)
560 					temp[k] ^= *(rrdbuf_current + k);
561 #elif RF_EO_MATRIX_DIM == 17
562 				rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
563 				for (k = 0; k < longsPerEU; k++)
564 					temp[k] ^= *(rrdbuf_current + k);
565 #endif
566 			}
567 		}
568 		/* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
569 		 * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
570 		 * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
571 		 * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
572 		 * diagonal) ^ (failed 2)       */
573 
574 		erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
575 		if (erow != (RF_EO_MATRIX_DIM) - 1) {
576 #if RF_EO_MATRIX_DIM > 17
577 			ebuf_current = (short *) ebuf + shortsPerEU * erow;
578 			for (k = 0; k < shortsPerEU; k++)
579 				temp[k] ^= *(ebuf_current + k);
580 #elif RF_EO_MATRIX_DIM == 17
581 			ebuf_current = (long *) ebuf + longsPerEU * erow;
582 			for (k = 0; k < longsPerEU; k++)
583 				temp[k] ^= *(ebuf_current + k);
584 #endif
585 		}
586 		/* step 3: ^P to obtain the failed data (failed 2).  P can be
587 		 * proved to be actually  (principle diagonal)  After this
588 		 * step, temp[k] = (failed 2), the failed data to be recovered */
589 #if RF_EO_MATRIX_DIM > 17
590 		for (k = 0; k < shortsPerEU; k++)
591 			temp[k] ^= P[k];
592 		/* Put the data to the destination buffer                              */
593 		for (k = 0; k < shortsPerEU; k++)
594 			dest_larger_current[k] = temp[k];
595 #elif RF_EO_MATRIX_DIM == 17
596 		for (k = 0; k < longsPerEU; k++)
597 			temp[k] ^= P[k];
598 		/* Put the data to the destination buffer                              */
599 		for (k = 0; k < longsPerEU; k++)
600 			dest_larger_current[k] = temp[k];
601 #endif
602 
603 		/**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
604 		/* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
605 		 * columes    */
606 		for (j = 0; j < numDataCol; j++) {
607 			if (j == f1 || j == f2)
608 				continue;
609 #if RF_EO_MATRIX_DIM > 17
610 			rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
611 			for (k = 0; k < shortsPerEU; k++)
612 				temp[k] ^= *(rrdbuf_current + k);
613 #elif RF_EO_MATRIX_DIM == 17
614 			rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
615 			for (k = 0; k < longsPerEU; k++)
616 				temp[k] ^= *(rrdbuf_current + k);
617 #endif
618 		}
619 		/* step 2: ^A(row,m-1) */
620 		/* step 3: Put the data to the destination buffer                             	 */
621 #if RF_EO_MATRIX_DIM > 17
622 		pbuf_current = (short *) pbuf + shortsPerEU * row;
623 		for (k = 0; k < shortsPerEU; k++)
624 			temp[k] ^= *(pbuf_current + k);
625 		for (k = 0; k < shortsPerEU; k++)
626 			dest_smaller_current[k] = temp[k];
627 #elif RF_EO_MATRIX_DIM == 17
628 		pbuf_current = (long *) pbuf + longsPerEU * row;
629 		for (k = 0; k < longsPerEU; k++)
630 			temp[k] ^= *(pbuf_current + k);
631 		for (k = 0; k < longsPerEU; k++)
632 			dest_smaller_current[k] = temp[k];
633 #endif
634 		count++;
635 	}
636 	/* Check if all Encoding Unit in the data buffer have been decoded,
637 	 * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
638 	 * this algorithm will covered all buffer 				 */
639 	RF_ASSERT(count == numRowInEncMatix);
640 	RF_Free((char *) P, bytesPerEU);
641 	RF_Free((char *) temp, bytesPerEU);
642 }
643 
644 
645 /***************************************************************************************
646 * 	This function is called by double degragded read
647 * 	EO_200_CreateReadDAG
648 *
649 ***************************************************************************************/
650 int
651 rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
652 {
653 	int     ndataParam = 0;
654 	int     np = node->numParams;
655 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
656 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
657 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
658 	int     i, prm, sector, nresults = node->numResults;
659 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
660 	unsigned sosAddr;
661 	int     two = 0, mallc_one = 0, mallc_two = 0;	/* flags to indicate if
662 							 * memory is allocated */
663 	int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
664 	RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
665 	        npda;
666 	RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
667 	char  **buf, *ebuf, *pbuf, *dest[2];
668 	long   *suoff = NULL, *suend = NULL, *prmToCol = NULL,
669 	    psuoff = 0, esuoff = 0;
670 	RF_SectorNum_t startSector, endSector;
671 	RF_Etimer_t timer;
672 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
673 
674 	RF_ETIMER_START(timer);
675 
676 	/* Find out the number of parameters which are pdas for data
677 	 * information */
678 	for (i = 0; i <= np; i++)
679 		if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
680 			ndataParam = i;
681 			break;
682 		}
683 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
684 	if (ndataParam != 0) {
685 		RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
686 		RF_Malloc(suend, ndataParam * sizeof(long), (long *));
687 		RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
688 	}
689 	if (asmap->failedPDAs[1] &&
690 	    (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
691 		RF_ASSERT(0);	/* currently, no support for this situation */
692 		ppda = node->params[np - 6].p;
693 		ppda2 = node->params[np - 5].p;
694 		RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
695 		epda = node->params[np - 4].p;
696 		epda2 = node->params[np - 3].p;
697 		RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
698 		two = 1;
699 	} else {
700 		ppda = node->params[np - 4].p;
701 		epda = node->params[np - 3].p;
702 		psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
703 		esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
704 		RF_ASSERT(psuoff == esuoff);
705 	}
706 	/*
707             the followings have three goals:
708             1. determine the startSector to begin decoding and endSector to end decoding.
709             2. determine the colume numbers of the two failed disks.
710             3. determine the offset and end offset of the access within each failed stripe unit.
711          */
712 	if (nresults == 1) {
713 		/* find the startSector to begin decoding */
714 		pda = node->results[0];
715 		memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
716 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
717 		fsuend[0] = fsuoff[0] + pda->numSector;
718 		fsuoff[1] = 0;
719 		fsuend[1] = 0;
720 		startSector = fsuoff[0];
721 		endSector = fsuend[0];
722 
723 		/* find out the column of failed disk being accessed */
724 		fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
725 
726 		/* find out the other failed colume not accessed */
727 		sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
728 		for (i = 0; i < numDataCol; i++) {
729 			npda.raidAddress = sosAddr + (i * secPerSU);
730 			(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
731 			/* skip over dead disks */
732 			if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
733 				if (i != fcol[0])
734 					break;
735 		}
736 		RF_ASSERT(i < numDataCol);
737 		fcol[1] = i;
738 	} else {
739 		RF_ASSERT(nresults == 2);
740 		pda0 = node->results[0];
741 		memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
742 		pda1 = node->results[1];
743 		memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
744 		/* determine the failed colume numbers of the two failed
745 		 * disks. */
746 		fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
747 		fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
748 		/* determine the offset and end offset of the access within
749 		 * each failed stripe unit. */
750 		fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
751 		fsuend[0] = fsuoff[0] + pda0->numSector;
752 		fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
753 		fsuend[1] = fsuoff[1] + pda1->numSector;
754 		/* determine the startSector to begin decoding */
755 		startSector = RF_MIN(pda0->startSector, pda1->startSector);
756 		/* determine the endSector to end decoding */
757 		endSector = RF_MAX(fsuend[0], fsuend[1]);
758 	}
759 	/*
760 	      assign the beginning sector and the end sector for each parameter
761 	      find out the corresponding colume # for each parameter
762         */
763 	for (prm = 0; prm < ndataParam; prm++) {
764 		pda = node->params[prm].p;
765 		suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
766 		suend[prm] = suoff[prm] + pda->numSector;
767 		prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
768 	}
769 	/* 'sector' is the sector for the current decoding algorithm. For each
770 	 * sector in the failed SU, find out the corresponding parameters that
771 	 * cover the current sector and that are needed for decoding of this
772 	 * sector in failed SU. 2.  Find out if sector is in the shadow of any
773 	 * accessed failed SU. If not, malloc a temporary space of a sector in
774 	 * size. */
775 	for (sector = startSector; sector < endSector; sector++) {
776 		if (nresults == 2)
777 			if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
778 				continue;
779 		for (prm = 0; prm < ndataParam; prm++)
780 			if (suoff[prm] <= sector && sector < suend[prm])
781 				buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
782 				    rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
783 		/* find out if sector is in the shadow of any accessed failed
784 		 * SU. If yes, assign dest[0], dest[1] to point at suitable
785 		 * position of the buffer corresponding to failed SUs. if no,
786 		 * malloc a temporary space of a sector in size for
787 		 * destination of decoding. */
788 		RF_ASSERT(nresults == 1 || nresults == 2);
789 		if (nresults == 1) {
790 			dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
791 			/* Always malloc temp buffer to dest[1]  */
792 			RF_Malloc(dest[1], bytesPerSector, (char *));
793 			memset(dest[1], 0, bytesPerSector);
794 			mallc_two = 1;
795 		} else {
796 			if (fsuoff[0] <= sector && sector < fsuend[0])
797 				dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
798 			else {
799 				RF_Malloc(dest[0], bytesPerSector, (char *));
800 				memset(dest[0], 0, bytesPerSector);
801 				mallc_one = 1;
802 			}
803 			if (fsuoff[1] <= sector && sector < fsuend[1])
804 				dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
805 			else {
806 				RF_Malloc(dest[1], bytesPerSector, (char *));
807 				memset(dest[1], 0, bytesPerSector);
808 				mallc_two = 1;
809 			}
810 			RF_ASSERT(mallc_one == 0 || mallc_two == 0);
811 		}
812 		pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
813 		ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
814 		/*
815 	         * After finish finding all needed sectors, call doubleEOdecode function for decoding
816 	         * one sector to destination.
817 	         */
818 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
819 		/* free all allocated memory, and mark flag to indicate no
820 		 * memory is being allocated */
821 		if (mallc_one == 1)
822 			RF_Free(dest[0], bytesPerSector);
823 		if (mallc_two == 1)
824 			RF_Free(dest[1], bytesPerSector);
825 		mallc_one = mallc_two = 0;
826 	}
827 	RF_Free(buf, numDataCol * sizeof(char *));
828 	if (ndataParam != 0) {
829 		RF_Free(suoff, ndataParam * sizeof(long));
830 		RF_Free(suend, ndataParam * sizeof(long));
831 		RF_Free(prmToCol, ndataParam * sizeof(long));
832 	}
833 	RF_ETIMER_STOP(timer);
834 	RF_ETIMER_EVAL(timer);
835 	if (tracerec) {
836 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
837 	}
838 	rf_GenericWakeupFunc(node, 0);
839 #if 1
840 	return (0);		/* XXX is this even close!!?!?!!? GO */
841 #endif
842 }
843 
844 
845 /* currently, only access of one of the two failed SU is allowed in this function.
846  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
847  * many accesses of single stripe unit.
848  */
849 
850 int
851 rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
852 {
853 	int     np = node->numParams;
854 	RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
855 	RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
856 	RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
857 	RF_SectorNum_t sector;
858 	RF_RowCol_t col, scol;
859 	int     prm, i, j;
860 	RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
861 	unsigned sosAddr;
862 	unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
863 	RF_int64 numbytes;
864 	RF_SectorNum_t startSector, endSector;
865 	RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
866 	RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
867 	char  **buf;		/* buf[0], buf[1], buf[2], ...etc. point to
868 				 * buffer storing data read from col0, col1,
869 				 * col2 */
870 	char   *ebuf, *pbuf, *dest[2], *olddata[2];
871 	RF_Etimer_t timer;
872 	RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
873 
874 	RF_ASSERT(asmap->numDataFailed == 1);	/* currently only support this
875 						 * case, the other failed SU
876 						 * is not being accessed */
877 	RF_ETIMER_START(timer);
878 	RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
879 
880 	ppda = node->results[0];/* Instead of being buffers, node->results[0]
881 				 * and [1] are Ppda and Epda  */
882 	epda = node->results[1];
883 	fpda = asmap->failedPDAs[0];
884 
885 	/* First, recovery the failed old SU using EvenOdd double decoding      */
886 	/* determine the startSector and endSector for decoding */
887 	startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
888 	endSector = startSector + fpda->numSector;
889 	/* Assign buf[col] pointers to point to each non-failed colume  and
890 	 * initialize the pbuf and ebuf to point at the beginning of each
891 	 * source buffers and destination buffers */
892 	for (prm = 0; prm < numDataCol - 2; prm++) {
893 		pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
894 		col = rf_EUCol(layoutPtr, pda->raidAddress);
895 		buf[col] = pda->bufPtr;
896 	}
897 	/* pbuf and ebuf:  they will change values as double recovery decoding
898 	 * goes on */
899 	pbuf = ppda->bufPtr;
900 	ebuf = epda->bufPtr;
901 	/* find out the logical colume numbers in the encoding matrix of the
902 	 * two failed columes */
903 	fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
904 
905 	/* find out the other failed colume not accessed this time */
906 	sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
907 	for (i = 0; i < numDataCol; i++) {
908 		npda.raidAddress = sosAddr + (i * secPerSU);
909 		(raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
910 		/* skip over dead disks */
911 		if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
912 			if (i != fcol[0])
913 				break;
914 	}
915 	RF_ASSERT(i < numDataCol);
916 	fcol[1] = i;
917 	/* assign temporary space to put recovered failed SU */
918 	numbytes = fpda->numSector * bytesPerSector;
919 	RF_Malloc(olddata[0], numbytes, (char *));
920 	RF_Malloc(olddata[1], numbytes, (char *));
921 	dest[0] = olddata[0];
922 	dest[1] = olddata[1];
923 	memset(olddata[0], 0, numbytes);
924 	memset(olddata[1], 0, numbytes);
925 	/* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
926 	 * have already pointed at the beginning of each source buffers and
927 	 * destination buffers */
928 	for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
929 		rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
930 		for (j = 0; j < numDataCol; j++)
931 			if ((j != fcol[0]) && (j != fcol[1]))
932 				buf[j] += bytesPerSector;
933 		dest[0] += bytesPerSector;
934 		dest[1] += bytesPerSector;
935 		ebuf += bytesPerSector;
936 		pbuf += bytesPerSector;
937 	}
938 	/* after recovery, the buffer pointed by olddata[0] is the old failed
939 	 * data. With new writing data and this old data, use small write to
940 	 * calculate the new redundant informations */
941 	/* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
942 	 * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
943 	 * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
944 	 * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
945 	 * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
946 	 * wudNodes; For current implementation, we assume the simplest case:
947 	 * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
948 	 * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
949 	 * data to be writen to the failed disk. We first bxor the new data
950 	 * into the old recovered data, then do the same things as small
951 	 * write. */
952 
953 	rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes);
954 	/* do new 'E' calculation  */
955 	/* find out the corresponding colume in encoding matrix for write
956 	 * colume to be encoded into redundant disk 'E' */
957 	scol = rf_EUCol(layoutPtr, fpda->raidAddress);
958 	/* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
959 	 * buffer pointer               */
960 	rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
961 
962 	/* do new 'P' calculation  */
963 	rf_bxor(olddata[0], ppda->bufPtr, numbytes);
964 	/* Free the allocated buffer  */
965 	RF_Free(olddata[0], numbytes);
966 	RF_Free(olddata[1], numbytes);
967 	RF_Free(buf, numDataCol * sizeof(char *));
968 
969 	RF_ETIMER_STOP(timer);
970 	RF_ETIMER_EVAL(timer);
971 	if (tracerec) {
972 		tracerec->q_us += RF_ETIMER_VAL_US(timer);
973 	}
974 	rf_GenericWakeupFunc(node, 0);
975 	return (0);
976 }
977 #endif				/* RF_INCLUDE_EVENODD > 0 */
978