xref: /netbsd-src/sys/dev/raidframe/rf_dagffwr.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: rf_dagffwr.c,v 1.33 2006/11/16 01:33:23 christos Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * rf_dagffwr.c
31  *
32  * code for creating fault-free write DAGs
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.33 2006/11/16 01:33:23 christos Exp $");
38 
39 #include <dev/raidframe/raidframevar.h>
40 
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_debugMem.h"
46 #include "rf_dagffrd.h"
47 #include "rf_general.h"
48 #include "rf_dagffwr.h"
49 #include "rf_map.h"
50 
51 /******************************************************************************
52  *
53  * General comments on DAG creation:
54  *
55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
57  * is reached, the execution engine will halt forward execution and work
58  * backward through the graph, executing the undo functions.  Assuming that
59  * each node in the graph prior to the Cmt node is undoable and atomic - or -
60  * does not make changes to permanent state, the graph will fail atomically.
61  * If an error occurs after the Cmt node executes, the engine will roll-forward
62  * through the graph, blindly executing nodes until it reaches the end.
63  * If a graph reaches the end, it is assumed to have completed successfully.
64  *
65  * A graph has only 1 Cmt node.
66  *
67  */
68 
69 
70 /******************************************************************************
71  *
72  * The following wrappers map the standard DAG creation interface to the
73  * DAG creation routines.  Additionally, these wrappers enable experimentation
74  * with new DAG structures by providing an extra level of indirection, allowing
75  * the DAG creation routines to be replaced at this single point.
76  */
77 
78 
79 void
80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 			      RF_DagHeader_t *dag_h, void *bp,
82 			      RF_RaidAccessFlags_t flags,
83 			      RF_AllocListElem_t *allocList,
84 			      RF_IoType_t type)
85 {
86 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 				 RF_IO_TYPE_WRITE);
88 }
89 
90 void
91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 		       RF_DagHeader_t *dag_h, void *bp,
93 		       RF_RaidAccessFlags_t flags,
94 		       RF_AllocListElem_t *allocList,
95 		       RF_IoType_t type)
96 {
97 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 				 RF_IO_TYPE_WRITE);
99 }
100 
101 void
102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 		       RF_DagHeader_t *dag_h, void *bp,
104 		       RF_RaidAccessFlags_t flags,
105 		       RF_AllocListElem_t *allocList)
106 {
107 	/* "normal" rollaway */
108 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 				     allocList, &rf_xorFuncs, NULL);
110 }
111 
112 void
113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 		       RF_DagHeader_t *dag_h, void *bp,
115 		       RF_RaidAccessFlags_t flags,
116 		       RF_AllocListElem_t *allocList)
117 {
118 	/* "normal" rollaway */
119 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 }
122 
123 
124 /******************************************************************************
125  *
126  * DAG creation code begins here
127  */
128 
129 
130 /******************************************************************************
131  *
132  * creates a DAG to perform a large-write operation:
133  *
134  *           / Rod \           / Wnd \
135  * H -- block- Rod - Xor - Cmt - Wnd --- T
136  *           \ Rod /          \  Wnp /
137  *                             \[Wnq]/
138  *
139  * The XOR node also does the Q calculation in the P+Q architecture.
140  * All nodes are before the commit node (Cmt) are assumed to be atomic and
141  * undoable - or - they make no changes to permanent state.
142  *
143  * Rod = read old data
144  * Cmt = commit node
145  * Wnp = write new parity
146  * Wnd = write new data
147  * Wnq = write new "q"
148  * [] denotes optional segments in the graph
149  *
150  * Parameters:  raidPtr   - description of the physical array
151  *              asmap     - logical & physical addresses for this access
152  *              bp        - buffer ptr (holds write data)
153  *              flags     - general flags (e.g. disk locking)
154  *              allocList - list of memory allocated in DAG creation
155  *              nfaults   - number of faults array can tolerate
156  *                          (equal to # redundancy units in stripe)
157  *              redfuncs  - list of redundancy generating functions
158  *
159  *****************************************************************************/
160 
161 void
162 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
163 			     RF_DagHeader_t *dag_h, void *bp,
164 			     RF_RaidAccessFlags_t flags,
165 			     RF_AllocListElem_t *allocList,
166 			     int nfaults, int (*redFunc) (RF_DagNode_t *),
167 			     int allowBufferRecycle)
168 {
169 	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
170 	RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
171 	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
172 	RF_AccessStripeMapHeader_t *new_asm_h[2];
173 	RF_StripeNum_t parityStripeID;
174 	char   *sosBuffer, *eosBuffer;
175 	RF_ReconUnitNum_t which_ru;
176 	RF_RaidLayout_t *layoutPtr;
177 	RF_PhysDiskAddr_t *pda;
178 
179 	layoutPtr = &(raidPtr->Layout);
180 	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
181 							asmap->raidAddress,
182 							&which_ru);
183 
184 #if RF_DEBUG_DAG
185 	if (rf_dagDebug) {
186 		printf("[Creating large-write DAG]\n");
187 	}
188 #endif
189 	dag_h->creator = "LargeWriteDAG";
190 
191 	dag_h->numCommitNodes = 1;
192 	dag_h->numCommits = 0;
193 	dag_h->numSuccedents = 1;
194 
195 	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
196 	nWndNodes = asmap->numStripeUnitsAccessed;
197 
198 	for (i = 0; i < nWndNodes; i++) {
199 		tmpNode = rf_AllocDAGNode();
200 		tmpNode->list_next = dag_h->nodes;
201 		dag_h->nodes = tmpNode;
202 	}
203 	wndNodes = dag_h->nodes;
204 
205 	xorNode = rf_AllocDAGNode();
206 	xorNode->list_next = dag_h->nodes;
207 	dag_h->nodes = xorNode;
208 
209 	wnpNode = rf_AllocDAGNode();
210 	wnpNode->list_next = dag_h->nodes;
211 	dag_h->nodes = wnpNode;
212 
213 	blockNode = rf_AllocDAGNode();
214 	blockNode->list_next = dag_h->nodes;
215 	dag_h->nodes = blockNode;
216 
217 	commitNode = rf_AllocDAGNode();
218 	commitNode->list_next = dag_h->nodes;
219 	dag_h->nodes = commitNode;
220 
221 	termNode = rf_AllocDAGNode();
222 	termNode->list_next = dag_h->nodes;
223 	dag_h->nodes = termNode;
224 
225 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
226 	if (nfaults == 2) {
227 		wnqNode = rf_AllocDAGNode();
228 	} else {
229 #endif
230 		wnqNode = NULL;
231 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
232 	}
233 #endif
234 	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
235 					new_asm_h, &nRodNodes, &sosBuffer,
236 					&eosBuffer, allocList);
237 	if (nRodNodes > 0) {
238 		for (i = 0; i < nRodNodes; i++) {
239 			tmpNode = rf_AllocDAGNode();
240 			tmpNode->list_next = dag_h->nodes;
241 			dag_h->nodes = tmpNode;
242 		}
243 		rodNodes = dag_h->nodes;
244 	} else {
245 		rodNodes = NULL;
246 	}
247 
248 	/* begin node initialization */
249 	if (nRodNodes > 0) {
250 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
251 			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
252 			    dag_h, "Nil", allocList);
253 	} else {
254 		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
255 			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
256 			    dag_h, "Nil", allocList);
257 	}
258 
259 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
260 		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
261 		    dag_h, "Cmt", allocList);
262 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
263 		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
264 		    dag_h, "Trm", allocList);
265 
266 	/* initialize the Rod nodes */
267 	tmpNode = rodNodes;
268 	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
269 		if (new_asm_h[asmNum]) {
270 			pda = new_asm_h[asmNum]->stripeMap->physInfo;
271 			while (pda) {
272 				rf_InitNode(tmpNode, rf_wait,
273 					    RF_FALSE, rf_DiskReadFunc,
274 					    rf_DiskReadUndoFunc,
275 					    rf_GenericWakeupFunc,
276 					    1, 1, 4, 0, dag_h,
277 					    "Rod", allocList);
278 				tmpNode->params[0].p = pda;
279 				tmpNode->params[1].p = pda->bufPtr;
280 				tmpNode->params[2].v = parityStripeID;
281 				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
282 				    which_ru);
283 				nodeNum++;
284 				pda = pda->next;
285 				tmpNode = tmpNode->list_next;
286 			}
287 		}
288 	}
289 	RF_ASSERT(nodeNum == nRodNodes);
290 
291 	/* initialize the wnd nodes */
292 	pda = asmap->physInfo;
293 	tmpNode = wndNodes;
294 	for (i = 0; i < nWndNodes; i++) {
295 		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
296 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
297 			    rf_GenericWakeupFunc, 1, 1, 4, 0,
298 			    dag_h, "Wnd", allocList);
299 		RF_ASSERT(pda != NULL);
300 		tmpNode->params[0].p = pda;
301 		tmpNode->params[1].p = pda->bufPtr;
302 		tmpNode->params[2].v = parityStripeID;
303 		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
304 		pda = pda->next;
305 		tmpNode = tmpNode->list_next;
306 	}
307 
308 	/* initialize the redundancy node */
309 	if (nRodNodes > 0) {
310 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
311 			    rf_NullNodeUndoFunc, NULL, 1,
312 			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
313 			    nfaults, dag_h, "Xr ", allocList);
314 	} else {
315 		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
316 			    rf_NullNodeUndoFunc, NULL, 1,
317 			    1, 2 * (nWndNodes + nRodNodes) + 1,
318 			    nfaults, dag_h, "Xr ", allocList);
319 	}
320 	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
321 	tmpNode = wndNodes;
322 	for (i = 0; i < nWndNodes; i++) {
323 		/* pda */
324 		xorNode->params[2 * i + 0] = tmpNode->params[0];
325 		/* buf ptr */
326 		xorNode->params[2 * i + 1] = tmpNode->params[1];
327 		tmpNode = tmpNode->list_next;
328 	}
329 	tmpNode = rodNodes;
330 	for (i = 0; i < nRodNodes; i++) {
331 		/* pda */
332 		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
333 		/* buf ptr */
334 		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
335 		tmpNode = tmpNode->list_next;
336 	}
337 	/* xor node needs to get at RAID information */
338 	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
339 
340 	/*
341          * Look for an Rod node that reads a complete SU. If none,
342          * alloc a buffer to receive the parity info. Note that we
343          * can't use a new data buffer because it will not have gotten
344          * written when the xor occurs.  */
345 	if (allowBufferRecycle) {
346 		tmpNode = rodNodes;
347 		for (i = 0; i < nRodNodes; i++) {
348 			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
349 				break;
350 			tmpNode = tmpNode->list_next;
351 		}
352 	}
353 	if ((!allowBufferRecycle) || (i == nRodNodes)) {
354 		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
355 	} else {
356 		/* this works because the only way we get here is if
357 		   allowBufferRecycle is true and we went through the
358 		   above for loop, and exited via the break before
359 		   i==nRodNodes was true.  That means tmpNode will
360 		   still point to a valid node -- the one we want for
361 		   here! */
362 		xorNode->results[0] = tmpNode->params[1].p;
363 	}
364 
365 	/* initialize the Wnp node */
366 	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
367 		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
368 		    dag_h, "Wnp", allocList);
369 	wnpNode->params[0].p = asmap->parityInfo;
370 	wnpNode->params[1].p = xorNode->results[0];
371 	wnpNode->params[2].v = parityStripeID;
372 	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
373 	/* parityInfo must describe entire parity unit */
374 	RF_ASSERT(asmap->parityInfo->next == NULL);
375 
376 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
377 	if (nfaults == 2) {
378 		/*
379 	         * We never try to recycle a buffer for the Q calcuation
380 	         * in addition to the parity. This would cause two buffers
381 	         * to get smashed during the P and Q calculation, guaranteeing
382 	         * one would be wrong.
383 	         */
384 		RF_MallocAndAdd(xorNode->results[1],
385 				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
386 				(void *), allocList);
387 		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
388 			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
389 			    1, 1, 4, 0, dag_h, "Wnq", allocList);
390 		wnqNode->params[0].p = asmap->qInfo;
391 		wnqNode->params[1].p = xorNode->results[1];
392 		wnqNode->params[2].v = parityStripeID;
393 		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
394 		/* parityInfo must describe entire parity unit */
395 		RF_ASSERT(asmap->parityInfo->next == NULL);
396 	}
397 #endif
398 	/*
399          * Connect nodes to form graph.
400          */
401 
402 	/* connect dag header to block node */
403 	RF_ASSERT(blockNode->numAntecedents == 0);
404 	dag_h->succedents[0] = blockNode;
405 
406 	if (nRodNodes > 0) {
407 		/* connect the block node to the Rod nodes */
408 		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
409 		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
410 		tmpNode = rodNodes;
411 		for (i = 0; i < nRodNodes; i++) {
412 			RF_ASSERT(tmpNode->numAntecedents == 1);
413 			blockNode->succedents[i] = tmpNode;
414 			tmpNode->antecedents[0] = blockNode;
415 			tmpNode->antType[0] = rf_control;
416 
417 			/* connect the Rod nodes to the Xor node */
418 			RF_ASSERT(tmpNode->numSuccedents == 1);
419 			tmpNode->succedents[0] = xorNode;
420 			xorNode->antecedents[i] = tmpNode;
421 			xorNode->antType[i] = rf_trueData;
422 			tmpNode = tmpNode->list_next;
423 		}
424 	} else {
425 		/* connect the block node to the Xor node */
426 		RF_ASSERT(blockNode->numSuccedents == 1);
427 		RF_ASSERT(xorNode->numAntecedents == 1);
428 		blockNode->succedents[0] = xorNode;
429 		xorNode->antecedents[0] = blockNode;
430 		xorNode->antType[0] = rf_control;
431 	}
432 
433 	/* connect the xor node to the commit node */
434 	RF_ASSERT(xorNode->numSuccedents == 1);
435 	RF_ASSERT(commitNode->numAntecedents == 1);
436 	xorNode->succedents[0] = commitNode;
437 	commitNode->antecedents[0] = xorNode;
438 	commitNode->antType[0] = rf_control;
439 
440 	/* connect the commit node to the write nodes */
441 	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
442 	tmpNode = wndNodes;
443 	for (i = 0; i < nWndNodes; i++) {
444 		RF_ASSERT(wndNodes->numAntecedents == 1);
445 		commitNode->succedents[i] = tmpNode;
446 		tmpNode->antecedents[0] = commitNode;
447 		tmpNode->antType[0] = rf_control;
448 		tmpNode = tmpNode->list_next;
449 	}
450 	RF_ASSERT(wnpNode->numAntecedents == 1);
451 	commitNode->succedents[nWndNodes] = wnpNode;
452 	wnpNode->antecedents[0] = commitNode;
453 	wnpNode->antType[0] = rf_trueData;
454 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
455 	if (nfaults == 2) {
456 		RF_ASSERT(wnqNode->numAntecedents == 1);
457 		commitNode->succedents[nWndNodes + 1] = wnqNode;
458 		wnqNode->antecedents[0] = commitNode;
459 		wnqNode->antType[0] = rf_trueData;
460 	}
461 #endif
462 	/* connect the write nodes to the term node */
463 	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
464 	RF_ASSERT(termNode->numSuccedents == 0);
465 	tmpNode = wndNodes;
466 	for (i = 0; i < nWndNodes; i++) {
467 		RF_ASSERT(wndNodes->numSuccedents == 1);
468 		tmpNode->succedents[0] = termNode;
469 		termNode->antecedents[i] = tmpNode;
470 		termNode->antType[i] = rf_control;
471 		tmpNode = tmpNode->list_next;
472 	}
473 	RF_ASSERT(wnpNode->numSuccedents == 1);
474 	wnpNode->succedents[0] = termNode;
475 	termNode->antecedents[nWndNodes] = wnpNode;
476 	termNode->antType[nWndNodes] = rf_control;
477 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
478 	if (nfaults == 2) {
479 		RF_ASSERT(wnqNode->numSuccedents == 1);
480 		wnqNode->succedents[0] = termNode;
481 		termNode->antecedents[nWndNodes + 1] = wnqNode;
482 		termNode->antType[nWndNodes + 1] = rf_control;
483 	}
484 #endif
485 }
486 /******************************************************************************
487  *
488  * creates a DAG to perform a small-write operation (either raid 5 or pq),
489  * which is as follows:
490  *
491  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
492  *            \- Rod X      /     \----> Wnd [Und]-/
493  *           [\- Rod X     /       \---> Wnd [Und]-/]
494  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
495  *
496  * Rop = read old parity
497  * Rod = read old data
498  * Roq = read old "q"
499  * Cmt = commit node
500  * Und = unlock data disk
501  * Unp = unlock parity disk
502  * Unq = unlock q disk
503  * Wnp = write new parity
504  * Wnd = write new data
505  * Wnq = write new "q"
506  * [ ] denotes optional segments in the graph
507  *
508  * Parameters:  raidPtr   - description of the physical array
509  *              asmap     - logical & physical addresses for this access
510  *              bp        - buffer ptr (holds write data)
511  *              flags     - general flags (e.g. disk locking)
512  *              allocList - list of memory allocated in DAG creation
513  *              pfuncs    - list of parity generating functions
514  *              qfuncs    - list of q generating functions
515  *
516  * A null qfuncs indicates single fault tolerant
517  *****************************************************************************/
518 
519 void
520 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
521 			     RF_DagHeader_t *dag_h, void *bp,
522 			     RF_RaidAccessFlags_t flags,
523 			     RF_AllocListElem_t *allocList,
524 			     const RF_RedFuncs_t *pfuncs,
525 			     const RF_RedFuncs_t *qfuncs)
526 {
527 	RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
528 	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
529 	RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
530 	RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
531 	RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
532 	RF_DagNode_t *tmpwriteParityNode;
533 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
534 	RF_DagNode_t *tmpwriteQNode;
535 #endif
536 	int     i, j, nNodes, totalNumNodes;
537 	RF_ReconUnitNum_t which_ru;
538 	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
539 	int     (*qfunc) (RF_DagNode_t *);
540 	int     numDataNodes, numParityNodes;
541 	RF_StripeNum_t parityStripeID;
542 	RF_PhysDiskAddr_t *pda;
543 	const char *name, *qname;
544 	long    nfaults;
545 
546 	nfaults = qfuncs ? 2 : 1;
547 
548 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
549 	    asmap->raidAddress, &which_ru);
550 	pda = asmap->physInfo;
551 	numDataNodes = asmap->numStripeUnitsAccessed;
552 	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
553 
554 #if RF_DEBUG_DAG
555 	if (rf_dagDebug) {
556 		printf("[Creating small-write DAG]\n");
557 	}
558 #endif
559 	RF_ASSERT(numDataNodes > 0);
560 	dag_h->creator = "SmallWriteDAG";
561 
562 	dag_h->numCommitNodes = 1;
563 	dag_h->numCommits = 0;
564 	dag_h->numSuccedents = 1;
565 
566 	/*
567          * DAG creation occurs in four steps:
568          * 1. count the number of nodes in the DAG
569          * 2. create the nodes
570          * 3. initialize the nodes
571          * 4. connect the nodes
572          */
573 
574 	/*
575          * Step 1. compute number of nodes in the graph
576          */
577 
578 	/* number of nodes: a read and write for each data unit a
579 	 * redundancy computation node for each parity node (nfaults *
580 	 * nparity) a read and write for each parity unit a block and
581 	 * commit node (2) a terminate node if atomic RMW an unlock
582 	 * node for each data unit, redundancy unit */
583 	totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
584 	    + (nfaults * 2 * numParityNodes) + 3;
585 	/*
586          * Step 2. create the nodes
587          */
588 
589 	blockNode = rf_AllocDAGNode();
590 	blockNode->list_next = dag_h->nodes;
591 	dag_h->nodes = blockNode;
592 
593 	commitNode = rf_AllocDAGNode();
594 	commitNode->list_next = dag_h->nodes;
595 	dag_h->nodes = commitNode;
596 
597 	for (i = 0; i < numDataNodes; i++) {
598 		tmpNode = rf_AllocDAGNode();
599 		tmpNode->list_next = dag_h->nodes;
600 		dag_h->nodes = tmpNode;
601 	}
602 	readDataNodes = dag_h->nodes;
603 
604 	for (i = 0; i < numParityNodes; i++) {
605 		tmpNode = rf_AllocDAGNode();
606 		tmpNode->list_next = dag_h->nodes;
607 		dag_h->nodes = tmpNode;
608 	}
609 	readParityNodes = dag_h->nodes;
610 
611 	for (i = 0; i < numDataNodes; i++) {
612 		tmpNode = rf_AllocDAGNode();
613 		tmpNode->list_next = dag_h->nodes;
614 		dag_h->nodes = tmpNode;
615 	}
616 	writeDataNodes = dag_h->nodes;
617 
618 	for (i = 0; i < numParityNodes; i++) {
619 		tmpNode = rf_AllocDAGNode();
620 		tmpNode->list_next = dag_h->nodes;
621 		dag_h->nodes = tmpNode;
622 	}
623 	writeParityNodes = dag_h->nodes;
624 
625 	for (i = 0; i < numParityNodes; i++) {
626 		tmpNode = rf_AllocDAGNode();
627 		tmpNode->list_next = dag_h->nodes;
628 		dag_h->nodes = tmpNode;
629 	}
630 	xorNodes = dag_h->nodes;
631 
632 	termNode = rf_AllocDAGNode();
633 	termNode->list_next = dag_h->nodes;
634 	dag_h->nodes = termNode;
635 
636 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
637 	if (nfaults == 2) {
638 		for (i = 0; i < numParityNodes; i++) {
639 			tmpNode = rf_AllocDAGNode();
640 			tmpNode->list_next = dag_h->nodes;
641 			dag_h->nodes = tmpNode;
642 		}
643 		readQNodes = dag_h->nodes;
644 
645 		for (i = 0; i < numParityNodes; i++) {
646 			tmpNode = rf_AllocDAGNode();
647 			tmpNode->list_next = dag_h->nodes;
648 			dag_h->nodes = tmpNode;
649 		}
650 		writeQNodes = dag_h->nodes;
651 
652 		for (i = 0; i < numParityNodes; i++) {
653 			tmpNode = rf_AllocDAGNode();
654 			tmpNode->list_next = dag_h->nodes;
655 			dag_h->nodes = tmpNode;
656 		}
657 		qNodes = dag_h->nodes;
658 	} else {
659 #endif
660 		readQNodes = writeQNodes = qNodes = NULL;
661 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
662 	}
663 #endif
664 
665 	/*
666          * Step 3. initialize the nodes
667          */
668 	/* initialize block node (Nil) */
669 	nNodes = numDataNodes + (nfaults * numParityNodes);
670 	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
671 		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
672 		    dag_h, "Nil", allocList);
673 
674 	/* initialize commit node (Cmt) */
675 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
676 		    rf_NullNodeUndoFunc, NULL, nNodes,
677 		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
678 
679 	/* initialize terminate node (Trm) */
680 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
681 		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
682 		    dag_h, "Trm", allocList);
683 
684 	/* initialize nodes which read old data (Rod) */
685 	tmpreadDataNode = readDataNodes;
686 	for (i = 0; i < numDataNodes; i++) {
687 		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
688 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
689 			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
690 			    1, 4, 0, dag_h, "Rod", allocList);
691 		RF_ASSERT(pda != NULL);
692 		/* physical disk addr desc */
693 		tmpreadDataNode->params[0].p = pda;
694 		/* buffer to hold old data */
695 		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
696 		tmpreadDataNode->params[2].v = parityStripeID;
697 		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
698 		    which_ru);
699 		pda = pda->next;
700 		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
701 			tmpreadDataNode->propList[j] = NULL;
702 		}
703 		tmpreadDataNode = tmpreadDataNode->list_next;
704 	}
705 
706 	/* initialize nodes which read old parity (Rop) */
707 	pda = asmap->parityInfo;
708 	i = 0;
709 	tmpreadParityNode = readParityNodes;
710 	for (i = 0; i < numParityNodes; i++) {
711 		RF_ASSERT(pda != NULL);
712 		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
713 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
714 			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
715 			    dag_h, "Rop", allocList);
716 		tmpreadParityNode->params[0].p = pda;
717 		/* buffer to hold old parity */
718 		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
719 		tmpreadParityNode->params[2].v = parityStripeID;
720 		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
721 		    which_ru);
722 		pda = pda->next;
723 		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
724 			tmpreadParityNode->propList[0] = NULL;
725 		}
726 		tmpreadParityNode = tmpreadParityNode->list_next;
727 	}
728 
729 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
730 	/* initialize nodes which read old Q (Roq) */
731 	if (nfaults == 2) {
732 		pda = asmap->qInfo;
733 		tmpreadQNode = readQNodes;
734 		for (i = 0; i < numParityNodes; i++) {
735 			RF_ASSERT(pda != NULL);
736 			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
737 				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
738 				    rf_GenericWakeupFunc, numParityNodes,
739 				    1, 4, 0, dag_h, "Roq", allocList);
740 			tmpreadQNode->params[0].p = pda;
741 			/* buffer to hold old Q */
742 			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
743 								   pda->numSector << raidPtr->logBytesPerSector);
744 			tmpreadQNode->params[2].v = parityStripeID;
745 			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
746 			    which_ru);
747 			pda = pda->next;
748 			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
749 				tmpreadQNode->propList[0] = NULL;
750 			}
751 			tmpreadQNode = tmpreadQNode->list_next;
752 		}
753 	}
754 #endif
755 	/* initialize nodes which write new data (Wnd) */
756 	pda = asmap->physInfo;
757 	tmpwriteDataNode = writeDataNodes;
758 	for (i = 0; i < numDataNodes; i++) {
759 		RF_ASSERT(pda != NULL);
760 		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
761 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
762 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
763 			    "Wnd", allocList);
764 		/* physical disk addr desc */
765 		tmpwriteDataNode->params[0].p = pda;
766 		/* buffer holding new data to be written */
767 		tmpwriteDataNode->params[1].p = pda->bufPtr;
768 		tmpwriteDataNode->params[2].v = parityStripeID;
769 		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
770 		    which_ru);
771 		pda = pda->next;
772 		tmpwriteDataNode = tmpwriteDataNode->list_next;
773 	}
774 
775 	/*
776          * Initialize nodes which compute new parity and Q.
777          */
778 	/*
779          * We use the simple XOR func in the double-XOR case, and when
780          * we're accessing only a portion of one stripe unit. The
781          * distinction between the two is that the regular XOR func
782          * assumes that the targbuf is a full SU in size, and examines
783          * the pda associated with the buffer to decide where within
784          * the buffer to XOR the data, whereas the simple XOR func
785          * just XORs the data into the start of the buffer.  */
786 	if ((numParityNodes == 2) || ((numDataNodes == 1)
787 		&& (asmap->totalSectorsAccessed <
788 		    raidPtr->Layout.sectorsPerStripeUnit))) {
789 		func = pfuncs->simple;
790 		undoFunc = rf_NullNodeUndoFunc;
791 		name = pfuncs->SimpleName;
792 		if (qfuncs) {
793 			qfunc = qfuncs->simple;
794 			qname = qfuncs->SimpleName;
795 		} else {
796 			qfunc = NULL;
797 			qname = NULL;
798 		}
799 	} else {
800 		func = pfuncs->regular;
801 		undoFunc = rf_NullNodeUndoFunc;
802 		name = pfuncs->RegularName;
803 		if (qfuncs) {
804 			qfunc = qfuncs->regular;
805 			qname = qfuncs->RegularName;
806 		} else {
807 			qfunc = NULL;
808 			qname = NULL;
809 		}
810 	}
811 	/*
812          * Initialize the xor nodes: params are {pda,buf}
813          * from {Rod,Wnd,Rop} nodes, and raidPtr
814          */
815 	if (numParityNodes == 2) {
816 		/* double-xor case */
817 		tmpxorNode = xorNodes;
818 		tmpreadDataNode = readDataNodes;
819 		tmpreadParityNode = readParityNodes;
820 		tmpwriteDataNode = writeDataNodes;
821 		tmpqNode = qNodes;
822 		tmpreadQNode = readQNodes;
823 		for (i = 0; i < numParityNodes; i++) {
824 			/* note: no wakeup func for xor */
825 			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
826 				    undoFunc, NULL, 1,
827 				    (numDataNodes + numParityNodes),
828 				    7, 1, dag_h, name, allocList);
829 			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
830 			tmpxorNode->params[0] = tmpreadDataNode->params[0];
831 			tmpxorNode->params[1] = tmpreadDataNode->params[1];
832 			tmpxorNode->params[2] = tmpreadParityNode->params[0];
833 			tmpxorNode->params[3] = tmpreadParityNode->params[1];
834 			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
835 			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
836 			tmpxorNode->params[6].p = raidPtr;
837 			/* use old parity buf as target buf */
838 			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
839 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
840 			if (nfaults == 2) {
841 				/* note: no wakeup func for qor */
842 				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
843 					    qfunc, undoFunc, NULL, 1,
844 					    (numDataNodes + numParityNodes),
845 					    7, 1, dag_h, qname, allocList);
846 				tmpqNode->params[0] = tmpreadDataNode->params[0];
847 				tmpqNode->params[1] = tmpreadDataNode->params[1];
848 				tmpqNode->params[2] = tmpreadQNode->.params[0];
849 				tmpqNode->params[3] = tmpreadQNode->params[1];
850 				tmpqNode->params[4] = tmpwriteDataNode->params[0];
851 				tmpqNode->params[5] = tmpwriteDataNode->params[1];
852 				tmpqNode->params[6].p = raidPtr;
853 				/* use old Q buf as target buf */
854 				tmpqNode->results[0] = tmpreadQNode->params[1].p;
855 				tmpqNode = tmpqNode->list_next;
856 				tmpreadQNodes = tmpreadQNodes->list_next;
857 			}
858 #endif
859 			tmpxorNode = tmpxorNode->list_next;
860 			tmpreadDataNode = tmpreadDataNode->list_next;
861 			tmpreadParityNode = tmpreadParityNode->list_next;
862 			tmpwriteDataNode = tmpwriteDataNode->list_next;
863 		}
864 	} else {
865 		/* there is only one xor node in this case */
866 		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
867 			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
868 			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
869 			    dag_h, name, allocList);
870 		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
871 		tmpreadDataNode = readDataNodes;
872 		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
873 							out the "+1" into the "deal with Rop separately below */
874 			/* set up params related to Rod nodes */
875 			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
876 			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
877 			tmpreadDataNode = tmpreadDataNode->list_next;
878 		}
879 		/* deal with Rop separately */
880 		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
881 		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
882 
883 		tmpwriteDataNode = writeDataNodes;
884 		for (i = 0; i < numDataNodes; i++) {
885 			/* set up params related to Wnd and Wnp nodes */
886 			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
887 			    tmpwriteDataNode->params[0];
888 			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
889 			    tmpwriteDataNode->params[1];
890 			tmpwriteDataNode = tmpwriteDataNode->list_next;
891 		}
892 		/* xor node needs to get at RAID information */
893 		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
894 		xorNodes->results[0] = readParityNodes->params[1].p;
895 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
896 		if (nfaults == 2) {
897 			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
898 				    undoFunc, NULL, 1,
899 				    (numDataNodes + numParityNodes),
900 				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
901 				    dag_h, qname, allocList);
902 			tmpreadDataNode = readDataNodes;
903 			for (i = 0; i < numDataNodes; i++) {
904 				/* set up params related to Rod */
905 				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
906 				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
907 				tmpreadDataNode = tmpreadDataNode->list_next;
908 			}
909 			/* and read old q */
910 			qNodes->params[2 * numDataNodes + 0] =	/* pda */
911 			    readQNodes->params[0];
912 			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
913 			    readQNodes->params[1];
914 			tmpwriteDataNode = writeDataNodes;
915 			for (i = 0; i < numDataNodes; i++) {
916 				/* set up params related to Wnd nodes */
917 				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
918 				    tmpwriteDataNode->params[0];
919 				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
920 				    tmpwriteDataNode->params[1];
921 				tmpwriteDataNode = tmpwriteDataNode->list_next;
922 			}
923 			/* xor node needs to get at RAID information */
924 			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
925 			qNodes->results[0] = readQNodes->params[1].p;
926 		}
927 #endif
928 	}
929 
930 	/* initialize nodes which write new parity (Wnp) */
931 	pda = asmap->parityInfo;
932 	tmpwriteParityNode = writeParityNodes;
933 	tmpxorNode = xorNodes;
934 	for (i = 0; i < numParityNodes; i++) {
935 		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
936 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
937 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
938 			    "Wnp", allocList);
939 		RF_ASSERT(pda != NULL);
940 		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
941 				  			 * filled in by xor node */
942 		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
943 				  						 * parity write
944 				  						 * operation */
945 		tmpwriteParityNode->params[2].v = parityStripeID;
946 		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
947 		    which_ru);
948 		pda = pda->next;
949 		tmpwriteParityNode = tmpwriteParityNode->list_next;
950 		tmpxorNode = tmpxorNode->list_next;
951 	}
952 
953 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
954 	/* initialize nodes which write new Q (Wnq) */
955 	if (nfaults == 2) {
956 		pda = asmap->qInfo;
957 		tmpwriteQNode = writeQNodes;
958 		tmpqNode = qNodes;
959 		for (i = 0; i < numParityNodes; i++) {
960 			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
961 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
962 				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
963 				    "Wnq", allocList);
964 			RF_ASSERT(pda != NULL);
965 			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
966 								 * filled in by xor node */
967 			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
968 										 * parity write
969 										 * operation */
970 			tmpwriteQNode->params[2].v = parityStripeID;
971 			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
972 			    which_ru);
973 			pda = pda->next;
974 			tmpwriteQNode = tmpwriteQNode->list_next;
975 			tmpqNode = tmpqNode->list_next;
976 		}
977 	}
978 #endif
979 	/*
980          * Step 4. connect the nodes.
981          */
982 
983 	/* connect header to block node */
984 	dag_h->succedents[0] = blockNode;
985 
986 	/* connect block node to read old data nodes */
987 	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
988 	tmpreadDataNode = readDataNodes;
989 	for (i = 0; i < numDataNodes; i++) {
990 		blockNode->succedents[i] = tmpreadDataNode;
991 		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
992 		tmpreadDataNode->antecedents[0] = blockNode;
993 		tmpreadDataNode->antType[0] = rf_control;
994 		tmpreadDataNode = tmpreadDataNode->list_next;
995 	}
996 
997 	/* connect block node to read old parity nodes */
998 	tmpreadParityNode = readParityNodes;
999 	for (i = 0; i < numParityNodes; i++) {
1000 		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1001 		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1002 		tmpreadParityNode->antecedents[0] = blockNode;
1003 		tmpreadParityNode->antType[0] = rf_control;
1004 		tmpreadParityNode = tmpreadParityNode->list_next;
1005 	}
1006 
1007 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1008 	/* connect block node to read old Q nodes */
1009 	if (nfaults == 2) {
1010 		tmpreadQNode = readQNodes;
1011 		for (i = 0; i < numParityNodes; i++) {
1012 			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1013 			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1014 			tmpreadQNode->antecedents[0] = blockNode;
1015 			tmpreadQNode->antType[0] = rf_control;
1016 			tmpreadQNode = tmpreadQNode->list_next;
1017 		}
1018 	}
1019 #endif
1020 	/* connect read old data nodes to xor nodes */
1021 	tmpreadDataNode = readDataNodes;
1022 	for (i = 0; i < numDataNodes; i++) {
1023 		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1024 		tmpxorNode = xorNodes;
1025 		for (j = 0; j < numParityNodes; j++) {
1026 			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1027 			tmpreadDataNode->succedents[j] = tmpxorNode;
1028 			tmpxorNode->antecedents[i] = tmpreadDataNode;
1029 			tmpxorNode->antType[i] = rf_trueData;
1030 			tmpxorNode = tmpxorNode->list_next;
1031 		}
1032 		tmpreadDataNode = tmpreadDataNode->list_next;
1033 	}
1034 
1035 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1036 	/* connect read old data nodes to q nodes */
1037 	if (nfaults == 2) {
1038 		tmpreadDataNode = readDataNodes;
1039 		for (i = 0; i < numDataNodes; i++) {
1040 			tmpqNode = qNodes;
1041 			for (j = 0; j < numParityNodes; j++) {
1042 				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1043 				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1044 				tmpqNode->antecedents[i] = tmpreadDataNode;
1045 				tmpqNode->antType[i] = rf_trueData;
1046 				tmpqNode = tmpqNode->list_next;
1047 			}
1048 			tmpreadDataNode = tmpreadDataNode->list_next;
1049 		}
1050 	}
1051 #endif
1052 	/* connect read old parity nodes to xor nodes */
1053 	tmpreadParityNode = readParityNodes;
1054 	for (i = 0; i < numParityNodes; i++) {
1055 		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1056 		tmpxorNode = xorNodes;
1057 		for (j = 0; j < numParityNodes; j++) {
1058 			tmpreadParityNode->succedents[j] = tmpxorNode;
1059 			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1060 			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1061 			tmpxorNode = tmpxorNode->list_next;
1062 		}
1063 		tmpreadParityNode = tmpreadParityNode->list_next;
1064 	}
1065 
1066 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1067 	/* connect read old q nodes to q nodes */
1068 	if (nfaults == 2) {
1069 		tmpreadParityNode = readParityNodes;
1070 		tmpreadQNode = readQNodes;
1071 		for (i = 0; i < numParityNodes; i++) {
1072 			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1073 			tmpqNode = qNodes;
1074 			for (j = 0; j < numParityNodes; j++) {
1075 				tmpreadQNode->succedents[j] = tmpqNode;
1076 				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
1077 				tmpqNode->antType[numDataNodes + i] = rf_trueData;
1078 				tmpqNode = tmpqNode->list_next;
1079 			}
1080 			tmpreadParityNode = tmpreadParityNode->list_next;
1081 			tmpreadQNode = tmpreadQNode->list_next;
1082 		}
1083 	}
1084 #endif
1085 	/* connect xor nodes to commit node */
1086 	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1087 	tmpxorNode = xorNodes;
1088 	for (i = 0; i < numParityNodes; i++) {
1089 		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1090 		tmpxorNode->succedents[0] = commitNode;
1091 		commitNode->antecedents[i] = tmpxorNode;
1092 		commitNode->antType[i] = rf_control;
1093 		tmpxorNode = tmpxorNode->list_next;
1094 	}
1095 
1096 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1097 	/* connect q nodes to commit node */
1098 	if (nfaults == 2) {
1099 		tmpqNode = qNodes;
1100 		for (i = 0; i < numParityNodes; i++) {
1101 			RF_ASSERT(tmpqNode->numSuccedents == 1);
1102 			tmpqNode->succedents[0] = commitNode;
1103 			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1104 			commitNode->antType[i + numParityNodes] = rf_control;
1105 			tmpqNode = tmpqNode->list_next;
1106 		}
1107 	}
1108 #endif
1109 	/* connect commit node to write nodes */
1110 	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1111 	tmpwriteDataNode = writeDataNodes;
1112 	for (i = 0; i < numDataNodes; i++) {
1113 		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1114 		commitNode->succedents[i] = tmpwriteDataNode;
1115 		tmpwriteDataNode->antecedents[0] = commitNode;
1116 		tmpwriteDataNode->antType[0] = rf_trueData;
1117 		tmpwriteDataNode = tmpwriteDataNode->list_next;
1118 	}
1119 	tmpwriteParityNode = writeParityNodes;
1120 	for (i = 0; i < numParityNodes; i++) {
1121 		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1122 		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1123 		tmpwriteParityNode->antecedents[0] = commitNode;
1124 		tmpwriteParityNode->antType[0] = rf_trueData;
1125 		tmpwriteParityNode = tmpwriteParityNode->list_next;
1126 	}
1127 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1128 	if (nfaults == 2) {
1129 		tmpwriteQNode = writeQNodes;
1130 		for (i = 0; i < numParityNodes; i++) {
1131 			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1132 			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1133 			tmpwriteQNode->antecedents[0] = commitNode;
1134 			tmpwriteQNode->antType[0] = rf_trueData;
1135 			tmpwriteQNode = tmpwriteQNode->list_next;
1136 		}
1137 	}
1138 #endif
1139 	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1140 	RF_ASSERT(termNode->numSuccedents == 0);
1141 	tmpwriteDataNode = writeDataNodes;
1142 	for (i = 0; i < numDataNodes; i++) {
1143 		/* connect write new data nodes to term node */
1144 		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1145 		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1146 		tmpwriteDataNode->succedents[0] = termNode;
1147 		termNode->antecedents[i] = tmpwriteDataNode;
1148 		termNode->antType[i] = rf_control;
1149 		tmpwriteDataNode = tmpwriteDataNode->list_next;
1150 	}
1151 
1152 	tmpwriteParityNode = writeParityNodes;
1153 	for (i = 0; i < numParityNodes; i++) {
1154 		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1155 		tmpwriteParityNode->succedents[0] = termNode;
1156 		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1157 		termNode->antType[numDataNodes + i] = rf_control;
1158 		tmpwriteParityNode = tmpwriteParityNode->list_next;
1159 	}
1160 
1161 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1162 	if (nfaults == 2) {
1163 		tmpwriteQNode = writeQNodes;
1164 		for (i = 0; i < numParityNodes; i++) {
1165 			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1166 			tmpwriteQNode->succedents[0] = termNode;
1167 			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1168 			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1169 			tmpwriteQNode = tmpwriteQNode->list_next;
1170 		}
1171 	}
1172 #endif
1173 }
1174 
1175 
1176 /******************************************************************************
1177  * create a write graph (fault-free or degraded) for RAID level 1
1178  *
1179  * Hdr -> Commit -> Wpd -> Nil -> Trm
1180  *               -> Wsd ->
1181  *
1182  * The "Wpd" node writes data to the primary copy in the mirror pair
1183  * The "Wsd" node writes data to the secondary copy in the mirror pair
1184  *
1185  * Parameters:  raidPtr   - description of the physical array
1186  *              asmap     - logical & physical addresses for this access
1187  *              bp        - buffer ptr (holds write data)
1188  *              flags     - general flags (e.g. disk locking)
1189  *              allocList - list of memory allocated in DAG creation
1190  *****************************************************************************/
1191 
1192 void
1193 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
1194 			 RF_DagHeader_t *dag_h, void *bp,
1195 			 RF_RaidAccessFlags_t flags,
1196 			 RF_AllocListElem_t *allocList)
1197 {
1198 	RF_DagNode_t *unblockNode, *termNode, *commitNode;
1199 	RF_DagNode_t *wndNode, *wmirNode;
1200 	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
1201 	int     nWndNodes, nWmirNodes, i;
1202 	RF_ReconUnitNum_t which_ru;
1203 	RF_PhysDiskAddr_t *pda, *pdaP;
1204 	RF_StripeNum_t parityStripeID;
1205 
1206 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
1207 	    asmap->raidAddress, &which_ru);
1208 #if RF_DEBUG_DAG
1209 	if (rf_dagDebug) {
1210 		printf("[Creating RAID level 1 write DAG]\n");
1211 	}
1212 #endif
1213 	dag_h->creator = "RaidOneWriteDAG";
1214 
1215 	/* 2 implies access not SU aligned */
1216 	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
1217 	nWndNodes = (asmap->physInfo->next) ? 2 : 1;
1218 
1219 	/* alloc the Wnd nodes and the Wmir node */
1220 	if (asmap->numDataFailed == 1)
1221 		nWndNodes--;
1222 	if (asmap->numParityFailed == 1)
1223 		nWmirNodes--;
1224 
1225 	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
1226 	 * + terminator) */
1227 	for (i = 0; i < nWndNodes; i++) {
1228 		tmpNode = rf_AllocDAGNode();
1229 		tmpNode->list_next = dag_h->nodes;
1230 		dag_h->nodes = tmpNode;
1231 	}
1232 	wndNode = dag_h->nodes;
1233 
1234 	for (i = 0; i < nWmirNodes; i++) {
1235 		tmpNode = rf_AllocDAGNode();
1236 		tmpNode->list_next = dag_h->nodes;
1237 		dag_h->nodes = tmpNode;
1238 	}
1239 	wmirNode = dag_h->nodes;
1240 
1241 	commitNode = rf_AllocDAGNode();
1242 	commitNode->list_next = dag_h->nodes;
1243 	dag_h->nodes = commitNode;
1244 
1245 	unblockNode = rf_AllocDAGNode();
1246 	unblockNode->list_next = dag_h->nodes;
1247 	dag_h->nodes = unblockNode;
1248 
1249 	termNode = rf_AllocDAGNode();
1250 	termNode->list_next = dag_h->nodes;
1251 	dag_h->nodes = termNode;
1252 
1253 	/* this dag can commit immediately */
1254 	dag_h->numCommitNodes = 1;
1255 	dag_h->numCommits = 0;
1256 	dag_h->numSuccedents = 1;
1257 
1258 	/* initialize the commit, unblock, and term nodes */
1259 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
1260 		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
1261 		    0, 0, 0, dag_h, "Cmt", allocList);
1262 	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
1263 		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
1264 		    0, 0, dag_h, "Nil", allocList);
1265 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
1266 		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
1267 		    dag_h, "Trm", allocList);
1268 
1269 	/* initialize the wnd nodes */
1270 	if (nWndNodes > 0) {
1271 		pda = asmap->physInfo;
1272 		tmpwndNode = wndNode;
1273 		for (i = 0; i < nWndNodes; i++) {
1274 			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
1275 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1276 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
1277 				    dag_h, "Wpd", allocList);
1278 			RF_ASSERT(pda != NULL);
1279 			tmpwndNode->params[0].p = pda;
1280 			tmpwndNode->params[1].p = pda->bufPtr;
1281 			tmpwndNode->params[2].v = parityStripeID;
1282 			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1283 			pda = pda->next;
1284 			tmpwndNode = tmpwndNode->list_next;
1285 		}
1286 		RF_ASSERT(pda == NULL);
1287 	}
1288 	/* initialize the mirror nodes */
1289 	if (nWmirNodes > 0) {
1290 		pda = asmap->physInfo;
1291 		pdaP = asmap->parityInfo;
1292 		tmpwmirNode = wmirNode;
1293 		for (i = 0; i < nWmirNodes; i++) {
1294 			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
1295 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
1296 				    rf_GenericWakeupFunc, 1, 1, 4, 0,
1297 				    dag_h, "Wsd", allocList);
1298 			RF_ASSERT(pda != NULL);
1299 			tmpwmirNode->params[0].p = pdaP;
1300 			tmpwmirNode->params[1].p = pda->bufPtr;
1301 			tmpwmirNode->params[2].v = parityStripeID;
1302 			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
1303 			pda = pda->next;
1304 			pdaP = pdaP->next;
1305 			tmpwmirNode = tmpwmirNode->list_next;
1306 		}
1307 		RF_ASSERT(pda == NULL);
1308 		RF_ASSERT(pdaP == NULL);
1309 	}
1310 	/* link the header node to the commit node */
1311 	RF_ASSERT(dag_h->numSuccedents == 1);
1312 	RF_ASSERT(commitNode->numAntecedents == 0);
1313 	dag_h->succedents[0] = commitNode;
1314 
1315 	/* link the commit node to the write nodes */
1316 	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
1317 	tmpwndNode = wndNode;
1318 	for (i = 0; i < nWndNodes; i++) {
1319 		RF_ASSERT(tmpwndNode->numAntecedents == 1);
1320 		commitNode->succedents[i] = tmpwndNode;
1321 		tmpwndNode->antecedents[0] = commitNode;
1322 		tmpwndNode->antType[0] = rf_control;
1323 		tmpwndNode = tmpwndNode->list_next;
1324 	}
1325 	tmpwmirNode = wmirNode;
1326 	for (i = 0; i < nWmirNodes; i++) {
1327 		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
1328 		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
1329 		tmpwmirNode->antecedents[0] = commitNode;
1330 		tmpwmirNode->antType[0] = rf_control;
1331 		tmpwmirNode = tmpwmirNode->list_next;
1332 	}
1333 
1334 	/* link the write nodes to the unblock node */
1335 	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
1336 	tmpwndNode = wndNode;
1337 	for (i = 0; i < nWndNodes; i++) {
1338 		RF_ASSERT(tmpwndNode->numSuccedents == 1);
1339 		tmpwndNode->succedents[0] = unblockNode;
1340 		unblockNode->antecedents[i] = tmpwndNode;
1341 		unblockNode->antType[i] = rf_control;
1342 		tmpwndNode = tmpwndNode->list_next;
1343 	}
1344 	tmpwmirNode = wmirNode;
1345 	for (i = 0; i < nWmirNodes; i++) {
1346 		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
1347 		tmpwmirNode->succedents[0] = unblockNode;
1348 		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
1349 		unblockNode->antType[i + nWndNodes] = rf_control;
1350 		tmpwmirNode = tmpwmirNode->list_next;
1351 	}
1352 
1353 	/* link the unblock node to the term node */
1354 	RF_ASSERT(unblockNode->numSuccedents == 1);
1355 	RF_ASSERT(termNode->numAntecedents == 1);
1356 	RF_ASSERT(termNode->numSuccedents == 0);
1357 	unblockNode->succedents[0] = termNode;
1358 	termNode->antecedents[0] = unblockNode;
1359 	termNode->antType[0] = rf_control;
1360 }
1361