xref: /netbsd-src/sys/dev/raidframe/rf_dagffwr.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: rf_dagffwr.c,v 1.34 2013/09/15 12:41:17 martin Exp $	*/
2 /*
3  * Copyright (c) 1995 Carnegie-Mellon University.
4  * All rights reserved.
5  *
6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
7  *
8  * Permission to use, copy, modify and distribute this software and
9  * its documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  */
28 
29 /*
30  * rf_dagff.c
31  *
32  * code for creating fault-free DAGs
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 1.34 2013/09/15 12:41:17 martin Exp $");
38 
39 #include <dev/raidframe/raidframevar.h>
40 
41 #include "rf_raid.h"
42 #include "rf_dag.h"
43 #include "rf_dagutils.h"
44 #include "rf_dagfuncs.h"
45 #include "rf_debugMem.h"
46 #include "rf_dagffrd.h"
47 #include "rf_general.h"
48 #include "rf_dagffwr.h"
49 #include "rf_map.h"
50 
51 /******************************************************************************
52  *
53  * General comments on DAG creation:
54  *
55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
57  * is reached, the execution engine will halt forward execution and work
58  * backward through the graph, executing the undo functions.  Assuming that
59  * each node in the graph prior to the Cmt node are undoable and atomic - or -
60  * does not make changes to permanent state, the graph will fail atomically.
61  * If an error occurs after the Cmt node executes, the engine will roll-forward
62  * through the graph, blindly executing nodes until it reaches the end.
63  * If a graph reaches the end, it is assumed to have completed successfully.
64  *
65  * A graph has only 1 Cmt node.
66  *
67  */
68 
69 
70 /******************************************************************************
71  *
72  * The following wrappers map the standard DAG creation interface to the
73  * DAG creation routines.  Additionally, these wrappers enable experimentation
74  * with new DAG structures by providing an extra level of indirection, allowing
75  * the DAG creation routines to be replaced at this single point.
76  */
77 
78 
79 void
80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
81 			      RF_DagHeader_t *dag_h, void *bp,
82 			      RF_RaidAccessFlags_t flags,
83 			      RF_AllocListElem_t *allocList,
84 			      RF_IoType_t type)
85 {
86 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
87 				 RF_IO_TYPE_WRITE);
88 }
89 
90 void
91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
92 		       RF_DagHeader_t *dag_h, void *bp,
93 		       RF_RaidAccessFlags_t flags,
94 		       RF_AllocListElem_t *allocList,
95 		       RF_IoType_t type)
96 {
97 	rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
98 				 RF_IO_TYPE_WRITE);
99 }
100 
101 void
102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
103 		       RF_DagHeader_t *dag_h, void *bp,
104 		       RF_RaidAccessFlags_t flags,
105 		       RF_AllocListElem_t *allocList)
106 {
107 	/* "normal" rollaway */
108 	rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
109 				     allocList, &rf_xorFuncs, NULL);
110 }
111 
112 void
113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
114 		       RF_DagHeader_t *dag_h, void *bp,
115 		       RF_RaidAccessFlags_t flags,
116 		       RF_AllocListElem_t *allocList)
117 {
118 	/* "normal" rollaway */
119 	rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
120 				     allocList, 1, rf_RegularXorFunc, RF_TRUE);
121 }
122 
123 
124 /******************************************************************************
125  *
126  * DAG creation code begins here
127  */
128 
129 
130 /******************************************************************************
131  *
132  * creates a DAG to perform a large-write operation:
133  *
134  *           / Rod \           / Wnd \
135  * H -- block- Rod - Xor - Cmt - Wnd --- T
136  *           \ Rod /          \  Wnp /
137  *                             \[Wnq]/
138  *
139  * The XOR node also does the Q calculation in the P+Q architecture.
 * All nodes before the commit node (Cmt) are assumed to be atomic and
 * undoable - or - they make no changes to permanent state.
142  *
143  * Rod = read old data
144  * Cmt = commit node
145  * Wnp = write new parity
146  * Wnd = write new data
147  * Wnq = write new "q"
148  * [] denotes optional segments in the graph
149  *
150  * Parameters:  raidPtr   - description of the physical array
151  *              asmap     - logical & physical addresses for this access
152  *              bp        - buffer ptr (holds write data)
153  *              flags     - general flags (e.g. disk locking)
154  *              allocList - list of memory allocated in DAG creation
155  *              nfaults   - number of faults array can tolerate
156  *                          (equal to # redundancy units in stripe)
157  *              redfuncs  - list of redundancy generating functions
158  *
159  *****************************************************************************/
160 
void
rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			     RF_DagHeader_t *dag_h, void *bp,
			     RF_RaidAccessFlags_t flags,
			     RF_AllocListElem_t *allocList,
			     int nfaults, int (*redFunc) (RF_DagNode_t *),
			     int allowBufferRecycle)
{
	/*
	 * Builds: H -> block -> [Rod...] -> Xor -> Cmt -> {Wnd..., Wnp, [Wnq]} -> T
	 * (see the block comment above for the full picture).  redFunc
	 * computes the redundancy; when nfaults == 2 it is expected to
	 * produce both results[0] (parity) and results[1] (Q).  If
	 * allowBufferRecycle is set, a full-stripe-unit old-data buffer
	 * may be reused as the parity target instead of allocating one.
	 */
	RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
	RF_DagNode_t *blockNode, *commitNode, *termNode;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	RF_DagNode_t *wnqNode;
#endif
	int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
	/* access-stripe maps for the unaccessed stripe portions before and
	 * after this access (filled in by rf_MapUnaccessedPortionOfStripe) */
	RF_AccessStripeMapHeader_t *new_asm_h[2];
	RF_StripeNum_t parityStripeID;
	char   *sosBuffer, *eosBuffer;	/* start/end-of-stripe read buffers */
	RF_ReconUnitNum_t which_ru;
	RF_RaidLayout_t *layoutPtr;
	RF_PhysDiskAddr_t *pda;

	layoutPtr = &(raidPtr->Layout);
	parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
							asmap->raidAddress,
							&which_ru);

#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating large-write DAG]\n");
	}
#endif
	dag_h->creator = "LargeWriteDAG";

	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
	/* one Wnd node per stripe unit overwritten by this access */
	nWndNodes = asmap->numStripeUnitsAccessed;

	/* Nodes are pushed onto the dag_h->nodes list head-first; after
	 * each group of pushes, the list head is remembered as the first
	 * node of that group and list_next walks through the rest of it. */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNodes = dag_h->nodes;

	xorNode = rf_AllocDAGNode();
	xorNode->list_next = dag_h->nodes;
	dag_h->nodes = xorNode;

	wnpNode = rf_AllocDAGNode();
	wnpNode->list_next = dag_h->nodes;
	dag_h->nodes = wnpNode;

	blockNode = rf_AllocDAGNode();
	blockNode->list_next = dag_h->nodes;
	dag_h->nodes = blockNode;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		/* NOTE(review): unlike the other nodes, wnqNode is not
		 * linked onto dag_h->nodes here -- confirm it is tracked/
		 * released elsewhere. */
		wnqNode = rf_AllocDAGNode();
	} else {
		wnqNode = NULL;
	}
#endif
	/* Map the portions of the stripe that this access does not
	 * overwrite; they must be read (one Rod node each) so the Xor can
	 * compute parity over the full stripe. */
	rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h,
					new_asm_h, &nRodNodes, &sosBuffer,
					&eosBuffer, allocList);
	if (nRodNodes > 0) {
		for (i = 0; i < nRodNodes; i++) {
			tmpNode = rf_AllocDAGNode();
			tmpNode->list_next = dag_h->nodes;
			dag_h->nodes = tmpNode;
		}
		rodNodes = dag_h->nodes;
	} else {
		rodNodes = NULL;
	}

	/* begin node initialization */
	/* The block node waits on nothing and fires the Rod nodes (or the
	 * Xor directly when the access covers the whole stripe). */
	if (nRodNodes > 0) {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0,
			    dag_h, "Nil", allocList);
	} else {
		rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
			    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0,
			    dag_h, "Nil", allocList);
	}

	/* Cmt: the single commit node; fans out to all writes (Wnd + Wnp
	 * and, with two faults, Wnq). */
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0,
		    dag_h, "Cmt", allocList);
	/* Trm: waits on every write node */
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the Rod nodes */
	/* params: {pda, buffer, parity stripe ID, priority/RU} */
	tmpNode = rodNodes;
	for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
		if (new_asm_h[asmNum]) {
			pda = new_asm_h[asmNum]->stripeMap->physInfo;
			while (pda) {
				rf_InitNode(tmpNode, rf_wait,
					    RF_FALSE, rf_DiskReadFunc,
					    rf_DiskReadUndoFunc,
					    rf_GenericWakeupFunc,
					    1, 1, 4, 0, dag_h,
					    "Rod", allocList);
				tmpNode->params[0].p = pda;
				tmpNode->params[1].p = pda->bufPtr;
				tmpNode->params[2].v = parityStripeID;
				tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
				    which_ru);
				nodeNum++;
				pda = pda->next;
				tmpNode = tmpNode->list_next;
			}
		}
	}
	RF_ASSERT(nodeNum == nRodNodes);

	/* initialize the wnd nodes */
	/* same param layout as Rod; buffers are the caller's write data */
	pda = asmap->physInfo;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		rf_InitNode(tmpNode, rf_wait, RF_FALSE,
			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
			    rf_GenericWakeupFunc, 1, 1, 4, 0,
			    dag_h, "Wnd", allocList);
		RF_ASSERT(pda != NULL);
		tmpNode->params[0].p = pda;
		tmpNode->params[1].p = pda->bufPtr;
		tmpNode->params[2].v = parityStripeID;
		tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		pda = pda->next;
		tmpNode = tmpNode->list_next;
	}

	/* initialize the redundancy node */
	/* Xor takes one {pda, buffer} pair per Wnd and Rod node plus a
	 * trailing raidPtr: 2 * (nWndNodes + nRodNodes) + 1 params, and
	 * nfaults results (parity, and Q for P+Q). */
	if (nRodNodes > 0) {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    nRodNodes, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	} else {
		rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc,
			    rf_NullNodeUndoFunc, NULL, 1,
			    1, 2 * (nWndNodes + nRodNodes) + 1,
			    nfaults, dag_h, "Xr ", allocList);
	}
	xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* pda */
		xorNode->params[2 * i + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * i + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	tmpNode = rodNodes;
	for (i = 0; i < nRodNodes; i++) {
		/* pda */
		xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
		/* buf ptr */
		xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
		tmpNode = tmpNode->list_next;
	}
	/* xor node needs to get at RAID information */
	xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;

	/*
         * Look for an Rod node that reads a complete SU. If none,
         * alloc a buffer to receive the parity info. Note that we
         * can't use a new data buffer because it will not have gotten
         * written when the xor occurs.  */
	if (allowBufferRecycle) {
		tmpNode = rodNodes;
		for (i = 0; i < nRodNodes; i++) {
			if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
				break;
			tmpNode = tmpNode->list_next;
		}
	}
	if ((!allowBufferRecycle) || (i == nRodNodes)) {
		xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
	} else {
		/* this works because the only way we get here is if
		   allowBufferRecycle is true and we went through the
		   above for loop, and exited via the break before
		   i==nRodNodes was true.  That means tmpNode will
		   still point to a valid node -- the one we want for
		   here! */
		xorNode->results[0] = tmpNode->params[1].p;
	}

	/* initialize the Wnp node */
	/* writes the parity produced by the Xor (xorNode->results[0]) */
	rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
		    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
		    dag_h, "Wnp", allocList);
	wnpNode->params[0].p = asmap->parityInfo;
	wnpNode->params[1].p = xorNode->results[0];
	wnpNode->params[2].v = parityStripeID;
	wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
	/* parityInfo must describe entire parity unit */
	RF_ASSERT(asmap->parityInfo->next == NULL);

#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		/*
	         * We never try to recycle a buffer for the Q calculation
	         * in addition to the parity. This would cause two buffers
	         * to get smashed during the P and Q calculation, guaranteeing
	         * one would be wrong.
	         */
		RF_MallocAndAdd(xorNode->results[1],
				rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
				(void *), allocList);
		rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc,
			    rf_DiskWriteUndoFunc, rf_GenericWakeupFunc,
			    1, 1, 4, 0, dag_h, "Wnq", allocList);
		wnqNode->params[0].p = asmap->qInfo;
		wnqNode->params[1].p = xorNode->results[1];
		wnqNode->params[2].v = parityStripeID;
		wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
		/* parityInfo must describe entire parity unit */
		RF_ASSERT(asmap->parityInfo->next == NULL);
	}
#endif
	/*
         * Connect nodes to form graph.
         */

	/* connect dag header to block node */
	RF_ASSERT(blockNode->numAntecedents == 0);
	dag_h->succedents[0] = blockNode;

	if (nRodNodes > 0) {
		/* connect the block node to the Rod nodes */
		RF_ASSERT(blockNode->numSuccedents == nRodNodes);
		RF_ASSERT(xorNode->numAntecedents == nRodNodes);
		tmpNode = rodNodes;
		for (i = 0; i < nRodNodes; i++) {
			RF_ASSERT(tmpNode->numAntecedents == 1);
			blockNode->succedents[i] = tmpNode;
			tmpNode->antecedents[0] = blockNode;
			tmpNode->antType[0] = rf_control;

			/* connect the Rod nodes to the Xor node */
			RF_ASSERT(tmpNode->numSuccedents == 1);
			tmpNode->succedents[0] = xorNode;
			xorNode->antecedents[i] = tmpNode;
			xorNode->antType[i] = rf_trueData;
			tmpNode = tmpNode->list_next;
		}
	} else {
		/* connect the block node to the Xor node */
		RF_ASSERT(blockNode->numSuccedents == 1);
		RF_ASSERT(xorNode->numAntecedents == 1);
		blockNode->succedents[0] = xorNode;
		xorNode->antecedents[0] = blockNode;
		xorNode->antType[0] = rf_control;
	}

	/* connect the xor node to the commit node */
	RF_ASSERT(xorNode->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 1);
	xorNode->succedents[0] = commitNode;
	commitNode->antecedents[0] = xorNode;
	commitNode->antType[0] = rf_control;

	/* connect the commit node to the write nodes */
	RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* every Wnd node was initialized with exactly one
		 * antecedent, so checking the list head suffices here */
		RF_ASSERT(wndNodes->numAntecedents == 1);
		commitNode->succedents[i] = tmpNode;
		tmpNode->antecedents[0] = commitNode;
		tmpNode->antType[0] = rf_control;
		tmpNode = tmpNode->list_next;
	}
	RF_ASSERT(wnpNode->numAntecedents == 1);
	commitNode->succedents[nWndNodes] = wnpNode;
	wnpNode->antecedents[0] = commitNode;
	wnpNode->antType[0] = rf_trueData;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numAntecedents == 1);
		commitNode->succedents[nWndNodes + 1] = wnqNode;
		wnqNode->antecedents[0] = commitNode;
		wnqNode->antType[0] = rf_trueData;
	}
#endif
	/* connect the write nodes to the term node */
	RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
	RF_ASSERT(termNode->numSuccedents == 0);
	tmpNode = wndNodes;
	for (i = 0; i < nWndNodes; i++) {
		/* as above: all Wnd nodes share the same successor count */
		RF_ASSERT(wndNodes->numSuccedents == 1);
		tmpNode->succedents[0] = termNode;
		termNode->antecedents[i] = tmpNode;
		termNode->antType[i] = rf_control;
		tmpNode = tmpNode->list_next;
	}
	RF_ASSERT(wnpNode->numSuccedents == 1);
	wnpNode->succedents[0] = termNode;
	termNode->antecedents[nWndNodes] = wnpNode;
	termNode->antType[nWndNodes] = rf_control;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	if (nfaults == 2) {
		RF_ASSERT(wnqNode->numSuccedents == 1);
		wnqNode->succedents[0] = termNode;
		termNode->antecedents[nWndNodes + 1] = wnqNode;
		termNode->antType[nWndNodes + 1] = rf_control;
	}
#endif
}
487 /******************************************************************************
488  *
489  * creates a DAG to perform a small-write operation (either raid 5 or pq),
490  * which is as follows:
491  *
492  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
493  *            \- Rod X      /     \----> Wnd [Und]-/
494  *           [\- Rod X     /       \---> Wnd [Und]-/]
495  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
496  *
497  * Rop = read old parity
498  * Rod = read old data
499  * Roq = read old "q"
500  * Cmt = commit node
501  * Und = unlock data disk
502  * Unp = unlock parity disk
503  * Unq = unlock q disk
504  * Wnp = write new parity
505  * Wnd = write new data
506  * Wnq = write new "q"
507  * [ ] denotes optional segments in the graph
508  *
509  * Parameters:  raidPtr   - description of the physical array
510  *              asmap     - logical & physical addresses for this access
511  *              bp        - buffer ptr (holds write data)
512  *              flags     - general flags (e.g. disk locking)
513  *              allocList - list of memory allocated in DAG creation
514  *              pfuncs    - list of parity generating functions
515  *              qfuncs    - list of q generating functions
516  *
517  * A null qfuncs indicates single fault tolerant
518  *****************************************************************************/
519 
520 void
521 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
522 			     RF_DagHeader_t *dag_h, void *bp,
523 			     RF_RaidAccessFlags_t flags,
524 			     RF_AllocListElem_t *allocList,
525 			     const RF_RedFuncs_t *pfuncs,
526 			     const RF_RedFuncs_t *qfuncs)
527 {
528 	RF_DagNode_t *readDataNodes, *readParityNodes, *termNode;
529 	RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
530 	RF_DagNode_t *xorNodes, *blockNode, *commitNode;
531 	RF_DagNode_t *writeDataNodes, *writeParityNodes;
532 	RF_DagNode_t *tmpxorNode, *tmpwriteDataNode;
533 	RF_DagNode_t *tmpwriteParityNode;
534 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
535 	RF_DagNode_t *tmpwriteQNode, *tmpreadQNode, *tmpqNode, *readQNodes,
536 	     *writeQNodes, *qNodes;
537 #endif
538 	int     i, j, nNodes;
539 	RF_ReconUnitNum_t which_ru;
540 	int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
541 	int     (*qfunc) (RF_DagNode_t *) __unused;
542 	int     numDataNodes, numParityNodes;
543 	RF_StripeNum_t parityStripeID;
544 	RF_PhysDiskAddr_t *pda;
545 	const char *name, *qname __unused;
546 	long    nfaults;
547 
548 	nfaults = qfuncs ? 2 : 1;
549 
550 	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
551 	    asmap->raidAddress, &which_ru);
552 	pda = asmap->physInfo;
553 	numDataNodes = asmap->numStripeUnitsAccessed;
554 	numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
555 
556 #if RF_DEBUG_DAG
557 	if (rf_dagDebug) {
558 		printf("[Creating small-write DAG]\n");
559 	}
560 #endif
561 	RF_ASSERT(numDataNodes > 0);
562 	dag_h->creator = "SmallWriteDAG";
563 
564 	dag_h->numCommitNodes = 1;
565 	dag_h->numCommits = 0;
566 	dag_h->numSuccedents = 1;
567 
568 	/*
569          * DAG creation occurs in four steps:
570          * 1. count the number of nodes in the DAG
571          * 2. create the nodes
572          * 3. initialize the nodes
573          * 4. connect the nodes
574          */
575 
576 	/*
577          * Step 1. compute number of nodes in the graph
578          */
579 
580 	/* number of nodes: a read and write for each data unit a
581 	 * redundancy computation node for each parity node (nfaults *
582 	 * nparity) a read and write for each parity unit a block and
583 	 * commit node (2) a terminate node if atomic RMW an unlock
584 	 * node for each data unit, redundancy unit
585 	 * totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
586 	 *   + (nfaults * 2 * numParityNodes) + 3;
587 	 */
588 
589 	/*
590          * Step 2. create the nodes
591          */
592 
593 	blockNode = rf_AllocDAGNode();
594 	blockNode->list_next = dag_h->nodes;
595 	dag_h->nodes = blockNode;
596 
597 	commitNode = rf_AllocDAGNode();
598 	commitNode->list_next = dag_h->nodes;
599 	dag_h->nodes = commitNode;
600 
601 	for (i = 0; i < numDataNodes; i++) {
602 		tmpNode = rf_AllocDAGNode();
603 		tmpNode->list_next = dag_h->nodes;
604 		dag_h->nodes = tmpNode;
605 	}
606 	readDataNodes = dag_h->nodes;
607 
608 	for (i = 0; i < numParityNodes; i++) {
609 		tmpNode = rf_AllocDAGNode();
610 		tmpNode->list_next = dag_h->nodes;
611 		dag_h->nodes = tmpNode;
612 	}
613 	readParityNodes = dag_h->nodes;
614 
615 	for (i = 0; i < numDataNodes; i++) {
616 		tmpNode = rf_AllocDAGNode();
617 		tmpNode->list_next = dag_h->nodes;
618 		dag_h->nodes = tmpNode;
619 	}
620 	writeDataNodes = dag_h->nodes;
621 
622 	for (i = 0; i < numParityNodes; i++) {
623 		tmpNode = rf_AllocDAGNode();
624 		tmpNode->list_next = dag_h->nodes;
625 		dag_h->nodes = tmpNode;
626 	}
627 	writeParityNodes = dag_h->nodes;
628 
629 	for (i = 0; i < numParityNodes; i++) {
630 		tmpNode = rf_AllocDAGNode();
631 		tmpNode->list_next = dag_h->nodes;
632 		dag_h->nodes = tmpNode;
633 	}
634 	xorNodes = dag_h->nodes;
635 
636 	termNode = rf_AllocDAGNode();
637 	termNode->list_next = dag_h->nodes;
638 	dag_h->nodes = termNode;
639 
640 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
641 	if (nfaults == 2) {
642 		for (i = 0; i < numParityNodes; i++) {
643 			tmpNode = rf_AllocDAGNode();
644 			tmpNode->list_next = dag_h->nodes;
645 			dag_h->nodes = tmpNode;
646 		}
647 		readQNodes = dag_h->nodes;
648 
649 		for (i = 0; i < numParityNodes; i++) {
650 			tmpNode = rf_AllocDAGNode();
651 			tmpNode->list_next = dag_h->nodes;
652 			dag_h->nodes = tmpNode;
653 		}
654 		writeQNodes = dag_h->nodes;
655 
656 		for (i = 0; i < numParityNodes; i++) {
657 			tmpNode = rf_AllocDAGNode();
658 			tmpNode->list_next = dag_h->nodes;
659 			dag_h->nodes = tmpNode;
660 		}
661 		qNodes = dag_h->nodes;
662 	} else {
663 		readQNodes = writeQNodes = qNodes = NULL;
664 	}
665 #endif
666 
667 	/*
668          * Step 3. initialize the nodes
669          */
670 	/* initialize block node (Nil) */
671 	nNodes = numDataNodes + (nfaults * numParityNodes);
672 	rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
673 		    rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0,
674 		    dag_h, "Nil", allocList);
675 
676 	/* initialize commit node (Cmt) */
677 	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
678 		    rf_NullNodeUndoFunc, NULL, nNodes,
679 		    (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
680 
681 	/* initialize terminate node (Trm) */
682 	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
683 		    rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0,
684 		    dag_h, "Trm", allocList);
685 
686 	/* initialize nodes which read old data (Rod) */
687 	tmpreadDataNode = readDataNodes;
688 	for (i = 0; i < numDataNodes; i++) {
689 		rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE,
690 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
691 			    rf_GenericWakeupFunc, (nfaults * numParityNodes),
692 			    1, 4, 0, dag_h, "Rod", allocList);
693 		RF_ASSERT(pda != NULL);
694 		/* physical disk addr desc */
695 		tmpreadDataNode->params[0].p = pda;
696 		/* buffer to hold old data */
697 		tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
698 		tmpreadDataNode->params[2].v = parityStripeID;
699 		tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
700 		    which_ru);
701 		pda = pda->next;
702 		for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
703 			tmpreadDataNode->propList[j] = NULL;
704 		}
705 		tmpreadDataNode = tmpreadDataNode->list_next;
706 	}
707 
708 	/* initialize nodes which read old parity (Rop) */
709 	pda = asmap->parityInfo;
710 	i = 0;
711 	tmpreadParityNode = readParityNodes;
712 	for (i = 0; i < numParityNodes; i++) {
713 		RF_ASSERT(pda != NULL);
714 		rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE,
715 			    rf_DiskReadFunc, rf_DiskReadUndoFunc,
716 			    rf_GenericWakeupFunc, numParityNodes, 1, 4, 0,
717 			    dag_h, "Rop", allocList);
718 		tmpreadParityNode->params[0].p = pda;
719 		/* buffer to hold old parity */
720 		tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
721 		tmpreadParityNode->params[2].v = parityStripeID;
722 		tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
723 		    which_ru);
724 		pda = pda->next;
725 		for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
726 			tmpreadParityNode->propList[0] = NULL;
727 		}
728 		tmpreadParityNode = tmpreadParityNode->list_next;
729 	}
730 
731 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
732 	/* initialize nodes which read old Q (Roq) */
733 	if (nfaults == 2) {
734 		pda = asmap->qInfo;
735 		tmpreadQNode = readQNodes;
736 		for (i = 0; i < numParityNodes; i++) {
737 			RF_ASSERT(pda != NULL);
738 			rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE,
739 				    rf_DiskReadFunc, rf_DiskReadUndoFunc,
740 				    rf_GenericWakeupFunc, numParityNodes,
741 				    1, 4, 0, dag_h, "Roq", allocList);
742 			tmpreadQNode->params[0].p = pda;
743 			/* buffer to hold old Q */
744 			tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
745 								   pda->numSector << raidPtr->logBytesPerSector);
746 			tmpreadQNode->params[2].v = parityStripeID;
747 			tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
748 			    which_ru);
749 			pda = pda->next;
750 			for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
751 				tmpreadQNode->propList[0] = NULL;
752 			}
753 			tmpreadQNode = tmpreadQNode->list_next;
754 		}
755 	}
756 #endif
757 	/* initialize nodes which write new data (Wnd) */
758 	pda = asmap->physInfo;
759 	tmpwriteDataNode = writeDataNodes;
760 	for (i = 0; i < numDataNodes; i++) {
761 		RF_ASSERT(pda != NULL);
762 		rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE,
763 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
764 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
765 			    "Wnd", allocList);
766 		/* physical disk addr desc */
767 		tmpwriteDataNode->params[0].p = pda;
768 		/* buffer holding new data to be written */
769 		tmpwriteDataNode->params[1].p = pda->bufPtr;
770 		tmpwriteDataNode->params[2].v = parityStripeID;
771 		tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
772 		    which_ru);
773 		pda = pda->next;
774 		tmpwriteDataNode = tmpwriteDataNode->list_next;
775 	}
776 
777 	/*
778          * Initialize nodes which compute new parity and Q.
779          */
780 	/*
781          * We use the simple XOR func in the double-XOR case, and when
782          * we're accessing only a portion of one stripe unit. The
783          * distinction between the two is that the regular XOR func
784          * assumes that the targbuf is a full SU in size, and examines
785          * the pda associated with the buffer to decide where within
786          * the buffer to XOR the data, whereas the simple XOR func
787          * just XORs the data into the start of the buffer.  */
788 	if ((numParityNodes == 2) || ((numDataNodes == 1)
789 		&& (asmap->totalSectorsAccessed <
790 		    raidPtr->Layout.sectorsPerStripeUnit))) {
791 		func = pfuncs->simple;
792 		undoFunc = rf_NullNodeUndoFunc;
793 		name = pfuncs->SimpleName;
794 		if (qfuncs) {
795 			qfunc = qfuncs->simple;
796 			qname = qfuncs->SimpleName;
797 		} else {
798 			qfunc = NULL;
799 			qname = NULL;
800 		}
801 	} else {
802 		func = pfuncs->regular;
803 		undoFunc = rf_NullNodeUndoFunc;
804 		name = pfuncs->RegularName;
805 		if (qfuncs) {
806 			qfunc = qfuncs->regular;
807 			qname = qfuncs->RegularName;
808 		} else {
809 			qfunc = NULL;
810 			qname = NULL;
811 		}
812 	}
813 	/*
814          * Initialize the xor nodes: params are {pda,buf}
815          * from {Rod,Wnd,Rop} nodes, and raidPtr
816          */
817 	if (numParityNodes == 2) {
818 		/* double-xor case */
819 		tmpxorNode = xorNodes;
820 		tmpreadDataNode = readDataNodes;
821 		tmpreadParityNode = readParityNodes;
822 		tmpwriteDataNode = writeDataNodes;
823 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
824 		tmpqNode = qNodes;
825 		tmpreadQNode = readQNodes;
826 #endif
827 		for (i = 0; i < numParityNodes; i++) {
828 			/* note: no wakeup func for xor */
829 			rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func,
830 				    undoFunc, NULL, 1,
831 				    (numDataNodes + numParityNodes),
832 				    7, 1, dag_h, name, allocList);
833 			tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
834 			tmpxorNode->params[0] = tmpreadDataNode->params[0];
835 			tmpxorNode->params[1] = tmpreadDataNode->params[1];
836 			tmpxorNode->params[2] = tmpreadParityNode->params[0];
837 			tmpxorNode->params[3] = tmpreadParityNode->params[1];
838 			tmpxorNode->params[4] = tmpwriteDataNode->params[0];
839 			tmpxorNode->params[5] = tmpwriteDataNode->params[1];
840 			tmpxorNode->params[6].p = raidPtr;
841 			/* use old parity buf as target buf */
842 			tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
			if (nfaults == 2) {
				/*
				 * Initialize the matching Q node.  Params are
				 * the {pda, buffer} pairs for old data, old Q
				 * and new data, plus raidPtr.
				 * note: no wakeup func for qor
				 */
				rf_InitNode(tmpqNode, rf_wait, RF_FALSE,
					    qfunc, undoFunc, NULL, 1,
					    (numDataNodes + numParityNodes),
					    7, 1, dag_h, qname, allocList);
				tmpqNode->params[0] = tmpreadDataNode->params[0];
				tmpqNode->params[1] = tmpreadDataNode->params[1];
				/* fixed: was "tmpreadQNode->.params[0]" (stray
				 * '.' after '->' -- did not compile) */
				tmpqNode->params[2] = tmpreadQNode->params[0];
				tmpqNode->params[3] = tmpreadQNode->params[1];
				tmpqNode->params[4] = tmpwriteDataNode->params[0];
				tmpqNode->params[5] = tmpwriteDataNode->params[1];
				tmpqNode->params[6].p = raidPtr;
				/* use old Q buf as target buf */
				tmpqNode->results[0] = tmpreadQNode->params[1].p;
				/* advance the Q lists in step with the loop;
				 * fixed: "tmpreadQNodes" was an undeclared
				 * identifier (the variable is tmpreadQNode) */
				tmpqNode = tmpqNode->list_next;
				tmpreadQNode = tmpreadQNode->list_next;
			}
#endif
863 			tmpxorNode = tmpxorNode->list_next;
864 			tmpreadDataNode = tmpreadDataNode->list_next;
865 			tmpreadParityNode = tmpreadParityNode->list_next;
866 			tmpwriteDataNode = tmpwriteDataNode->list_next;
867 		}
868 	} else {
869 		/* there is only one xor node in this case */
870 		rf_InitNode(xorNodes, rf_wait, RF_FALSE, func,
871 			    undoFunc, NULL, 1, (numDataNodes + numParityNodes),
872 			    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
873 			    dag_h, name, allocList);
874 		xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
875 		tmpreadDataNode = readDataNodes;
876 		for (i = 0; i < numDataNodes; i++) { /* used to be"numDataNodes + 1" until we factored
877 							out the "+1" into the "deal with Rop separately below */
878 			/* set up params related to Rod nodes */
879 			xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
880 			xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
881 			tmpreadDataNode = tmpreadDataNode->list_next;
882 		}
883 		/* deal with Rop separately */
884 		xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
885 		xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
886 
887 		tmpwriteDataNode = writeDataNodes;
888 		for (i = 0; i < numDataNodes; i++) {
889 			/* set up params related to Wnd and Wnp nodes */
890 			xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
891 			    tmpwriteDataNode->params[0];
892 			xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
893 			    tmpwriteDataNode->params[1];
894 			tmpwriteDataNode = tmpwriteDataNode->list_next;
895 		}
896 		/* xor node needs to get at RAID information */
897 		xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
898 		xorNodes->results[0] = readParityNodes->params[1].p;
899 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
900 		if (nfaults == 2) {
901 			rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc,
902 				    undoFunc, NULL, 1,
903 				    (numDataNodes + numParityNodes),
904 				    (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
905 				    dag_h, qname, allocList);
906 			tmpreadDataNode = readDataNodes;
907 			for (i = 0; i < numDataNodes; i++) {
908 				/* set up params related to Rod */
909 				qNodes->params[2 * i + 0] = tmpreadDataNode->params[0];	/* pda */
910 				qNodes->params[2 * i + 1] = tmpreadDataNode->params[1];	/* buffer ptr */
911 				tmpreadDataNode = tmpreadDataNode->list_next;
912 			}
913 			/* and read old q */
914 			qNodes->params[2 * numDataNodes + 0] =	/* pda */
915 			    readQNodes->params[0];
916 			qNodes->params[2 * numDataNodes + 1] =	/* buffer ptr */
917 			    readQNodes->params[1];
918 			tmpwriteDataNode = writeDataNodes;
919 			for (i = 0; i < numDataNodes; i++) {
920 				/* set up params related to Wnd nodes */
921 				qNodes->params[2 * (numDataNodes + 1 + i) + 0] =	/* pda */
922 				    tmpwriteDataNode->params[0];
923 				qNodes->params[2 * (numDataNodes + 1 + i) + 1] =	/* buffer ptr */
924 				    tmpwriteDataNode->params[1];
925 				tmpwriteDataNode = tmpwriteDataNode->list_next;
926 			}
927 			/* xor node needs to get at RAID information */
928 			qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
929 			qNodes->results[0] = readQNodes->params[1].p;
930 		}
931 #endif
932 	}
933 
934 	/* initialize nodes which write new parity (Wnp) */
935 	pda = asmap->parityInfo;
936 	tmpwriteParityNode = writeParityNodes;
937 	tmpxorNode = xorNodes;
938 	for (i = 0; i < numParityNodes; i++) {
939 		rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE,
940 			    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
941 			    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
942 			    "Wnp", allocList);
943 		RF_ASSERT(pda != NULL);
944 		tmpwriteParityNode->params[0].p = pda;	/* param 1 (bufPtr)
945 				  			 * filled in by xor node */
946 		tmpwriteParityNode->params[1].p = tmpxorNode->results[0];	/* buffer pointer for
947 				  						 * parity write
948 				  						 * operation */
949 		tmpwriteParityNode->params[2].v = parityStripeID;
950 		tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
951 		    which_ru);
952 		pda = pda->next;
953 		tmpwriteParityNode = tmpwriteParityNode->list_next;
954 		tmpxorNode = tmpxorNode->list_next;
955 	}
956 
957 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
958 	/* initialize nodes which write new Q (Wnq) */
959 	if (nfaults == 2) {
960 		pda = asmap->qInfo;
961 		tmpwriteQNode = writeQNodes;
962 		tmpqNode = qNodes;
963 		for (i = 0; i < numParityNodes; i++) {
964 			rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE,
965 				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
966 				    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
967 				    "Wnq", allocList);
968 			RF_ASSERT(pda != NULL);
969 			tmpwriteQNode->params[0].p = pda;	/* param 1 (bufPtr)
970 								 * filled in by xor node */
971 			tmpwriteQNode->params[1].p = tmpqNode->results[0];	/* buffer pointer for
972 										 * parity write
973 										 * operation */
974 			tmpwriteQNode->params[2].v = parityStripeID;
975 			tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
976 			    which_ru);
977 			pda = pda->next;
978 			tmpwriteQNode = tmpwriteQNode->list_next;
979 			tmpqNode = tmpqNode->list_next;
980 		}
981 	}
982 #endif
983 	/*
984          * Step 4. connect the nodes.
985          */
986 
987 	/* connect header to block node */
988 	dag_h->succedents[0] = blockNode;
989 
990 	/* connect block node to read old data nodes */
991 	RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
992 	tmpreadDataNode = readDataNodes;
993 	for (i = 0; i < numDataNodes; i++) {
994 		blockNode->succedents[i] = tmpreadDataNode;
995 		RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
996 		tmpreadDataNode->antecedents[0] = blockNode;
997 		tmpreadDataNode->antType[0] = rf_control;
998 		tmpreadDataNode = tmpreadDataNode->list_next;
999 	}
1000 
1001 	/* connect block node to read old parity nodes */
1002 	tmpreadParityNode = readParityNodes;
1003 	for (i = 0; i < numParityNodes; i++) {
1004 		blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
1005 		RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
1006 		tmpreadParityNode->antecedents[0] = blockNode;
1007 		tmpreadParityNode->antType[0] = rf_control;
1008 		tmpreadParityNode = tmpreadParityNode->list_next;
1009 	}
1010 
1011 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1012 	/* connect block node to read old Q nodes */
1013 	if (nfaults == 2) {
1014 		tmpreadQNode = readQNodes;
1015 		for (i = 0; i < numParityNodes; i++) {
1016 			blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
1017 			RF_ASSERT(tmpreadQNode->numAntecedents == 1);
1018 			tmpreadQNode->antecedents[0] = blockNode;
1019 			tmpreadQNode->antType[0] = rf_control;
1020 			tmpreadQNode = tmpreadQNode->list_next;
1021 		}
1022 	}
1023 #endif
1024 	/* connect read old data nodes to xor nodes */
1025 	tmpreadDataNode = readDataNodes;
1026 	for (i = 0; i < numDataNodes; i++) {
1027 		RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
1028 		tmpxorNode = xorNodes;
1029 		for (j = 0; j < numParityNodes; j++) {
1030 			RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
1031 			tmpreadDataNode->succedents[j] = tmpxorNode;
1032 			tmpxorNode->antecedents[i] = tmpreadDataNode;
1033 			tmpxorNode->antType[i] = rf_trueData;
1034 			tmpxorNode = tmpxorNode->list_next;
1035 		}
1036 		tmpreadDataNode = tmpreadDataNode->list_next;
1037 	}
1038 
1039 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1040 	/* connect read old data nodes to q nodes */
1041 	if (nfaults == 2) {
1042 		tmpreadDataNode = readDataNodes;
1043 		for (i = 0; i < numDataNodes; i++) {
1044 			tmpqNode = qNodes;
1045 			for (j = 0; j < numParityNodes; j++) {
1046 				RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
1047 				tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
1048 				tmpqNode->antecedents[i] = tmpreadDataNode;
1049 				tmpqNode->antType[i] = rf_trueData;
1050 				tmpqNode = tmpqNode->list_next;
1051 			}
1052 			tmpreadDataNode = tmpreadDataNode->list_next;
1053 		}
1054 	}
1055 #endif
1056 	/* connect read old parity nodes to xor nodes */
1057 	tmpreadParityNode = readParityNodes;
1058 	for (i = 0; i < numParityNodes; i++) {
1059 		RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
1060 		tmpxorNode = xorNodes;
1061 		for (j = 0; j < numParityNodes; j++) {
1062 			tmpreadParityNode->succedents[j] = tmpxorNode;
1063 			tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
1064 			tmpxorNode->antType[numDataNodes + i] = rf_trueData;
1065 			tmpxorNode = tmpxorNode->list_next;
1066 		}
1067 		tmpreadParityNode = tmpreadParityNode->list_next;
1068 	}
1069 
#if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
	/* connect read old q nodes to q nodes */
	if (nfaults == 2) {
		tmpreadParityNode = readParityNodes;
		tmpreadQNode = readQNodes;
		for (i = 0; i < numParityNodes; i++) {
			RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
			tmpqNode = qNodes;
			for (j = 0; j < numParityNodes; j++) {
				tmpreadQNode->succedents[j] = tmpqNode;
				/* was "tmpreadQNodes" (undeclared plural);
				 * the antecedent is the current Roq node */
				tmpqNode->antecedents[numDataNodes + i] = tmpreadQNode;
				tmpqNode->antType[numDataNodes + i] = rf_trueData;
				tmpqNode = tmpqNode->list_next;
			}
			tmpreadParityNode = tmpreadParityNode->list_next;
			tmpreadQNode = tmpreadQNode->list_next;
		}
	}
#endif
1089 	/* connect xor nodes to commit node */
1090 	RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
1091 	tmpxorNode = xorNodes;
1092 	for (i = 0; i < numParityNodes; i++) {
1093 		RF_ASSERT(tmpxorNode->numSuccedents == 1);
1094 		tmpxorNode->succedents[0] = commitNode;
1095 		commitNode->antecedents[i] = tmpxorNode;
1096 		commitNode->antType[i] = rf_control;
1097 		tmpxorNode = tmpxorNode->list_next;
1098 	}
1099 
1100 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1101 	/* connect q nodes to commit node */
1102 	if (nfaults == 2) {
1103 		tmpqNode = qNodes;
1104 		for (i = 0; i < numParityNodes; i++) {
1105 			RF_ASSERT(tmpqNode->numSuccedents == 1);
1106 			tmpqNode->succedents[0] = commitNode;
1107 			commitNode->antecedents[i + numParityNodes] = tmpqNode;
1108 			commitNode->antType[i + numParityNodes] = rf_control;
1109 			tmpqNode = tmpqNode->list_next;
1110 		}
1111 	}
1112 #endif
1113 	/* connect commit node to write nodes */
1114 	RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
1115 	tmpwriteDataNode = writeDataNodes;
1116 	for (i = 0; i < numDataNodes; i++) {
1117 		RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
1118 		commitNode->succedents[i] = tmpwriteDataNode;
1119 		tmpwriteDataNode->antecedents[0] = commitNode;
1120 		tmpwriteDataNode->antType[0] = rf_trueData;
1121 		tmpwriteDataNode = tmpwriteDataNode->list_next;
1122 	}
1123 	tmpwriteParityNode = writeParityNodes;
1124 	for (i = 0; i < numParityNodes; i++) {
1125 		RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
1126 		commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
1127 		tmpwriteParityNode->antecedents[0] = commitNode;
1128 		tmpwriteParityNode->antType[0] = rf_trueData;
1129 		tmpwriteParityNode = tmpwriteParityNode->list_next;
1130 	}
1131 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1132 	if (nfaults == 2) {
1133 		tmpwriteQNode = writeQNodes;
1134 		for (i = 0; i < numParityNodes; i++) {
1135 			RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
1136 			commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
1137 			tmpwriteQNode->antecedents[0] = commitNode;
1138 			tmpwriteQNode->antType[0] = rf_trueData;
1139 			tmpwriteQNode = tmpwriteQNode->list_next;
1140 		}
1141 	}
1142 #endif
1143 	RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1144 	RF_ASSERT(termNode->numSuccedents == 0);
1145 	tmpwriteDataNode = writeDataNodes;
1146 	for (i = 0; i < numDataNodes; i++) {
1147 		/* connect write new data nodes to term node */
1148 		RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
1149 		RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
1150 		tmpwriteDataNode->succedents[0] = termNode;
1151 		termNode->antecedents[i] = tmpwriteDataNode;
1152 		termNode->antType[i] = rf_control;
1153 		tmpwriteDataNode = tmpwriteDataNode->list_next;
1154 	}
1155 
1156 	tmpwriteParityNode = writeParityNodes;
1157 	for (i = 0; i < numParityNodes; i++) {
1158 		RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
1159 		tmpwriteParityNode->succedents[0] = termNode;
1160 		termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
1161 		termNode->antType[numDataNodes + i] = rf_control;
1162 		tmpwriteParityNode = tmpwriteParityNode->list_next;
1163 	}
1164 
1165 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
1166 	if (nfaults == 2) {
1167 		tmpwriteQNode = writeQNodes;
1168 		for (i = 0; i < numParityNodes; i++) {
1169 			RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
1170 			tmpwriteQNode->succedents[0] = termNode;
1171 			termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
1172 			termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
1173 			tmpwriteQNode = tmpwriteQNode->list_next;
1174 		}
1175 	}
1176 #endif
1177 }
1178 
1179 
1180 /******************************************************************************
1181  * create a write graph (fault-free or degraded) for RAID level 1
1182  *
1183  * Hdr -> Commit -> Wpd -> Nil -> Trm
1184  *               -> Wsd ->
1185  *
1186  * The "Wpd" node writes data to the primary copy in the mirror pair
1187  * The "Wsd" node writes data to the secondary copy in the mirror pair
1188  *
1189  * Parameters:  raidPtr   - description of the physical array
1190  *              asmap     - logical & physical addresses for this access
1191  *              bp        - buffer ptr (holds write data)
1192  *              flags     - general flags (e.g. disk locking)
1193  *              allocList - list of memory allocated in DAG creation
1194  *****************************************************************************/
1195 
void
rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
			 RF_DagHeader_t *dag_h, void *bp,
			 RF_RaidAccessFlags_t flags,
			 RF_AllocListElem_t *allocList)
{
	RF_DagNode_t *unblockNode, *termNode, *commitNode;
	RF_DagNode_t *wndNode, *wmirNode;
	RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
	int     nWndNodes, nWmirNodes, i;
	RF_ReconUnitNum_t which_ru;
	RF_PhysDiskAddr_t *pda, *pdaP;
	RF_StripeNum_t parityStripeID;

	/* NOTE(review): bp and flags are not referenced in this function;
	 * presumably retained for signature uniformity with the other DAG
	 * creation routines -- confirm against the DAG-selection callers. */
	parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
	    asmap->raidAddress, &which_ru);
#if RF_DEBUG_DAG
	if (rf_dagDebug) {
		printf("[Creating RAID level 1 write DAG]\n");
	}
#endif
	dag_h->creator = "RaidOneWriteDAG";

	/* 2 implies access not SU aligned: the access spans two stripe
	 * units, so each copy needs two write nodes */
	nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
	nWndNodes = (asmap->physInfo->next) ? 2 : 1;

	/* alloc the Wnd nodes and the Wmir node; a failed copy simply
	 * drops its write node (degraded mode) */
	if (asmap->numDataFailed == 1)
		nWndNodes--;
	if (asmap->numParityFailed == 1)
		nWmirNodes--;

	/* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
	 * + terminator) */
	/* Nodes are prepended onto dag_h->nodes, so after each allocation
	 * loop the list head names the run of nodes just allocated. */
	for (i = 0; i < nWndNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wndNode = dag_h->nodes;

	for (i = 0; i < nWmirNodes; i++) {
		tmpNode = rf_AllocDAGNode();
		tmpNode->list_next = dag_h->nodes;
		dag_h->nodes = tmpNode;
	}
	wmirNode = dag_h->nodes;

	commitNode = rf_AllocDAGNode();
	commitNode->list_next = dag_h->nodes;
	dag_h->nodes = commitNode;

	unblockNode = rf_AllocDAGNode();
	unblockNode->list_next = dag_h->nodes;
	dag_h->nodes = unblockNode;

	termNode = rf_AllocDAGNode();
	termNode->list_next = dag_h->nodes;
	dag_h->nodes = termNode;

	/* this dag can commit immediately: mirrored writes need no
	 * read-modify-write, so there is nothing to roll back */
	dag_h->numCommitNodes = 1;
	dag_h->numCommits = 0;
	dag_h->numSuccedents = 1;

	/* initialize the commit, unblock, and term nodes */
	rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes),
		    0, 0, 0, dag_h, "Cmt", allocList);
	rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
		    rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes),
		    0, 0, dag_h, "Nil", allocList);
	rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
		    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0,
		    dag_h, "Trm", allocList);

	/* initialize the wnd nodes (writes to the primary copy);
	 * disk-write params are {pda, bufPtr, parityStripeID, priority/ru} */
	if (nWndNodes > 0) {
		pda = asmap->physInfo;
		tmpwndNode = wndNode;
		for (i = 0; i < nWndNodes; i++) {
			rf_InitNode(tmpwndNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wpd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwndNode->params[0].p = pda;
			tmpwndNode->params[1].p = pda->bufPtr;
			tmpwndNode->params[2].v = parityStripeID;
			tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			tmpwndNode = tmpwndNode->list_next;
		}
		RF_ASSERT(pda == NULL);
	}
	/* initialize the mirror nodes (writes to the secondary copy);
	 * note the address comes from the parity map (pdaP) but the data
	 * buffer comes from the primary map (pda->bufPtr) */
	if (nWmirNodes > 0) {
		pda = asmap->physInfo;
		pdaP = asmap->parityInfo;
		tmpwmirNode = wmirNode;
		for (i = 0; i < nWmirNodes; i++) {
			rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE,
				    rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
				    rf_GenericWakeupFunc, 1, 1, 4, 0,
				    dag_h, "Wsd", allocList);
			RF_ASSERT(pda != NULL);
			tmpwmirNode->params[0].p = pdaP;
			tmpwmirNode->params[1].p = pda->bufPtr;
			tmpwmirNode->params[2].v = parityStripeID;
			tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
			pda = pda->next;
			pdaP = pdaP->next;
			tmpwmirNode = tmpwmirNode->list_next;
		}
		RF_ASSERT(pda == NULL);
		RF_ASSERT(pdaP == NULL);
	}
	/* link the header node to the commit node */
	RF_ASSERT(dag_h->numSuccedents == 1);
	RF_ASSERT(commitNode->numAntecedents == 0);
	dag_h->succedents[0] = commitNode;

	/* link the commit node to the write nodes */
	RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numAntecedents == 1);
		commitNode->succedents[i] = tmpwndNode;
		tmpwndNode->antecedents[0] = commitNode;
		tmpwndNode->antType[0] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numAntecedents == 1);
		commitNode->succedents[i + nWndNodes] = tmpwmirNode;
		tmpwmirNode->antecedents[0] = commitNode;
		tmpwmirNode->antType[0] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the write nodes to the unblock node */
	RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
	tmpwndNode = wndNode;
	for (i = 0; i < nWndNodes; i++) {
		RF_ASSERT(tmpwndNode->numSuccedents == 1);
		tmpwndNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i] = tmpwndNode;
		unblockNode->antType[i] = rf_control;
		tmpwndNode = tmpwndNode->list_next;
	}
	tmpwmirNode = wmirNode;
	for (i = 0; i < nWmirNodes; i++) {
		RF_ASSERT(tmpwmirNode->numSuccedents == 1);
		tmpwmirNode->succedents[0] = unblockNode;
		unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
		unblockNode->antType[i + nWndNodes] = rf_control;
		tmpwmirNode = tmpwmirNode->list_next;
	}

	/* link the unblock node to the term node */
	RF_ASSERT(unblockNode->numSuccedents == 1);
	RF_ASSERT(termNode->numAntecedents == 1);
	RF_ASSERT(termNode->numSuccedents == 0);
	unblockNode->succedents[0] = termNode;
	termNode->antecedents[0] = unblockNode;
	termNode->antType[0] = rf_control;
}
1365