xref: /dflybsd-src/sys/vfs/hammer2/hammer2_flush.c (revision 636eca08bbf84bbb648c68d757be5ec1a2b86a72)
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/types.h>
40 #include <sys/lock.h>
41 #include <sys/uuid.h>
42 
43 #include "hammer2.h"
44 
45 /*
46  * Recursively flush the specified chain.  The chain is locked and
47  * referenced by the caller and will remain so on return.  The chain
48  * will remain referenced throughout but can temporarily lose its
49  * lock during the recursion to avoid unnecessarily stalling user
50  * processes.
51  */
struct hammer2_flush_info {
	/* Chains deferred at the recursion depth limit; re-flushed by the
	 * top-level loop in hammer2_chain_flush() after the stack unwinds. */
	struct flush_deferral_list flush_list;
	int		depth;		/* current pass1 recursion depth */
	hammer2_tid_t	modify_tid;	/* transaction id applied to this flush */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

/* Forward declarations for the recursive flush and deferred-child fixup. */
static void hammer2_chain_flush_pass1(hammer2_mount_t *hmp,
			hammer2_chain_t *chain, hammer2_flush_info_t *info);
static void hammer2_saved_child_cleanup(hammer2_mount_t *hmp,
			hammer2_chain_t *parent, hammer2_chain_t *child);
64 
65 /*
66  * Stand-alone flush.  If the chain is unable to completely flush we have
67  * to be sure that SUBMODIFIED propagates up the parent chain.  We must not
68  * clear the MOVED bit after flushing in this situation or our desynchronized
69  * bref will not properly update in the parent.
70  *
71  * This routine can be called from several places but the most important
72  * is from the hammer2_vop_reclaim() function.  We want to try to completely
73  * clean out the inode structure to prevent disconnected inodes from
74  * building up and blowing out the kmalloc pool.
75  *
76  * If modify_tid is 0 (usual case), a new modify_tid is allocated and
77  * applied to the flush.  The depth-limit handling code is the only
78  * code which passes a non-zero modify_tid to hammer2_chain_flush().
79  *
80  * chain is locked on call and will remain locked on return.
81  */
82 void
83 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
84 		    hammer2_tid_t modify_tid)
85 {
86 	hammer2_chain_t *parent;
87 	hammer2_chain_t *scan;
88 	hammer2_blockref_t *base;
89 	hammer2_flush_info_t info;
90 	int count;
91 	int reflush;
92 
93 	/*
94 	 * Execute the recursive flush and handle deferrals.
95 	 *
96 	 * Chains can be ridiculously long (thousands deep), so to
97 	 * avoid blowing out the kernel stack the recursive flush has a
98 	 * depth limit.  Elements at the limit are placed on a list
99 	 * for re-execution after the stack has been popped.
100 	 */
101 	bzero(&info, sizeof(info));
102 	TAILQ_INIT(&info.flush_list);
103 
104 	if (modify_tid == 0) {
105 		hammer2_voldata_lock(hmp);
106 		info.modify_tid = hmp->voldata.alloc_tid++;
107 		atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED_AUX);
108 		hammer2_voldata_unlock(hmp);
109 	} else {
110 		info.modify_tid = modify_tid;
111 	}
112 	reflush = 1;
113 
114 	while (reflush) {
115 		/*
116 		 * Primary recursion
117 		 */
118 		hammer2_chain_flush_pass1(hmp, chain, &info);
119 		reflush = 0;
120 
121 		while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
122 			/*
123 			 * Secondary recursion.  Note that a reference is
124 			 * retained from the element's presence on the
125 			 * deferral list.
126 			 */
127 			KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
128 			TAILQ_REMOVE(&info.flush_list, scan, flush_node);
129 			atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);
130 
131 			/*
132 			 * Now that we've popped back up we can do a secondary
133 			 * recursion on the deferred elements.
134 			 */
135 			if (hammer2_debug & 0x0040)
136 				kprintf("defered flush %p\n", scan);
137 			hammer2_chain_lock(hmp, scan, HAMMER2_RESOLVE_MAYBE);
138 			hammer2_chain_flush(hmp, scan, info.modify_tid);
139 			hammer2_chain_unlock(hmp, scan);
140 
141 			/*
142 			 * Only flag a reflush if SUBMODIFIED is no longer
143 			 * set.  If SUBMODIFIED is set the element will just
144 			 * wind up on our flush_list again.
145 			 */
146 			if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
147 					    HAMMER2_CHAIN_MODIFIED |
148 					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
149 				reflush = 1;
150 			}
151 			hammer2_chain_drop(hmp, scan);
152 		}
153 		if ((hammer2_debug & 0x0040) && reflush)
154 			kprintf("reflush %p\n", chain);
155 	}
156 
157 	/*
158 	 * The SUBMODIFIED bit must propagate upward if the chain could not
159 	 * be completely flushed.
160 	 */
161 	if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
162 			    HAMMER2_CHAIN_MODIFIED |
163 			    HAMMER2_CHAIN_MODIFIED_AUX |
164 			    HAMMER2_CHAIN_MOVED)) {
165 		hammer2_chain_parent_setsubmod(hmp, chain);
166 	}
167 
168 	/*
169 	 * If the only thing left is a simple bref update try to
170 	 * pro-actively update the parent, otherwise return early.
171 	 */
172 	parent = chain->parent;
173 	if (parent == NULL) {
174 		return;
175 	}
176 	if (chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
177 	    (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
178 			     HAMMER2_CHAIN_MODIFIED |
179 			     HAMMER2_CHAIN_MODIFIED_AUX |
180 			     HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) {
181 		return;
182 	}
183 
184 	/*
185 	 * We are locking backwards so allow the lock to fail.
186 	 */
187 	if (ccms_thread_lock_nonblock(&parent->cst, CCMS_STATE_EXCLUSIVE))
188 		return;
189 
190 	/*
191 	 * We are updating brefs but we have to call chain_modify()
192 	 * because our caller is not being run from a recursive flush.
193 	 *
194 	 * This will also chain up the parent list and set the SUBMODIFIED
195 	 * flag.
196 	 *
197 	 * We do not want to set HAMMER2_CHAIN_MODIFY_TID here because the
198 	 * modification is only related to updating a bref in the parent.
199 	 *
200 	 * When updating the blockset embedded in the volume header we must
201 	 * also update voldata.mirror_tid.
202 	 */
203 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
204 	hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_NO_MODIFY_TID);
205 
206 	switch(parent->bref.type) {
207 	case HAMMER2_BREF_TYPE_INODE:
208 		base = &parent->data->ipdata.u.blockset.
209 			blockref[0];
210 		count = HAMMER2_SET_COUNT;
211 		break;
212 	case HAMMER2_BREF_TYPE_INDIRECT:
213 		base = &parent->data->npdata.blockref[0];
214 		count = parent->bytes /
215 			sizeof(hammer2_blockref_t);
216 		break;
217 	case HAMMER2_BREF_TYPE_VOLUME:
218 		base = &hmp->voldata.sroot_blockset.blockref[0];
219 		count = HAMMER2_SET_COUNT;
220 		if (chain->flags & HAMMER2_CHAIN_MOVED) {
221 			if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) {
222 				hmp->voldata.mirror_tid =
223 					chain->bref.mirror_tid;
224 			}
225 		}
226 		break;
227 	default:
228 		base = NULL;
229 		panic("hammer2_chain_flush: "
230 		      "unrecognized blockref type: %d",
231 		      parent->bref.type);
232 	}
233 
234 	/*
235 	 * Update the blockref in the parent.  We do not have to set
236 	 * MOVED in the parent because the parent has been marked modified,
237 	 * so the flush sequence will pick up the bref change.
238 	 *
239 	 * We do have to propagate mirror_tid upward.
240 	 */
241 	KKASSERT(chain->index >= 0 &&
242 		 chain->index < count);
243 	KKASSERT(chain->parent == parent);
244 	if (chain->flags & HAMMER2_CHAIN_MOVED) {
245 		base[chain->index] = chain->bref_flush;
246 		if (parent->bref.mirror_tid < chain->bref_flush.mirror_tid)
247 			parent->bref.mirror_tid = chain->bref_flush.mirror_tid;
248 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MOVED);
249 		hammer2_chain_drop(hmp, chain);
250 	} else if (bcmp(&base[chain->index], &chain->bref_flush,
251 		   sizeof(chain->bref)) != 0) {
252 		panic("hammer2: unflagged bref update(2)");
253 	}
254 	ccms_thread_unlock(&parent->cst);		/* release manual op */
255 	hammer2_chain_unlock(hmp, parent);
256 }
257 
/*
 * Primary recursive flush (pass1).
 *
 * Recurses children while SUBMODIFIED is set (subject to the depth limit,
 * which defers the chain onto info->flush_list instead), synchronizes
 * blockref updates for MOVED/DELETED children into this chain, then
 * flushes this chain itself if MODIFIED/MODIFIED_AUX: clearing MODIFIED,
 * setting MOVED, updating mirror_tid/modify_tid, and writing out the
 * backing media representation according to the bref type.
 *
 * chain is locked by the caller and remains locked on return (it may be
 * temporarily unlocked/relocked internally while children are processed).
 */
static void
hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
			  hammer2_flush_info_t *info)
{
	hammer2_blockref_t *bref;
	hammer2_off_t pbase;
	size_t bbytes;
	size_t boff;
	char *bdata;
	struct buf *bp;
	int error;
	int wasmodified;

	/*
	 * If we hit the stack recursion depth limit defer the operation.
	 * The controller of the info structure will execute the deferral
	 * list and then retry.
	 *
	 * This is only applicable if SUBMODIFIED is set.  After a reflush
	 * SUBMODIFIED will probably be cleared and we want to drop through
	 * to finish processing the current element so our direct parent
	 * can process the results.
	 */
	if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT &&
	    (chain->flags & HAMMER2_CHAIN_SUBMODIFIED)) {
		if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
			/* ref is owned by the chain's presence on the list */
			hammer2_chain_ref(hmp, chain);
			TAILQ_INSERT_TAIL(&info->flush_list,
					  chain, flush_node);
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED);
		}
		return;
	}

	if (hammer2_debug & 0x0008)
		kprintf("%*.*sCHAIN type=%d@%08jx %p/%d %04x {\n",
			info->depth, info->depth, "",
			chain->bref.type, chain->bref.data_off,
			chain, chain->refs, chain->flags);

	/*
	 * If SUBMODIFIED is set we recurse the flush and adjust the
	 * blockrefs accordingly.
	 *
	 * NOTE: Looping on SUBMODIFIED can prevent a flush from ever
	 *	 finishing in the face of filesystem activity.
	 */
	if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
		hammer2_chain_t *child;
		hammer2_chain_t *saved;
		hammer2_blockref_t *base;
		int count;

		/*
		 * Clear SUBMODIFIED to catch races.  Note that if any
		 * child has to be flushed SUBMODIFIED will wind up being
		 * set again (for next time), but this does not stop us from
		 * synchronizing block updates which occurred.
		 *
		 * We don't want to set our chain to MODIFIED gratuitously.
		 *
		 * We need an extra ref on chain because we are going to
		 * release its lock temporarily in our child loop.
		 */
		/* XXX SUBMODIFIED not interlocked, can race */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
		hammer2_chain_ref(hmp, chain);

		/*
		 * Flush the children and update the blockrefs in the chain.
		 * Be careful of ripouts during the loop.
		 *
		 * The flushing counter prevents ripouts on lastdrop and
		 * also prevents moves (causes renames to sleep/retry).
		 * Be very careful with it.
		 */
		RB_FOREACH(child, hammer2_chain_tree, &chain->rbhead) {
			KASSERT(child->parent == chain,
				("hammer2_flush: child->parent mismatch %p/%p",
				 child->parent, chain));

			/*
			 * We only recurse if SUBMODIFIED (internal node)
			 * or MODIFIED (internal node or leaf) is set.
			 * However, we must still track whether any MOVED
			 * entries are present to determine if the chain's
			 * blockref's need updating or not.
			 */
			if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
					     HAMMER2_CHAIN_MODIFIED |
					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
				continue;
			}

			/*
			 * flushing can only be adjusted while its parent
			 * is locked, and prevent the destruction/removal
			 * of the child from the parent's B-Tree.  This allows
			 * us to temporarily unlock the parent.
			 *
			 * To unwind, we must hold the parent locked before
			 * decrementing flushing to prevent child corruption
			 * during our loop.
			 */
			atomic_add_int(&child->flushing, 1);
			hammer2_chain_unlock(hmp, chain);
			hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_MAYBE);
			KASSERT(child->parent == chain,
				("hammer2_flush: child->parent mismatch %p/%p",
				 child->parent, chain));
			/* re-test flags: they may have been cleared while
			 * the parent lock was dropped */
			if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
					     HAMMER2_CHAIN_MODIFIED |
					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
				hammer2_chain_unlock(hmp, child);
				hammer2_chain_lock(hmp, chain,
						   HAMMER2_RESOLVE_ALWAYS);
				KKASSERT(child->parent == chain);
				atomic_add_int(&child->flushing, -1);
				continue;
			}

			/*
			 * Propagate the DESTROYED flag if found set, then
			 * recurse the flush.
			 */
			if ((chain->flags & HAMMER2_CHAIN_DESTROYED) &&
			    (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
				atomic_set_int(&child->flags,
					       HAMMER2_CHAIN_DESTROYED |
					       HAMMER2_CHAIN_SUBMODIFIED);
			}
			++info->depth;
			hammer2_chain_flush_pass1(hmp, child, info);
			--info->depth;
			hammer2_chain_unlock(hmp, child);

			/*
			 * Always resolve when relocking the parent.
			 */
			hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
			KASSERT(child->parent == chain,
				("hammer2_flush: child->parent mismatch %p/%p",
				 child->parent, chain));
			atomic_add_int(&child->flushing, -1);
		}

		/*
		 * Now synchronize any block updates and handle any
		 * chains marked DELETED.
		 *
		 * The flushing counter prevents ripouts on lastdrop and
		 * also prevents moves (causes renames to sleep/retry).
		 * Be very careful with it.
		 */
		saved = NULL;
		RB_FOREACH(child, hammer2_chain_tree, &chain->rbhead) {
			if ((child->flags & (HAMMER2_CHAIN_MOVED |
					     HAMMER2_CHAIN_DELETED)) == 0) {
				continue;
			}
			/* hold 'flushing' on the previous child until the
			 * next one is pinned, then clean it up */
			atomic_add_int(&child->flushing, 1);
			if (saved) {
				hammer2_saved_child_cleanup(hmp, chain, saved);
				saved = NULL;
			}
			saved = child;
			hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_NEVER);
			KKASSERT(child->parent == chain);
			/* re-test after acquiring the child lock */
			if ((child->flags & (HAMMER2_CHAIN_MOVED |
					     HAMMER2_CHAIN_DELETED)) == 0) {
				hammer2_chain_unlock(hmp, child);
				continue;
			}
			if (child->flags & HAMMER2_CHAIN_MOVED) {
				hammer2_chain_modify(hmp, chain,
					     HAMMER2_MODIFY_NO_MODIFY_TID);
			}

			switch(chain->bref.type) {
			case HAMMER2_BREF_TYPE_INODE:
				KKASSERT((chain->data->ipdata.op_flags &
					  HAMMER2_OPFLAG_DIRECTDATA) == 0);
				base = &chain->data->ipdata.u.blockset.
					blockref[0];
				count = HAMMER2_SET_COUNT;
				break;
			case HAMMER2_BREF_TYPE_INDIRECT:
				if (chain->data) {
					base = &chain->data->npdata.blockref[0];
				} else {
					base = NULL;
					KKASSERT(child->flags &
						 HAMMER2_CHAIN_DELETED);
				}
				count = chain->bytes /
					sizeof(hammer2_blockref_t);
				break;
			case HAMMER2_BREF_TYPE_VOLUME:
				base = &hmp->voldata.sroot_blockset.blockref[0];
				count = HAMMER2_SET_COUNT;
				break;
			default:
				base = NULL;
				/* NOTE(review): message says
				 * "hammer2_chain_get" — looks like a
				 * copy/paste; should name this function */
				panic("hammer2_chain_get: "
				      "unrecognized blockref type: %d",
				      chain->bref.type);
			}

			KKASSERT(child->index >= 0);

			/* propagate the highest child mirror_tid upward */
			if (chain->bref.mirror_tid <
			    child->bref_flush.mirror_tid) {
				chain->bref.mirror_tid =
					child->bref_flush.mirror_tid;
			}
			if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME &&
			    hmp->voldata.mirror_tid <
			    child->bref_flush.mirror_tid) {
				hmp->voldata.mirror_tid =
					child->bref_flush.mirror_tid;
			}
			/* deleted children zero their blockref slot */
			if (child->flags & HAMMER2_CHAIN_DELETED) {
				bzero(&child->bref_flush,
				      sizeof(child->bref_flush));
			}
			if (base)
				base[child->index] = child->bref_flush;
			if (child->flags & HAMMER2_CHAIN_MOVED) {
				atomic_clear_int(&child->flags,
						 HAMMER2_CHAIN_MOVED);
				hammer2_chain_drop(hmp, child); /* flag */
			}
			hammer2_chain_unlock(hmp, child);
		}
		if (saved) {
			hammer2_saved_child_cleanup(hmp, chain, saved);
			saved = NULL;
		}
		hammer2_chain_drop(hmp, chain);
	}

	/*
	 * If destroying the object we unconditonally clear the MODIFIED
	 * and MOVED bits, and we destroy the buffer without writing it
	 * out.
	 *
	 * We don't bother updating the hash/crc or the chain bref.
	 *
	 * NOTE: The destroy'd object's bref has already been updated.
	 *	 so we can clear MOVED without propagating mirror_tid
	 *	 or modify_tid upward.
	 *
	 * XXX allocations for unflushed data can be returned to the
	 *     free pool.
	 */
	if (chain->flags & HAMMER2_CHAIN_DESTROYED) {
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			if (chain->bp) {
				chain->bp->b_flags |= B_INVAL|B_RELBUF;
			}
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_MODIFIED |
					 HAMMER2_CHAIN_MODIFY_TID);
			/* drop the ref owned by the MODIFIED flag */
			hammer2_chain_drop(hmp, chain);
		}
		if (chain->flags & HAMMER2_CHAIN_MODIFIED_AUX) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_MODIFIED_AUX);
		}
		if (chain->flags & HAMMER2_CHAIN_MOVED) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_MOVED);
			/* drop the ref owned by the MOVED flag */
			hammer2_chain_drop(hmp, chain);
		}
		return;
	}

	/*
	 * Flush this chain entry only if it is marked modified.
	 */
	if ((chain->flags & (HAMMER2_CHAIN_MODIFIED |
			     HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
		goto done;
	}

	/*
	 * Synchronize cumulative data and inode count adjustments to
	 * the inode and propagate the deltas upward to the parent.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		hammer2_inode_t *ip;

		ip = chain->u.ip;
		ip->ip_data.inode_count += ip->delta_icount;
		ip->ip_data.data_count += ip->delta_dcount;
		if (ip->pip) {
			ip->pip->delta_icount += ip->delta_icount;
			ip->pip->delta_dcount += ip->delta_dcount;
		}
		ip->delta_icount = 0;
		ip->delta_dcount = 0;
	}

	/*
	 * Flush if MODIFIED or MODIFIED_AUX is set.  MODIFIED_AUX is only
	 * used by the volume header (&hmp->vchain).
	 *
	 * NOTE(review): this re-test duplicates the check above and can
	 * never take the goto here — presumably kept for safety; confirm.
	 */
	if ((chain->flags & (HAMMER2_CHAIN_MODIFIED |
			     HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
		goto done;
	}
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED_AUX);

	/*
	 * Clear MODIFIED and set HAMMER2_CHAIN_MOVED.  The caller
	 * will re-test the MOVED bit.  We must also update the mirror_tid
	 * and modify_tid fields as appropriate.
	 *
	 * bits own a single chain ref and the MOVED bit owns its own
	 * chain ref.
	 */
	chain->bref.mirror_tid = info->modify_tid;
	if (chain->flags & HAMMER2_CHAIN_MODIFY_TID)
		chain->bref.modify_tid = info->modify_tid;
	wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0;
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED |
					HAMMER2_CHAIN_MODIFY_TID);

	if (chain->flags & HAMMER2_CHAIN_MOVED) {
		/*
		 * Drop the ref from the MODIFIED bit we cleared.
		 */
		if (wasmodified)
			hammer2_chain_drop(hmp, chain);
	} else {
		/*
		 * If we were MODIFIED we inherit the ref from clearing
		 * that bit, otherwise we need another ref.
		 */
		if (wasmodified == 0)
			hammer2_chain_ref(hmp, chain);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
	}
	/* snapshot the bref the parent will copy into its blockref array */
	chain->bref_flush = chain->bref;

	/*
	 * If this is part of a recursive flush we can go ahead and write
	 * out the buffer cache buffer and pass a new bref back up the chain.
	 *
	 * This will never be a volume header.
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
		/*
		 * The volume header is flushed manually by the syncer, not
		 * here.
		 */
		KKASSERT(chain->data != NULL);
		KKASSERT(chain->bp == NULL);
		kprintf("volume header mirror_tid %jd\n",
			hmp->voldata.mirror_tid);

		/* recompute the three volume-header integrity CRCs before
		 * snapshotting voldata into volsync */
		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRC1_OFF,
				HAMMER2_VOLUME_ICRC1_SIZE);
		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRC0_OFF,
				HAMMER2_VOLUME_ICRC0_SIZE);
		hmp->voldata.icrc_volheader =
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRCVH_OFF,
				HAMMER2_VOLUME_ICRCVH_SIZE);
		hmp->volsync = hmp->voldata;
		break;
	case HAMMER2_BREF_TYPE_DATA:
		/*
		 * Data elements have already been flushed via the logical
		 * file buffer cache.  Their hash was set in the bref by
		 * the vop_write code.
		 *
		 * Make sure the buffer(s) have been flushed out here.
		 */
		bbytes = chain->bytes;
		pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
		boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);

		bp = getblk(hmp->devvp, pbase, bbytes, GETBLK_NOWAIT, 0);
		if (bp) {
			if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
			    (B_CACHE | B_DIRTY)) {
				kprintf("x");
				cluster_awrite(bp);
			} else {
				bp->b_flags |= B_RELBUF;
				brelse(bp);
			}
		}
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Indirect blocks may be in an INITIAL state.  Use the
		 * chain_lock() call to ensure that the buffer has been
		 * instantiated (even though it is already locked the buffer
		 * might not have been instantiated).
		 *
		 * Only write the buffer out if it is dirty, it is possible
		 * the operating system had already written out the buffer.
		 */
		hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
		KKASSERT(chain->bp != NULL);

		bp = chain->bp;
		if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) ||
		    (bp->b_flags & B_DIRTY)) {
			bdwrite(chain->bp);
		} else {
			brelse(chain->bp);
		}
		chain->bp = NULL;
		chain->data = NULL;
		hammer2_chain_unlock(hmp, chain);
		break;
	default:
		/*
		 * Embedded elements have to be flushed out.
		 */
		KKASSERT(chain->data != NULL);
		KKASSERT(chain->bp == NULL);
		bref = &chain->bref;

		KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);

		/* NOTE(review): given the KKASSERT(chain->bp == NULL)
		 * above, the else branch below is unreachable dead code */
		if (chain->bp == NULL) {
			/*
			 * The data is embedded, we have to acquire the
			 * buffer cache buffer and copy the data into it.
			 */
			if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
				bbytes = HAMMER2_MINIOSIZE;
			pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1);
			boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1);

			/*
			 * The getblk() optimization can only be used if the
			 * physical block size matches the request.
			 */
			if (chain->bytes == bbytes) {
				bp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
				error = 0;
			} else {
				error = bread(hmp->devvp, pbase, bbytes, &bp);
				KKASSERT(error == 0);
			}
			bdata = (char *)bp->b_data + boff;

			/*
			 * Copy the data to the buffer, mark the buffer
			 * dirty, and convert the chain to unmodified.
			 */
			bcopy(chain->data, bdata, chain->bytes);
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
			bp = NULL;
			chain->bref.check.iscsi32.value =
				hammer2_icrc32(chain->data, chain->bytes);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				++hammer2_iod_meta_write;
			else
				++hammer2_iod_indr_write;
		} else {
			chain->bref.check.iscsi32.value =
				hammer2_icrc32(chain->data, chain->bytes);
		}
	}
done:
	if (hammer2_debug & 0x0008) {
		kprintf("%*.*s} %p/%d %04x ",
			info->depth, info->depth, "",
			chain, chain->refs, chain->flags);
	}
}
744 
#if 0
/*
 * PASS2 - not yet implemented (should be called only with the root chain?)
 *
 * Stub kept disabled via #if 0; currently has no callers and no body.
 */
static void
hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
{
}
#endif
754 
755 static
756 void
757 hammer2_saved_child_cleanup(hammer2_mount_t *hmp,
758 			    hammer2_chain_t *parent, hammer2_chain_t *child)
759 {
760 	atomic_add_int(&child->flushing, -1);
761 	if (child->flushing == 0 && (child->flags & HAMMER2_CHAIN_DELETED)) {
762 		kprintf("hammer2: fixup deferred deleted child\n");
763 		hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_MAYBE);
764 		hammer2_chain_delete(hmp, parent, child, 0);
765 		hammer2_chain_unlock(hmp, child);
766 	}
767 }
768