xref: /dflybsd-src/sys/vfs/hammer/hammer_flusher.c (revision 6048b4113f26008d927fa69ddd438dabc392ef1f)
1059819e3SMatthew Dillon /*
2059819e3SMatthew Dillon  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3059819e3SMatthew Dillon  *
4059819e3SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
5059819e3SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
6059819e3SMatthew Dillon  *
7059819e3SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
8059819e3SMatthew Dillon  * modification, are permitted provided that the following conditions
9059819e3SMatthew Dillon  * are met:
10059819e3SMatthew Dillon  *
11059819e3SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
12059819e3SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
13059819e3SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
14059819e3SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
15059819e3SMatthew Dillon  *    the documentation and/or other materials provided with the
16059819e3SMatthew Dillon  *    distribution.
17059819e3SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
18059819e3SMatthew Dillon  *    contributors may be used to endorse or promote products derived
19059819e3SMatthew Dillon  *    from this software without specific, prior written permission.
20059819e3SMatthew Dillon  *
21059819e3SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22059819e3SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23059819e3SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24059819e3SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25059819e3SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26059819e3SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27059819e3SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28059819e3SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29059819e3SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30059819e3SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31059819e3SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32059819e3SMatthew Dillon  * SUCH DAMAGE.
33059819e3SMatthew Dillon  *
344889cbd4SMatthew Dillon  * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.45 2008/07/31 04:42:04 dillon Exp $
35059819e3SMatthew Dillon  */
36059819e3SMatthew Dillon /*
37059819e3SMatthew Dillon  * HAMMER dependancy flusher thread
38059819e3SMatthew Dillon  *
39059819e3SMatthew Dillon  * Meta-data updates create buffer dependencies which are arranged as a
40059819e3SMatthew Dillon  * hierarchy of lists.
41059819e3SMatthew Dillon  */
42059819e3SMatthew Dillon 
43059819e3SMatthew Dillon #include "hammer.h"
44059819e3SMatthew Dillon 
45da2da375SMatthew Dillon static void hammer_flusher_master_thread(void *arg);
46da2da375SMatthew Dillon static void hammer_flusher_slave_thread(void *arg);
47059819e3SMatthew Dillon static void hammer_flusher_flush(hammer_mount_t hmp);
489f5097dcSMatthew Dillon static void hammer_flusher_flush_inode(hammer_inode_t ip,
499f5097dcSMatthew Dillon 					hammer_transaction_t trans);
50c9b9e29dSMatthew Dillon 
51ff003b11SMatthew Dillon RB_GENERATE(hammer_fls_rb_tree, hammer_inode, rb_flsnode,
52ff003b11SMatthew Dillon               hammer_ino_rb_compare);
53ff003b11SMatthew Dillon 
54ff003b11SMatthew Dillon /*
55ff003b11SMatthew Dillon  * Inodes are sorted and assigned to slave threads in groups of 128.
56ff003b11SMatthew Dillon  * We want a flush group size large enough such that the slave threads
57ff003b11SMatthew Dillon  * are not likely to interfere with each other when accessing the B-Tree,
58ff003b11SMatthew Dillon  * but not so large that we lose concurrency.
59ff003b11SMatthew Dillon  */
60ff003b11SMatthew Dillon #define HAMMER_FLUSH_GROUP_SIZE 128
61ff003b11SMatthew Dillon 
62af209b0fSMatthew Dillon /*
63af209b0fSMatthew Dillon  * Support structures for the flusher threads.
64af209b0fSMatthew Dillon  */
65af209b0fSMatthew Dillon struct hammer_flusher_info {
667a61b85dSMatthew Dillon 	TAILQ_ENTRY(hammer_flusher_info) entry;
67af209b0fSMatthew Dillon 	struct hammer_mount *hmp;
68af209b0fSMatthew Dillon 	thread_t	td;
697a61b85dSMatthew Dillon 	int		runstate;
707a61b85dSMatthew Dillon 	int		count;
717a61b85dSMatthew Dillon 	hammer_flush_group_t flg;
72cb51be26SMatthew Dillon 	hammer_inode_t	work_array[HAMMER_FLUSH_GROUP_SIZE];
73af209b0fSMatthew Dillon };
74af209b0fSMatthew Dillon 
75af209b0fSMatthew Dillon typedef struct hammer_flusher_info *hammer_flusher_info_t;
76059819e3SMatthew Dillon 
777bc5b8c2SMatthew Dillon /*
787a61b85dSMatthew Dillon  * Sync all inodes pending on the flusher.
797a61b85dSMatthew Dillon  *
807a61b85dSMatthew Dillon  * All flush groups will be flushed.  This does not queue dirty inodes
817a61b85dSMatthew Dillon  * to the flush groups; it just flushes out what has already been queued!
827bc5b8c2SMatthew Dillon  */
83059819e3SMatthew Dillon void
84059819e3SMatthew Dillon hammer_flusher_sync(hammer_mount_t hmp)
85059819e3SMatthew Dillon {
86059819e3SMatthew Dillon 	int seq;
87059819e3SMatthew Dillon 
887a61b85dSMatthew Dillon 	seq = hammer_flusher_async(hmp, NULL);
89f437a2abSMatthew Dillon 	hammer_flusher_wait(hmp, seq);
90059819e3SMatthew Dillon }
91059819e3SMatthew Dillon 
927bc5b8c2SMatthew Dillon /*
937bc5b8c2SMatthew Dillon  * Sync all inodes pending on the flusher - return immediately.
947a61b85dSMatthew Dillon  *
957a61b85dSMatthew Dillon  * All flush groups will be flushed.
967bc5b8c2SMatthew Dillon  */
9793291532SMatthew Dillon int
987a61b85dSMatthew Dillon hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg)
99059819e3SMatthew Dillon {
1007a61b85dSMatthew Dillon 	hammer_flush_group_t flg;
1017a61b85dSMatthew Dillon 	int seq = hmp->flusher.next;
10293291532SMatthew Dillon 
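	/*
	 * Close out flush groups up to and including close_flg (or all of
	 * them if close_flg is NULL).  Each group that is not already
	 * running will consume one flusher cycle, so advance the sequence
	 * number the caller can wait on for each such group.
	 */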
1037a61b85dSMatthew Dillon 	TAILQ_FOREACH(flg, &hmp->flush_group_list, flush_entry) {
1047a61b85dSMatthew Dillon 		if (flg->running == 0)
1057a61b85dSMatthew Dillon 			++seq;
1067a61b85dSMatthew Dillon 		flg->closed = 1;
1077a61b85dSMatthew Dillon 		if (flg == close_flg)
1087a61b85dSMatthew Dillon 			break;
1097a61b85dSMatthew Dillon 	}
110da2da375SMatthew Dillon 	if (hmp->flusher.td) {
111da2da375SMatthew Dillon 		if (hmp->flusher.signal++ == 0)
112da2da375SMatthew Dillon 			wakeup(&hmp->flusher.signal);
11393291532SMatthew Dillon 	} else {
11493291532SMatthew Dillon 		seq = hmp->flusher.done;
1151f07f686SMatthew Dillon 	}
11693291532SMatthew Dillon 	return(seq);
11793291532SMatthew Dillon }
11893291532SMatthew Dillon 
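/*
 * Request a single flush cycle without closing any flush group and return
 * the sequence number to wait on.  If no flusher thread is running, the
 * last completed sequence number is returned instead.
 */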
11915e75dabSMatthew Dillon int
12015e75dabSMatthew Dillon hammer_flusher_async_one(hammer_mount_t hmp)
12115e75dabSMatthew Dillon {
12215e75dabSMatthew Dillon 	int seq;
12315e75dabSMatthew Dillon 
12415e75dabSMatthew Dillon 	if (hmp->flusher.td) {
12515e75dabSMatthew Dillon 		seq = hmp->flusher.next;
12615e75dabSMatthew Dillon 		if (hmp->flusher.signal++ == 0)
12715e75dabSMatthew Dillon 			wakeup(&hmp->flusher.signal);
12815e75dabSMatthew Dillon 	} else {
12915e75dabSMatthew Dillon 		seq = hmp->flusher.done;
13015e75dabSMatthew Dillon 	}
13115e75dabSMatthew Dillon 	return(seq);
13215e75dabSMatthew Dillon }
13315e75dabSMatthew Dillon 
134f437a2abSMatthew Dillon /*
135f437a2abSMatthew Dillon  * Wait for the flusher to get to the specified sequence number.
136f437a2abSMatthew Dillon  * Signal the flusher as often as necessary to keep it going.
137f437a2abSMatthew Dillon  */
13893291532SMatthew Dillon void
13993291532SMatthew Dillon hammer_flusher_wait(hammer_mount_t hmp, int seq)
14093291532SMatthew Dillon {
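	/*
	 * The signed difference (seq - done) stays correct even if the
	 * sequence counters wrap; re-signal the flusher whenever it is not
	 * already working on the requested sequence.
	 */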
141cdb6e4e6SMatthew Dillon 	while ((int)(seq - hmp->flusher.done) > 0) {
142f437a2abSMatthew Dillon 		if (hmp->flusher.act != seq) {
143f437a2abSMatthew Dillon 			if (hmp->flusher.signal++ == 0)
144f437a2abSMatthew Dillon 				wakeup(&hmp->flusher.signal);
145f437a2abSMatthew Dillon 		}
14693291532SMatthew Dillon 		tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
147059819e3SMatthew Dillon 	}
148cdb6e4e6SMatthew Dillon }
149059819e3SMatthew Dillon 
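/*
 * Queue a single flush cycle and wait for it to complete.
 */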
150059819e3SMatthew Dillon void
15182010f9fSMatthew Dillon hammer_flusher_wait_next(hammer_mount_t hmp)
15282010f9fSMatthew Dillon {
15382010f9fSMatthew Dillon 	int seq;
15482010f9fSMatthew Dillon 
15582010f9fSMatthew Dillon 	seq = hammer_flusher_async_one(hmp);
15682010f9fSMatthew Dillon 	hammer_flusher_wait(hmp, seq);
15782010f9fSMatthew Dillon }
15882010f9fSMatthew Dillon 
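/*
 * Create the master flusher thread and the pool of slave threads, and
 * initialize the flusher's sequencing state and thread lists.
 */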
15982010f9fSMatthew Dillon void
160059819e3SMatthew Dillon hammer_flusher_create(hammer_mount_t hmp)
161059819e3SMatthew Dillon {
162da2da375SMatthew Dillon 	hammer_flusher_info_t info;
163da2da375SMatthew Dillon 	int i;
164da2da375SMatthew Dillon 
165da2da375SMatthew Dillon 	hmp->flusher.signal = 0;
166da2da375SMatthew Dillon 	hmp->flusher.act = 0;
167da2da375SMatthew Dillon 	hmp->flusher.done = 0;
168da2da375SMatthew Dillon 	hmp->flusher.next = 1;
169da2da375SMatthew Dillon 	hammer_ref(&hmp->flusher.finalize_lock);
1707a61b85dSMatthew Dillon 	TAILQ_INIT(&hmp->flusher.run_list);
1717a61b85dSMatthew Dillon 	TAILQ_INIT(&hmp->flusher.ready_list);
172da2da375SMatthew Dillon 
173da2da375SMatthew Dillon 	lwkt_create(hammer_flusher_master_thread, hmp,
174da2da375SMatthew Dillon 		    &hmp->flusher.td, NULL, 0, -1, "hammer-M");
175da2da375SMatthew Dillon 	for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
176bac808feSMatthew Dillon 		info = kmalloc(sizeof(*info), hmp->m_misc, M_WAITOK|M_ZERO);
177da2da375SMatthew Dillon 		info->hmp = hmp;
1787a61b85dSMatthew Dillon 		TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
179da2da375SMatthew Dillon 		lwkt_create(hammer_flusher_slave_thread, info,
180da2da375SMatthew Dillon 			    &info->td, NULL, 0, -1, "hammer-S%d", i);
181da2da375SMatthew Dillon 	}
182059819e3SMatthew Dillon }
183059819e3SMatthew Dillon 
184059819e3SMatthew Dillon void
185059819e3SMatthew Dillon hammer_flusher_destroy(hammer_mount_t hmp)
186059819e3SMatthew Dillon {
187da2da375SMatthew Dillon 	hammer_flusher_info_t info;
188da2da375SMatthew Dillon 
189da2da375SMatthew Dillon 	/*
190da2da375SMatthew Dillon 	 * Kill the master
191da2da375SMatthew Dillon 	 */
192da2da375SMatthew Dillon 	hmp->flusher.exiting = 1;
193da2da375SMatthew Dillon 	while (hmp->flusher.td) {
194da2da375SMatthew Dillon 		++hmp->flusher.signal;
195da2da375SMatthew Dillon 		wakeup(&hmp->flusher.signal);
196da2da375SMatthew Dillon 		tsleep(&hmp->flusher.exiting, 0, "hmrwex", hz);
197da2da375SMatthew Dillon 	}
198da2da375SMatthew Dillon 
199da2da375SMatthew Dillon 	/*
200da2da375SMatthew Dillon 	 * Kill the slaves
201da2da375SMatthew Dillon 	 */
2027a61b85dSMatthew Dillon 	while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) != NULL) {
2037a61b85dSMatthew Dillon 		KKASSERT(info->runstate == 0);
2047a61b85dSMatthew Dillon 		TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
2057a61b85dSMatthew Dillon 		info->runstate = -1;
2067a61b85dSMatthew Dillon 		wakeup(&info->runstate);
2077a61b85dSMatthew Dillon 		while (info->td)
208da2da375SMatthew Dillon 			tsleep(&info->td, 0, "hmrwwc", 0);
209bac808feSMatthew Dillon 		kfree(info, hmp->m_misc);
210059819e3SMatthew Dillon 	}
211f90dde4cSMatthew Dillon }
212059819e3SMatthew Dillon 
213af209b0fSMatthew Dillon /*
214af209b0fSMatthew Dillon  * The master flusher thread manages the flusher sequence id and
215af209b0fSMatthew Dillon  * synchronization with the slave work threads.
216af209b0fSMatthew Dillon  */
217059819e3SMatthew Dillon static void
218da2da375SMatthew Dillon hammer_flusher_master_thread(void *arg)
219059819e3SMatthew Dillon {
2207a61b85dSMatthew Dillon 	hammer_flush_group_t flg;
2217a61b85dSMatthew Dillon 	hammer_mount_t hmp;
2220729c8c8SMatthew Dillon 
2237a61b85dSMatthew Dillon 	hmp = arg;
2247a61b85dSMatthew Dillon 
2257a61b85dSMatthew Dillon 	for (;;) {
2267a61b85dSMatthew Dillon 		/*
2277a61b85dSMatthew Dillon 		 * Do at least one flush cycle.  We may have to update the
2287a61b85dSMatthew Dillon 		 * UNDO FIFO even if no inodes are queued.
2297a61b85dSMatthew Dillon 		 */
230059819e3SMatthew Dillon 		for (;;) {
231da2da375SMatthew Dillon 			while (hmp->flusher.group_lock)
232da2da375SMatthew Dillon 				tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0);
233da2da375SMatthew Dillon 			hmp->flusher.act = hmp->flusher.next;
234da2da375SMatthew Dillon 			++hmp->flusher.next;
23510a5d1baSMatthew Dillon 			hammer_flusher_clean_loose_ios(hmp);
236059819e3SMatthew Dillon 			hammer_flusher_flush(hmp);
237da2da375SMatthew Dillon 			hmp->flusher.done = hmp->flusher.act;
238da2da375SMatthew Dillon 			wakeup(&hmp->flusher.done);
2397a61b85dSMatthew Dillon 			flg = TAILQ_FIRST(&hmp->flush_group_list);
2407a61b85dSMatthew Dillon 			if (flg == NULL || flg->closed == 0)
2417a61b85dSMatthew Dillon 				break;
242cdb6e4e6SMatthew Dillon 			if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
243cdb6e4e6SMatthew Dillon 				break;
2447a61b85dSMatthew Dillon 		}
245c32a6806SMatthew Dillon 
246c32a6806SMatthew Dillon 		/*
2471f07f686SMatthew Dillon 		 * Wait for activity.
248c32a6806SMatthew Dillon 		 */
2497a61b85dSMatthew Dillon 		if (hmp->flusher.exiting && TAILQ_EMPTY(&hmp->flush_group_list))
250059819e3SMatthew Dillon 			break;
251da2da375SMatthew Dillon 		while (hmp->flusher.signal == 0)
252da2da375SMatthew Dillon 			tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0);
2534889cbd4SMatthew Dillon 
2544889cbd4SMatthew Dillon 		/*
2554889cbd4SMatthew Dillon 		 * Flush for each count on signal but only allow one extra
2564889cbd4SMatthew Dillon 		 * flush request to build up.
2574889cbd4SMatthew Dillon 		 */
2584889cbd4SMatthew Dillon 		if (--hmp->flusher.signal != 0)
2594889cbd4SMatthew Dillon 			hmp->flusher.signal = 1;
2601f07f686SMatthew Dillon 	}
261da2da375SMatthew Dillon 
262da2da375SMatthew Dillon 	/*
263da2da375SMatthew Dillon 	 * And we are done.
264da2da375SMatthew Dillon 	 */
265da2da375SMatthew Dillon 	hmp->flusher.td = NULL;
266da2da375SMatthew Dillon 	wakeup(&hmp->flusher.exiting);
267da2da375SMatthew Dillon 	lwkt_exit();
268da2da375SMatthew Dillon }
269da2da375SMatthew Dillon 
270af209b0fSMatthew Dillon /*
2717a61b85dSMatthew Dillon  * Flush all inodes in the current flush group.
2727a61b85dSMatthew Dillon  */
2737a61b85dSMatthew Dillon static void
2747a61b85dSMatthew Dillon hammer_flusher_flush(hammer_mount_t hmp)
2757a61b85dSMatthew Dillon {
2767a61b85dSMatthew Dillon 	hammer_flusher_info_t info;
2777a61b85dSMatthew Dillon 	hammer_flush_group_t flg;
2787a61b85dSMatthew Dillon 	hammer_reserve_t resv;
2797a61b85dSMatthew Dillon 	hammer_inode_t ip;
2807a61b85dSMatthew Dillon 	hammer_inode_t next_ip;
2817a61b85dSMatthew Dillon 	int slave_index;
28215e75dabSMatthew Dillon 	int count;
2837a61b85dSMatthew Dillon 
2847a61b85dSMatthew Dillon 	/*
2857a61b85dSMatthew Dillon 	 * Just in case there's a flush race on mount
2867a61b85dSMatthew Dillon 	 */
2877a61b85dSMatthew Dillon 	if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL)
2887a61b85dSMatthew Dillon 		return;
2897a61b85dSMatthew Dillon 
2907a61b85dSMatthew Dillon 	/*
2917a61b85dSMatthew Dillon 	 * We only do one flg but we may have to loop/retry.
2927a61b85dSMatthew Dillon 	 */
29315e75dabSMatthew Dillon 	count = 0;
2947a61b85dSMatthew Dillon 	while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
29515e75dabSMatthew Dillon 		++count;
2967a61b85dSMatthew Dillon 		if (hammer_debug_general & 0x0001) {
2977a61b85dSMatthew Dillon 			kprintf("hammer_flush %d ttl=%d recs=%d\n",
2987a61b85dSMatthew Dillon 				hmp->flusher.act,
2997a61b85dSMatthew Dillon 				flg->total_count, flg->refs);
3007a61b85dSMatthew Dillon 		}
301cdb6e4e6SMatthew Dillon 		if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
302cdb6e4e6SMatthew Dillon 			break;
3037a61b85dSMatthew Dillon 		hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
3047a61b85dSMatthew Dillon 
3057a61b85dSMatthew Dillon 		/*
3067a61b85dSMatthew Dillon 		 * If the previous flush cycle just about exhausted our
3077a61b85dSMatthew Dillon 		 * UNDO space we may have to do a dummy cycle to move the
3087a61b85dSMatthew Dillon 		 * first_offset up before actually digging into a new cycle,
3097a61b85dSMatthew Dillon 		 * or the new cycle will not have sufficient undo space.
3107a61b85dSMatthew Dillon 		 */
3117a61b85dSMatthew Dillon 		if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
3127a61b85dSMatthew Dillon 			hammer_flusher_finalize(&hmp->flusher.trans, 0);
3137a61b85dSMatthew Dillon 
3147a61b85dSMatthew Dillon 		/*
3157b6ccb11SMatthew Dillon 		 * Ok, we are running this flush group now (this prevents new
3167b6ccb11SMatthew Dillon 		 * additions to it).
3177b6ccb11SMatthew Dillon 		 */
3187b6ccb11SMatthew Dillon 		flg->running = 1;
3197b6ccb11SMatthew Dillon 		if (hmp->next_flush_group == flg)
3207b6ccb11SMatthew Dillon 			hmp->next_flush_group = TAILQ_NEXT(flg, flush_entry);
3217b6ccb11SMatthew Dillon 
3227b6ccb11SMatthew Dillon 		/*
323ff003b11SMatthew Dillon 		 * Iterate the inodes in the flg's flush_tree and assign
3247a61b85dSMatthew Dillon 		 * them to slaves.
3257a61b85dSMatthew Dillon 		 */
3267a61b85dSMatthew Dillon 		slave_index = 0;
3277a61b85dSMatthew Dillon 		info = TAILQ_FIRST(&hmp->flusher.ready_list);
328ff003b11SMatthew Dillon 		next_ip = RB_FIRST(hammer_fls_rb_tree, &flg->flush_tree);
3297a61b85dSMatthew Dillon 
3307a61b85dSMatthew Dillon 		while ((ip = next_ip) != NULL) {
331ff003b11SMatthew Dillon 			next_ip = RB_NEXT(hammer_fls_rb_tree,
332ff003b11SMatthew Dillon 					  &flg->flush_tree, ip);
3337a61b85dSMatthew Dillon 
3343e583440SMatthew Dillon 			if (++hmp->check_yield > hammer_yield_check) {
3353e583440SMatthew Dillon 				hmp->check_yield = 0;
3363e583440SMatthew Dillon 				lwkt_user_yield();
3373e583440SMatthew Dillon 			}
3383e583440SMatthew Dillon 
3397a61b85dSMatthew Dillon 			/*
3407a61b85dSMatthew Dillon 			 * Add ip to the slave's work array.  The slave is
3417a61b85dSMatthew Dillon 			 * not currently running.
3427a61b85dSMatthew Dillon 			 */
3437a61b85dSMatthew Dillon 			info->work_array[info->count++] = ip;
3447a61b85dSMatthew Dillon 			if (info->count != HAMMER_FLUSH_GROUP_SIZE)
3457a61b85dSMatthew Dillon 				continue;
3467a61b85dSMatthew Dillon 
3477a61b85dSMatthew Dillon 			/*
3487a61b85dSMatthew Dillon 			 * Get the slave running
3497a61b85dSMatthew Dillon 			 */
3507a61b85dSMatthew Dillon 			TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
3517a61b85dSMatthew Dillon 			TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
3527a61b85dSMatthew Dillon 			info->flg = flg;
3537a61b85dSMatthew Dillon 			info->runstate = 1;
3547a61b85dSMatthew Dillon 			wakeup(&info->runstate);
3557a61b85dSMatthew Dillon 
3567a61b85dSMatthew Dillon 			/*
3577a61b85dSMatthew Dillon 			 * Get a new slave.  We may have to wait for one to
3587a61b85dSMatthew Dillon 			 * finish running.
3597a61b85dSMatthew Dillon 			 */
3607a61b85dSMatthew Dillon 			while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) == NULL) {
3617a61b85dSMatthew Dillon 				tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
3627a61b85dSMatthew Dillon 			}
3637a61b85dSMatthew Dillon 		}
3647a61b85dSMatthew Dillon 
3657a61b85dSMatthew Dillon 		/*
3667a61b85dSMatthew Dillon 		 * Run the current slave if necessary
3677a61b85dSMatthew Dillon 		 */
3687a61b85dSMatthew Dillon 		if (info->count) {
3697a61b85dSMatthew Dillon 			TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
3707a61b85dSMatthew Dillon 			TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
3717a61b85dSMatthew Dillon 			info->flg = flg;
3727a61b85dSMatthew Dillon 			info->runstate = 1;
3737a61b85dSMatthew Dillon 			wakeup(&info->runstate);
3747a61b85dSMatthew Dillon 		}
3757a61b85dSMatthew Dillon 
3767a61b85dSMatthew Dillon 		/*
3777a61b85dSMatthew Dillon 		 * Wait for all slaves to finish running
3787a61b85dSMatthew Dillon 		 */
3797a61b85dSMatthew Dillon 		while (TAILQ_FIRST(&hmp->flusher.run_list) != NULL)
3807a61b85dSMatthew Dillon 			tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
3817a61b85dSMatthew Dillon 
3827a61b85dSMatthew Dillon 		/*
3837a61b85dSMatthew Dillon 		 * Do the final finalization, clean up
3847a61b85dSMatthew Dillon 		 */
3857a61b85dSMatthew Dillon 		hammer_flusher_finalize(&hmp->flusher.trans, 1);
3867a61b85dSMatthew Dillon 		hmp->flusher.tid = hmp->flusher.trans.tid;
3877a61b85dSMatthew Dillon 
3887a61b85dSMatthew Dillon 		hammer_done_transaction(&hmp->flusher.trans);
3897a61b85dSMatthew Dillon 
3907a61b85dSMatthew Dillon 		/*
3917a61b85dSMatthew Dillon 	 * Loop up on the same flg.  If the flg is done, clean it up
3927a61b85dSMatthew Dillon 		 * and break out.  We only flush one flg.
3937a61b85dSMatthew Dillon 		 */
394ff003b11SMatthew Dillon 		if (RB_EMPTY(&flg->flush_tree)) {
3957a61b85dSMatthew Dillon 			KKASSERT(flg->refs == 0);
3967a61b85dSMatthew Dillon 			TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
397bac808feSMatthew Dillon 			kfree(flg, hmp->m_misc);
3987a61b85dSMatthew Dillon 			break;
3997a61b85dSMatthew Dillon 		}
4007a61b85dSMatthew Dillon 	}
4017a61b85dSMatthew Dillon 
4027a61b85dSMatthew Dillon 	/*
4031b0ab2c3SMatthew Dillon 	 * We may have pure meta-data to flush, or we may have to finish
4041b0ab2c3SMatthew Dillon 	 * cycling the UNDO FIFO, even if there were no flush groups.
40515e75dabSMatthew Dillon 	 */
4061b0ab2c3SMatthew Dillon 	if (count == 0 && hammer_flusher_haswork(hmp)) {
40715e75dabSMatthew Dillon 		hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
40815e75dabSMatthew Dillon 		hammer_flusher_finalize(&hmp->flusher.trans, 1);
40915e75dabSMatthew Dillon 		hammer_done_transaction(&hmp->flusher.trans);
41015e75dabSMatthew Dillon 	}
41115e75dabSMatthew Dillon 
41215e75dabSMatthew Dillon 	/*
4137a61b85dSMatthew Dillon 	 * Clean up any freed big-blocks (typically zone-2).
4147a61b85dSMatthew Dillon 	 * resv->flush_group is typically set several flush groups ahead
4157a61b85dSMatthew Dillon 	 * of the free to ensure that the freed block is not reused until
4167a61b85dSMatthew Dillon 	 * the free itself has been fully committed to the media.
4177a61b85dSMatthew Dillon 	 */
4187a61b85dSMatthew Dillon 	while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
4197a61b85dSMatthew Dillon 		if (resv->flush_group != hmp->flusher.act)
4207a61b85dSMatthew Dillon 			break;
4217a61b85dSMatthew Dillon 		hammer_reserve_clrdelay(hmp, resv);
4227a61b85dSMatthew Dillon 	}
4237a61b85dSMatthew Dillon }
4247a61b85dSMatthew Dillon 
4257a61b85dSMatthew Dillon 
4267a61b85dSMatthew Dillon /*
427ff003b11SMatthew Dillon  * The slave flusher thread pulls work off the master flush list until no
428af209b0fSMatthew Dillon  * work is left.
429af209b0fSMatthew Dillon  */
430da2da375SMatthew Dillon static void
431da2da375SMatthew Dillon hammer_flusher_slave_thread(void *arg)
432da2da375SMatthew Dillon {
4337a61b85dSMatthew Dillon 	hammer_flush_group_t flg;
434da2da375SMatthew Dillon 	hammer_flusher_info_t info;
435da2da375SMatthew Dillon 	hammer_mount_t hmp;
436da2da375SMatthew Dillon 	hammer_inode_t ip;
437cb51be26SMatthew Dillon 	int i;
438da2da375SMatthew Dillon 
439da2da375SMatthew Dillon 	info = arg;
440da2da375SMatthew Dillon 	hmp = info->hmp;
441da2da375SMatthew Dillon 
442da2da375SMatthew Dillon 	for (;;) {
4437a61b85dSMatthew Dillon 		while (info->runstate == 0)
4447a61b85dSMatthew Dillon 			tsleep(&info->runstate, 0, "hmrssw", 0);
4457a61b85dSMatthew Dillon 		if (info->runstate < 0)
446da2da375SMatthew Dillon 			break;
4477a61b85dSMatthew Dillon 		flg = info->flg;
448cb51be26SMatthew Dillon 
4497a61b85dSMatthew Dillon 		for (i = 0; i < info->count; ++i) {
4507a61b85dSMatthew Dillon 			ip = info->work_array[i];
4517a61b85dSMatthew Dillon 			hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
452ce0138a6SMatthew Dillon 			++hammer_stats_inode_flushes;
453cb51be26SMatthew Dillon 		}
4547a61b85dSMatthew Dillon 		info->count = 0;
4557a61b85dSMatthew Dillon 		info->runstate = 0;
4567a61b85dSMatthew Dillon 		TAILQ_REMOVE(&hmp->flusher.run_list, info, entry);
4577a61b85dSMatthew Dillon 		TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
4587a61b85dSMatthew Dillon 		wakeup(&hmp->flusher.ready_list);
459da2da375SMatthew Dillon 	}
460da2da375SMatthew Dillon 	info->td = NULL;
461da2da375SMatthew Dillon 	wakeup(&info->td);
462059819e3SMatthew Dillon 	lwkt_exit();
463059819e3SMatthew Dillon }
464059819e3SMatthew Dillon 
465525aad3aSMatthew Dillon void
46610a5d1baSMatthew Dillon hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
46710a5d1baSMatthew Dillon {
46810a5d1baSMatthew Dillon 	hammer_buffer_t buffer;
46910a5d1baSMatthew Dillon 	hammer_io_t io;
47010a5d1baSMatthew Dillon 
47110a5d1baSMatthew Dillon 	/*
47210a5d1baSMatthew Dillon 	 * loose ends - buffers without bp's aren't tracked by the kernel
47310a5d1baSMatthew Dillon 	 * and can build up, so clean them out.  This can occur when an
47410a5d1baSMatthew Dillon 	 * IO completes on a buffer with no references left.
47510a5d1baSMatthew Dillon 	 */
476525aad3aSMatthew Dillon 	if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
477a99b9ea2SMatthew Dillon 		crit_enter();	/* biodone() race */
47810a5d1baSMatthew Dillon 		while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
47910a5d1baSMatthew Dillon 			KKASSERT(io->mod_list == &hmp->lose_list);
480bf3b416bSMatthew Dillon 			TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
48110a5d1baSMatthew Dillon 			io->mod_list = NULL;
482a99b9ea2SMatthew Dillon 			if (io->lock.refs == 0)
483a99b9ea2SMatthew Dillon 				++hammer_count_refedbufs;
48410a5d1baSMatthew Dillon 			hammer_ref(&io->lock);
48510a5d1baSMatthew Dillon 			buffer = (void *)io;
48610a5d1baSMatthew Dillon 			hammer_rel_buffer(buffer, 0);
48710a5d1baSMatthew Dillon 		}
488a99b9ea2SMatthew Dillon 		crit_exit();
48910a5d1baSMatthew Dillon 	}
490525aad3aSMatthew Dillon }
49110a5d1baSMatthew Dillon 
492059819e3SMatthew Dillon /*
4939f5097dcSMatthew Dillon  * Flush a single inode that is part of a flush group.
49406ad81ffSMatthew Dillon  *
495cdb6e4e6SMatthew Dillon  * Flusher errors are extremely serious; even ENOSPC shouldn't occur because
496cdb6e4e6SMatthew Dillon  * the front-end should have reserved sufficient space on the media.  Any
497cdb6e4e6SMatthew Dillon  * error other than EWOULDBLOCK will force the mount to be read-only.
498059819e3SMatthew Dillon  */
4999f5097dcSMatthew Dillon static
5009f5097dcSMatthew Dillon void
5019f5097dcSMatthew Dillon hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
5029f5097dcSMatthew Dillon {
5039f5097dcSMatthew Dillon 	hammer_mount_t hmp = ip->hmp;
50406ad81ffSMatthew Dillon 	int error;
5059f5097dcSMatthew Dillon 
506525aad3aSMatthew Dillon 	hammer_flusher_clean_loose_ios(hmp);
50702325004SMatthew Dillon 	error = hammer_sync_inode(trans, ip);
508cdb6e4e6SMatthew Dillon 
509cdb6e4e6SMatthew Dillon 	/*
510cdb6e4e6SMatthew Dillon 	 * EWOULDBLOCK can happen under normal operation, all other errors
511cdb6e4e6SMatthew Dillon 	 * are considered extremely serious.  We must set WOULDBLOCK
512cdb6e4e6SMatthew Dillon 	 * mechanics to deal with the mess left over from the abort of the
513cdb6e4e6SMatthew Dillon 	 * previous flush.
514cdb6e4e6SMatthew Dillon 	 */
515cdb6e4e6SMatthew Dillon 	if (error) {
516cdb6e4e6SMatthew Dillon 		ip->flags |= HAMMER_INODE_WOULDBLOCK;
517cdb6e4e6SMatthew Dillon 		if (error == EWOULDBLOCK)
518cdb6e4e6SMatthew Dillon 			error = 0;
519cdb6e4e6SMatthew Dillon 	}
520cdb6e4e6SMatthew Dillon 	hammer_flush_inode_done(ip, error);
521da2da375SMatthew Dillon 	while (hmp->flusher.finalize_want)
522da2da375SMatthew Dillon 		tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
52306ad81ffSMatthew Dillon 	if (hammer_flusher_undo_exhausted(trans, 1)) {
5245a930e66SMatthew Dillon 		kprintf("HAMMER: Warning: UNDO area too small!\n");
5259f5097dcSMatthew Dillon 		hammer_flusher_finalize(trans, 1);
52606ad81ffSMatthew Dillon 	} else if (hammer_flusher_meta_limit(trans->hmp)) {
5279f5097dcSMatthew Dillon 		hammer_flusher_finalize(trans, 0);
528059819e3SMatthew Dillon 	}
529059819e3SMatthew Dillon }
530059819e3SMatthew Dillon 
53110a5d1baSMatthew Dillon /*
53206ad81ffSMatthew Dillon  * Return non-zero if the UNDO area has less than (quarter / 4) of its
53306ad81ffSMatthew Dillon  * space left.
53406ad81ffSMatthew Dillon  *
53506ad81ffSMatthew Dillon  * 1/4 - Emergency free undo space level.  Below this point the flusher
53606ad81ffSMatthew Dillon  *	 will finalize even if directory dependencies have not been resolved.
53706ad81ffSMatthew Dillon  *
53806ad81ffSMatthew Dillon  * 2/4 - Used by the pruning and reblocking code.  These functions may be
53906ad81ffSMatthew Dillon  *	 running in parallel with a flush and cannot be allowed to drop
54006ad81ffSMatthew Dillon  *	 available undo space to emergency levels.
54106ad81ffSMatthew Dillon  *
54206ad81ffSMatthew Dillon  * 3/4 - Used at the beginning of a flush to force-sync the volume header
54306ad81ffSMatthew Dillon  *	 to give the flush plenty of runway to work in.
544ec4e8497SMatthew Dillon  */
545ec4e8497SMatthew Dillon int
54606ad81ffSMatthew Dillon hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter)
547ec4e8497SMatthew Dillon {
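	/*
	 * For example, quarter == 1 triggers only once less than one
	 * quarter of the maximum undo space remains available, while
	 * quarter == 3 triggers as soon as less than three quarters remain.
	 */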
54806ad81ffSMatthew Dillon 	if (hammer_undo_space(trans) <
54906ad81ffSMatthew Dillon 	    hammer_undo_max(trans->hmp) * quarter / 4) {
5501f07f686SMatthew Dillon 		return(1);
5511f07f686SMatthew Dillon 	} else {
5521f07f686SMatthew Dillon 		return(0);
5531f07f686SMatthew Dillon 	}
554ec4e8497SMatthew Dillon }
555ec4e8497SMatthew Dillon 
556ec4e8497SMatthew Dillon /*
5579f5097dcSMatthew Dillon  * Flush all pending UNDOs, wait for write completion, update the volume
5589f5097dcSMatthew Dillon  * header with the new UNDO end position, and flush it.  Then
5599f5097dcSMatthew Dillon  * asynchronously flush the meta-data.
56010a5d1baSMatthew Dillon  *
5619f5097dcSMatthew Dillon  * If this is the last finalization in a flush group we also synchronize
5629f5097dcSMatthew Dillon  * our cached blockmap and set hmp->flusher_undo_start and our cached undo
5639f5097dcSMatthew Dillon  * fifo first_offset so the next flush resets the FIFO pointers.
5646c1f89f4SMatthew Dillon  *
5656c1f89f4SMatthew Dillon  * If this is not final it is being called because too many dirty meta-data
5666c1f89f4SMatthew Dillon  * buffers have built up and must be flushed with UNDO synchronization to
5676c1f89f4SMatthew Dillon  * avoid a buffer cache deadlock.
56810a5d1baSMatthew Dillon  */
56910a5d1baSMatthew Dillon void
5709f5097dcSMatthew Dillon hammer_flusher_finalize(hammer_transaction_t trans, int final)
571059819e3SMatthew Dillon {
5729f5097dcSMatthew Dillon 	hammer_volume_t root_volume;
5739f5097dcSMatthew Dillon 	hammer_blockmap_t cundomap, dundomap;
5749f5097dcSMatthew Dillon 	hammer_mount_t hmp;
57510a5d1baSMatthew Dillon 	hammer_io_t io;
576c9b9e29dSMatthew Dillon 	int count;
57719619882SMatthew Dillon 	int i;
57810a5d1baSMatthew Dillon 
5799f5097dcSMatthew Dillon 	hmp = trans->hmp;
5809f5097dcSMatthew Dillon 	root_volume = trans->rootvol;
5819f5097dcSMatthew Dillon 
58247637bffSMatthew Dillon 	/*
5836c1f89f4SMatthew Dillon 	 * Exclusively lock the flusher.  This guarantees that all dirty
5846c1f89f4SMatthew Dillon 	 * buffers will be idled (have a mod-count of 0).
5856c1f89f4SMatthew Dillon 	 */
5866c1f89f4SMatthew Dillon 	++hmp->flusher.finalize_want;
5876c1f89f4SMatthew Dillon 	hammer_lock_ex(&hmp->flusher.finalize_lock);
5886c1f89f4SMatthew Dillon 
5896c1f89f4SMatthew Dillon 	/*
5906c1f89f4SMatthew Dillon 	 * If this isn't the final sync several threads may have hit the
5916c1f89f4SMatthew Dillon 	 * meta-limit at the same time and raced.  Only sync if we really
5926c1f89f4SMatthew Dillon 	 * have to, after acquiring the lock.
5936c1f89f4SMatthew Dillon 	 * If this isn't the final sync, several threads may have hit the
5946c1f89f4SMatthew Dillon 	if (final == 0 && !hammer_flusher_meta_limit(hmp))
5956c1f89f4SMatthew Dillon 		goto done;
5966c1f89f4SMatthew Dillon 
597cdb6e4e6SMatthew Dillon 	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
598cdb6e4e6SMatthew Dillon 		goto done;
599cdb6e4e6SMatthew Dillon 
6006c1f89f4SMatthew Dillon 	/*
60147637bffSMatthew Dillon 	 * Flush data buffers.  This can occur asynchronously and at any
6029f5097dcSMatthew Dillon 	 * time.  We must interlock against the frontend direct-data write
6039f5097dcSMatthew Dillon 	 * but do not have to acquire the sync-lock yet.
60447637bffSMatthew Dillon 	 */
60547637bffSMatthew Dillon 	count = 0;
60647637bffSMatthew Dillon 	while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
607cdb6e4e6SMatthew Dillon 		if (io->ioerror)
608cdb6e4e6SMatthew Dillon 			break;
609a99b9ea2SMatthew Dillon 		if (io->lock.refs == 0)
610a99b9ea2SMatthew Dillon 			++hammer_count_refedbufs;
61147637bffSMatthew Dillon 		hammer_ref(&io->lock);
6129f5097dcSMatthew Dillon 		hammer_io_write_interlock(io);
61347637bffSMatthew Dillon 		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
614710733a6SMatthew Dillon 		hammer_io_flush(io, 0);
6159f5097dcSMatthew Dillon 		hammer_io_done_interlock(io);
61647637bffSMatthew Dillon 		hammer_rel_buffer((hammer_buffer_t)io, 0);
61747637bffSMatthew Dillon 		++count;
61847637bffSMatthew Dillon 	}
61947637bffSMatthew Dillon 
6209f5097dcSMatthew Dillon 	/*
6219f5097dcSMatthew Dillon 	 * The sync-lock is required for the remaining sequence.  This lock
6229f5097dcSMatthew Dillon 	 * prevents meta-data from being modified.
6239f5097dcSMatthew Dillon 	 */
6242f85fa4dSMatthew Dillon 	hammer_sync_lock_ex(trans);
6259480ff55SMatthew Dillon 
626059819e3SMatthew Dillon 	/*
6279f5097dcSMatthew Dillon 	 * If we have been asked to finalize the volume header, sync the
6289f5097dcSMatthew Dillon 	 * cached blockmap to the on-disk blockmap.  Generate an UNDO
6299f5097dcSMatthew Dillon 	 * record for the update.
630e8599db1SMatthew Dillon 	 */
6319f5097dcSMatthew Dillon 	if (final) {
6329f5097dcSMatthew Dillon 		cundomap = &hmp->blockmap[0];
6339f5097dcSMatthew Dillon 		dundomap = &root_volume->ondisk->vol0_blockmap[0];
6349f5097dcSMatthew Dillon 		if (root_volume->io.modified) {
635e8599db1SMatthew Dillon 			hammer_modify_volume(trans, root_volume,
6369f5097dcSMatthew Dillon 					     dundomap, sizeof(hmp->blockmap));
63719619882SMatthew Dillon 			for (i = 0; i < HAMMER_MAX_ZONES; ++i)
6389f5097dcSMatthew Dillon 				hammer_crc_set_blockmap(&cundomap[i]);
6399f5097dcSMatthew Dillon 			bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
640e8599db1SMatthew Dillon 			hammer_modify_volume_done(root_volume);
641e8599db1SMatthew Dillon 		}
6429f5097dcSMatthew Dillon 	}
643e8599db1SMatthew Dillon 
644e8599db1SMatthew Dillon 	/*
645*6048b411SMatthew Dillon 	 * Flush UNDOs.  This also waits for I/Os to complete and flushes
646*6048b411SMatthew Dillon 	 * the cache on the target disk.
647059819e3SMatthew Dillon 	 */
648*6048b411SMatthew Dillon 	hammer_flusher_flush_undos(hmp, 1);
649059819e3SMatthew Dillon 
650cdb6e4e6SMatthew Dillon 	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
651cdb6e4e6SMatthew Dillon 		goto failed;
652cdb6e4e6SMatthew Dillon 
653059819e3SMatthew Dillon 	/*
65402428fb6SMatthew Dillon 	 * HAMMER VERSION < 4:
65502428fb6SMatthew Dillon 	 *	Update the on-disk volume header with new UNDO FIFO end
65602428fb6SMatthew Dillon 	 *	position (do not generate new UNDO records for this change).
65702428fb6SMatthew Dillon 	 *	We have to do this for the UNDO FIFO whether (final) is
65802428fb6SMatthew Dillon 	 *	set or not in order for the UNDOs to be recognized on
65902428fb6SMatthew Dillon 	 *	recovery.
66002428fb6SMatthew Dillon 	 *
66102428fb6SMatthew Dillon 	 * HAMMER VERSION >= 4:
66202428fb6SMatthew Dillon 	 *	The UNDO FIFO data written above will be recognized on
66302428fb6SMatthew Dillon 	 *	recovery without us having to sync the volume header.
664c9b9e29dSMatthew Dillon 	 *
6659f5097dcSMatthew Dillon 	 * Also update the on-disk next_tid field.  This does not require
6669f5097dcSMatthew Dillon 	 * an UNDO.  However, because our TID is generated before we get
6679f5097dcSMatthew Dillon 	 * the sync lock another sync may have beat us to the punch.
6689f5097dcSMatthew Dillon 	 *
66906ad81ffSMatthew Dillon 	 * This also has the side effect of updating first_offset based on
67006ad81ffSMatthew Dillon 	 * a prior finalization when the first finalization of the next flush
67106ad81ffSMatthew Dillon 	 * cycle occurs, removing any undo info from the prior finalization
67206ad81ffSMatthew Dillon 	 * from consideration.
67306ad81ffSMatthew Dillon 	 *
6749f5097dcSMatthew Dillon 	 * The volume header will be flushed out synchronously.
675c9b9e29dSMatthew Dillon 	 */
6769f5097dcSMatthew Dillon 	dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
6779f5097dcSMatthew Dillon 	cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
6789f5097dcSMatthew Dillon 
6799f5097dcSMatthew Dillon 	if (dundomap->first_offset != cundomap->first_offset ||
6809f5097dcSMatthew Dillon 		   dundomap->next_offset != cundomap->next_offset) {
6819f5097dcSMatthew Dillon 		hammer_modify_volume(NULL, root_volume, NULL, 0);
6829f5097dcSMatthew Dillon 		dundomap->first_offset = cundomap->first_offset;
6839f5097dcSMatthew Dillon 		dundomap->next_offset = cundomap->next_offset;
6849f5097dcSMatthew Dillon 		hammer_crc_set_blockmap(dundomap);
6859f5097dcSMatthew Dillon 		hammer_modify_volume_done(root_volume);
6869f5097dcSMatthew Dillon 	}
6879f5097dcSMatthew Dillon 
6884889cbd4SMatthew Dillon 	/*
6894889cbd4SMatthew Dillon 	 * vol0_next_tid is used for TID selection and is updated without
6904889cbd4SMatthew Dillon 	 * an UNDO so we do not reuse a TID that may have been rolled-back.
6914889cbd4SMatthew Dillon 	 *
6924889cbd4SMatthew Dillon 	 * vol0_last_tid is the highest fully-synchronized TID.  It is
6934889cbd4SMatthew Dillon 	 * set-up when the UNDO fifo is fully synced, later on (not here).
6944889cbd4SMatthew Dillon 	 */
6959f5097dcSMatthew Dillon 	if (root_volume->io.modified) {
696adf01747SMatthew Dillon 		hammer_modify_volume(NULL, root_volume, NULL, 0);
697adf01747SMatthew Dillon 		if (root_volume->ondisk->vol0_next_tid < trans->tid)
698adf01747SMatthew Dillon 			root_volume->ondisk->vol0_next_tid = trans->tid;
699adf01747SMatthew Dillon 		hammer_crc_set_volume(root_volume->ondisk);
700adf01747SMatthew Dillon 		hammer_modify_volume_done(root_volume);
701710733a6SMatthew Dillon 		hammer_io_flush(&root_volume->io, 0);
70219619882SMatthew Dillon 	}
703059819e3SMatthew Dillon 
704059819e3SMatthew Dillon 	/*
70502428fb6SMatthew Dillon 	 * Wait for I/Os to complete.
70602428fb6SMatthew Dillon 	 *
70702428fb6SMatthew Dillon 	 * For HAMMER VERSION 4+ filesystems we do not have to wait for
70802428fb6SMatthew Dillon 	 * the I/O to complete as the new UNDO FIFO entries are recognized
70902428fb6SMatthew Dillon 	 * even without the volume header update.  This allows the volume
71002428fb6SMatthew Dillon 	 * header to be flushed along with meta-data, significantly reducing
71102428fb6SMatthew Dillon 	 * flush overheads.
712059819e3SMatthew Dillon 	 */
713a99b9ea2SMatthew Dillon 	hammer_flusher_clean_loose_ios(hmp);
71402428fb6SMatthew Dillon 	if (hmp->version < HAMMER_VOL_VERSION_FOUR)
715af209b0fSMatthew Dillon 		hammer_io_wait_all(hmp, "hmrfl2");
716059819e3SMatthew Dillon 
717cdb6e4e6SMatthew Dillon 	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
718cdb6e4e6SMatthew Dillon 		goto failed;
719cdb6e4e6SMatthew Dillon 
720059819e3SMatthew Dillon 	/*
721e8599db1SMatthew Dillon 	 * Flush meta-data.  The meta-data will be undone if we crash
72202428fb6SMatthew Dillon 	 * so we can safely flush it asynchronously.  There is no need
72302428fb6SMatthew Dillon 	 * to wait for I/O to complete (or issue a synchronous disk flush).
7249f5097dcSMatthew Dillon 	 *
72502428fb6SMatthew Dillon 	 * In fact, even if we did wait the meta-data will still be undone
72602428fb6SMatthew Dillon 	 * In fact, even if we did wait, the meta-data will still be undone
72702428fb6SMatthew Dillon 	 * in the volume header for the UNDO FIFO not being adjusted until
72802428fb6SMatthew Dillon 	 * the following flush cycle.
729059819e3SMatthew Dillon 	 */
730c9b9e29dSMatthew Dillon 	count = 0;
73110a5d1baSMatthew Dillon 	while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
732cdb6e4e6SMatthew Dillon 		if (io->ioerror)
733cdb6e4e6SMatthew Dillon 			break;
73410a5d1baSMatthew Dillon 		KKASSERT(io->modify_refs == 0);
735a99b9ea2SMatthew Dillon 		if (io->lock.refs == 0)
736a99b9ea2SMatthew Dillon 			++hammer_count_refedbufs;
73710a5d1baSMatthew Dillon 		hammer_ref(&io->lock);
73810a5d1baSMatthew Dillon 		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
739710733a6SMatthew Dillon 		hammer_io_flush(io, 0);
74009ac686bSMatthew Dillon 		hammer_rel_buffer((hammer_buffer_t)io, 0);
741c9b9e29dSMatthew Dillon 		++count;
742059819e3SMatthew Dillon 	}
7439f5097dcSMatthew Dillon 
7449f5097dcSMatthew Dillon 	/*
7459f5097dcSMatthew Dillon 	 * If this is the final finalization for the flush group, set
7469f5097dcSMatthew Dillon 	 * up for the next sequence by setting a new first_offset in
74706ad81ffSMatthew Dillon 	 * our cached blockmap and clearing the undo history.
74806ad81ffSMatthew Dillon 	 *
74906ad81ffSMatthew Dillon 	 * Even though we have updated our cached first_offset, the on-disk
75006ad81ffSMatthew Dillon 	 * first_offset still governs available-undo-space calculations.
7519f5097dcSMatthew Dillon 	 */
7529f5097dcSMatthew Dillon 	if (final) {
7539f5097dcSMatthew Dillon 		cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
7541b0ab2c3SMatthew Dillon 		if (cundomap->first_offset == cundomap->next_offset) {
7551b0ab2c3SMatthew Dillon 			hmp->hflags &= ~HMNT_UNDO_DIRTY;
7561b0ab2c3SMatthew Dillon 		} else {
7579f5097dcSMatthew Dillon 			cundomap->first_offset = cundomap->next_offset;
7581b0ab2c3SMatthew Dillon 			hmp->hflags |= HMNT_UNDO_DIRTY;
7591b0ab2c3SMatthew Dillon 		}
7609f5097dcSMatthew Dillon 		hammer_clear_undo_history(hmp);
7614889cbd4SMatthew Dillon 
7624889cbd4SMatthew Dillon 		/*
7634889cbd4SMatthew Dillon 		 * Flush tid sequencing.  flush_tid1 is fully synchronized,
7644889cbd4SMatthew Dillon 		 * meaning a crash will not roll it back.  flush_tid2 has
7654889cbd4SMatthew Dillon 		 * been written out asynchronously and a crash will roll
7664889cbd4SMatthew Dillon 		 * it back.  flush_tid1 is used for all mirroring masters.
7674889cbd4SMatthew Dillon 		 */
7684889cbd4SMatthew Dillon 		if (hmp->flush_tid1 != hmp->flush_tid2) {
7694889cbd4SMatthew Dillon 			hmp->flush_tid1 = hmp->flush_tid2;
7704889cbd4SMatthew Dillon 			wakeup(&hmp->flush_tid1);
7714889cbd4SMatthew Dillon 		}
7724889cbd4SMatthew Dillon 		hmp->flush_tid2 = trans->tid;
7739f5097dcSMatthew Dillon 	}
7749f5097dcSMatthew Dillon 
775cdb6e4e6SMatthew Dillon 	/*
776cdb6e4e6SMatthew Dillon 	 * Cleanup.  Report any critical errors.
777cdb6e4e6SMatthew Dillon 	 */
778cdb6e4e6SMatthew Dillon failed:
7792f85fa4dSMatthew Dillon 	hammer_sync_unlock(trans);
7806c1f89f4SMatthew Dillon 
781cdb6e4e6SMatthew Dillon 	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) {
782cdb6e4e6SMatthew Dillon 		kprintf("HAMMER(%s): Critical write error during flush, "
783cdb6e4e6SMatthew Dillon 			"refusing to sync UNDO FIFO\n",
784cdb6e4e6SMatthew Dillon 			root_volume->ondisk->vol_name);
785cdb6e4e6SMatthew Dillon 	}
786cdb6e4e6SMatthew Dillon 
7876c1f89f4SMatthew Dillon done:
7886c1f89f4SMatthew Dillon 	hammer_unlock(&hmp->flusher.finalize_lock);
7894889cbd4SMatthew Dillon 
7906c1f89f4SMatthew Dillon 	if (--hmp->flusher.finalize_want == 0)
7916c1f89f4SMatthew Dillon 		wakeup(&hmp->flusher.finalize_want);
792ce0138a6SMatthew Dillon 	hammer_stats_commits += final;
793059819e3SMatthew Dillon }
794059819e3SMatthew Dillon 
79506ad81ffSMatthew Dillon /*
796*6048b411SMatthew Dillon  * Flush UNDOs.  If already_flushed is non-zero, we force a disk sync
797*6048b411SMatthew Dillon  * even if no UNDOs are present.
798*6048b411SMatthew Dillon  */
799*6048b411SMatthew Dillon void
800*6048b411SMatthew Dillon hammer_flusher_flush_undos(hammer_mount_t hmp, int already_flushed)
801*6048b411SMatthew Dillon {
802*6048b411SMatthew Dillon 	hammer_io_t io;
803*6048b411SMatthew Dillon 	int count;
804*6048b411SMatthew Dillon 
805*6048b411SMatthew Dillon 	if (already_flushed == 0 && TAILQ_EMPTY(&hmp->undo_list))
806*6048b411SMatthew Dillon 		return;
807*6048b411SMatthew Dillon 
808*6048b411SMatthew Dillon 	count = 0;
809*6048b411SMatthew Dillon 	while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
810*6048b411SMatthew Dillon 		if (io->ioerror)
811*6048b411SMatthew Dillon 			break;
812*6048b411SMatthew Dillon 		KKASSERT(io->modify_refs == 0);
813*6048b411SMatthew Dillon 		if (io->lock.refs == 0)
814*6048b411SMatthew Dillon 			++hammer_count_refedbufs;
815*6048b411SMatthew Dillon 		hammer_ref(&io->lock);
816*6048b411SMatthew Dillon 		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
817*6048b411SMatthew Dillon 		hammer_io_flush(io, hammer_undo_reclaim(io));
818*6048b411SMatthew Dillon 		hammer_rel_buffer((hammer_buffer_t)io, 0);
819*6048b411SMatthew Dillon 		++count;
820*6048b411SMatthew Dillon 	}
821*6048b411SMatthew Dillon 	hammer_flusher_clean_loose_ios(hmp);
822*6048b411SMatthew Dillon 	hammer_io_wait_all(hmp, "hmrfl1");
823*6048b411SMatthew Dillon }
824*6048b411SMatthew Dillon 
825*6048b411SMatthew Dillon /*
82606ad81ffSMatthew Dillon  * Return non-zero if too many dirty meta-data buffers have built up.
82706ad81ffSMatthew Dillon  *
82806ad81ffSMatthew Dillon  * Since we cannot allow such buffers to flush until we have dealt with
82906ad81ffSMatthew Dillon  * the UNDOs, we risk deadlocking the kernel's buffer cache.
83006ad81ffSMatthew Dillon  */
83106ad81ffSMatthew Dillon int
83206ad81ffSMatthew Dillon hammer_flusher_meta_limit(hammer_mount_t hmp)
83306ad81ffSMatthew Dillon {
834f5a07a7aSMatthew Dillon 	if (hmp->locked_dirty_space + hmp->io_running_space >
835f5a07a7aSMatthew Dillon 	    hammer_limit_dirtybufspace) {
83606ad81ffSMatthew Dillon 		return(1);
83706ad81ffSMatthew Dillon 	}
83806ad81ffSMatthew Dillon 	return(0);
83906ad81ffSMatthew Dillon }
84006ad81ffSMatthew Dillon 
8411b0ab2c3SMatthew Dillon /*
8421b0ab2c3SMatthew Dillon  * Return non-zero if too many dirty meta-data buffers have built up.
8431b0ab2c3SMatthew Dillon  *
8441b0ab2c3SMatthew Dillon  * This version is used by background operations (mirror, prune, reblock)
8451b0ab2c3SMatthew Dillon  * to leave room for foreground operations.
8461b0ab2c3SMatthew Dillon  */
84793291532SMatthew Dillon int
84893291532SMatthew Dillon hammer_flusher_meta_halflimit(hammer_mount_t hmp)
84993291532SMatthew Dillon {
85093291532SMatthew Dillon 	if (hmp->locked_dirty_space + hmp->io_running_space >
85193291532SMatthew Dillon 	    hammer_limit_dirtybufspace / 2) {
85293291532SMatthew Dillon 		return(1);
85393291532SMatthew Dillon 	}
85493291532SMatthew Dillon 	return(0);
85593291532SMatthew Dillon }
85693291532SMatthew Dillon 
8571b0ab2c3SMatthew Dillon /*
8581b0ab2c3SMatthew Dillon  * Return non-zero if the flusher still has something to flush.
8591b0ab2c3SMatthew Dillon  */
8601b0ab2c3SMatthew Dillon int
8611b0ab2c3SMatthew Dillon hammer_flusher_haswork(hammer_mount_t hmp)
8621b0ab2c3SMatthew Dillon {
863cdb6e4e6SMatthew Dillon 	if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
864cdb6e4e6SMatthew Dillon 		return(0);
8651b0ab2c3SMatthew Dillon 	if (TAILQ_FIRST(&hmp->flush_group_list) ||	/* dirty inodes */
86683ec399bSMichael Neumann 	    TAILQ_FIRST(&hmp->volu_list) ||		/* dirty buffers */
8671b0ab2c3SMatthew Dillon 	    TAILQ_FIRST(&hmp->undo_list) ||
8681b0ab2c3SMatthew Dillon 	    TAILQ_FIRST(&hmp->data_list) ||
8691b0ab2c3SMatthew Dillon 	    TAILQ_FIRST(&hmp->meta_list) ||
8701b0ab2c3SMatthew Dillon 	    (hmp->hflags & HMNT_UNDO_DIRTY)		/* UNDO FIFO sync */
8711b0ab2c3SMatthew Dillon 	) {
8721b0ab2c3SMatthew Dillon 		return(1);
8731b0ab2c3SMatthew Dillon 	}
8741b0ab2c3SMatthew Dillon 	return(0);
8751b0ab2c3SMatthew Dillon }
8761b0ab2c3SMatthew Dillon 
877