/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.45 2008/07/31 04:42:04 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"

static void hammer_flusher_master_thread(void *arg);
static void hammer_flusher_slave_thread(void *arg);
static void hammer_flusher_flush(hammer_mount_t hmp);
static void hammer_flusher_flush_inode(hammer_inode_t ip,
                                        hammer_transaction_t trans);

RB_GENERATE(hammer_fls_rb_tree, hammer_inode, rb_flsnode,
              hammer_ino_rb_compare);

/*
 * Inodes are sorted and assigned to slave threads in groups of 128.
 * We want a flush group size large enough such that the slave threads
 * are not likely to interfere with each other when accessing the B-Tree,
 * but not so large that we lose concurrency.
 */
#define HAMMER_FLUSH_GROUP_SIZE 128

/*
 * Support structures for the flusher threads.
 */
struct hammer_flusher_info {
        TAILQ_ENTRY(hammer_flusher_info) entry;
        struct hammer_mount *hmp;
        thread_t        td;
        int             runstate;
        int             count;
        hammer_flush_group_t flg;
        hammer_inode_t  work_array[HAMMER_FLUSH_GROUP_SIZE];
};

typedef struct hammer_flusher_info *hammer_flusher_info_t;

/*
 * Sync all inodes pending on the flusher.
 *
 * All flush groups will be flushed.  This does not queue dirty inodes
 * to the flush groups, it just flushes out what has already been queued!
 */
void
hammer_flusher_sync(hammer_mount_t hmp)
{
        int seq;

        seq = hammer_flusher_async(hmp, NULL);
        hammer_flusher_wait(hmp, seq);
}

/*
 * Sync all inodes pending on the flusher - return immediately.
 *
 * All flush groups will be flushed.
 */
int
hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg)
{
        hammer_flush_group_t flg;
        int seq = hmp->flusher.next;

        TAILQ_FOREACH(flg, &hmp->flush_group_list, flush_entry) {
                if (flg->running == 0)
                        ++seq;
                flg->closed = 1;
                if (flg == close_flg)
                        break;
        }
        if (hmp->flusher.td) {
                if (hmp->flusher.signal++ == 0)
                        wakeup(&hmp->flusher.signal);
        } else {
                seq = hmp->flusher.done;
        }
        return(seq);
}
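
/*
 * Queue an asynchronous flush of whatever has already been queued to the
 * flusher without closing any flush groups, and return the sequence
 * number the caller can pass to hammer_flusher_wait().  If no flusher
 * thread is running the last completed sequence number is returned.
 */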
int
hammer_flusher_async_one(hammer_mount_t hmp)
{
        int seq;

        if (hmp->flusher.td) {
                seq = hmp->flusher.next;
                if (hmp->flusher.signal++ == 0)
                        wakeup(&hmp->flusher.signal);
        } else {
                seq = hmp->flusher.done;
        }
        return(seq);
}

/*
 * Wait for the flusher to get to the specified sequence number.
 * Signal the flusher as often as necessary to keep it going.
 */
void
hammer_flusher_wait(hammer_mount_t hmp, int seq)
{
        while ((int)(seq - hmp->flusher.done) > 0) {
                if (hmp->flusher.act != seq) {
                        if (hmp->flusher.signal++ == 0)
                                wakeup(&hmp->flusher.signal);
                }
                tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
        }
}
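
/*
 * Convenience wrapper: queue one asynchronous flush and wait for that
 * sequence number to complete.
 */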
void
hammer_flusher_wait_next(hammer_mount_t hmp)
{
        int seq;

        seq = hammer_flusher_async_one(hmp);
        hammer_flusher_wait(hmp, seq);
}
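
/*
 * Set up the flusher for a mount: initialize the sequence numbers and
 * thread queues, then start the master thread and the HAMMER_MAX_FLUSHERS
 * slave threads.
 */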
void
hammer_flusher_create(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;
        int i;

        hmp->flusher.signal = 0;
        hmp->flusher.act = 0;
        hmp->flusher.done = 0;
        hmp->flusher.next = 1;
        hammer_ref(&hmp->flusher.finalize_lock);
        TAILQ_INIT(&hmp->flusher.run_list);
        TAILQ_INIT(&hmp->flusher.ready_list);

        lwkt_create(hammer_flusher_master_thread, hmp,
                    &hmp->flusher.td, NULL, 0, -1, "hammer-M");
        for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
                info = kmalloc(sizeof(*info), hmp->m_misc, M_WAITOK|M_ZERO);
                info->hmp = hmp;
                TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
                lwkt_create(hammer_flusher_slave_thread, info,
                            &info->td, NULL, 0, -1, "hammer-S%d", i);
        }
}
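
/*
 * Tear down the flusher: tell the master thread to exit and wait for it,
 * then wake each idle slave with a negative runstate and free its
 * per-thread info structure.
 */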
void
hammer_flusher_destroy(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;

        /*
         * Kill the master
         */
        hmp->flusher.exiting = 1;
        while (hmp->flusher.td) {
                ++hmp->flusher.signal;
                wakeup(&hmp->flusher.signal);
                tsleep(&hmp->flusher.exiting, 0, "hmrwex", hz);
        }

        /*
         * Kill the slaves
         */
        while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) != NULL) {
                KKASSERT(info->runstate == 0);
                TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                info->runstate = -1;
                wakeup(&info->runstate);
                while (info->td)
                        tsleep(&info->td, 0, "hmrwwc", 0);
                kfree(info, hmp->m_misc);
        }
}

/*
 * The master flusher thread manages the flusher sequence id and
 * synchronization with the slave work threads.
 */
static void
hammer_flusher_master_thread(void *arg)
{
        hammer_flush_group_t flg;
        hammer_mount_t hmp;

        hmp = arg;

        for (;;) {
                /*
                 * Do at least one flush cycle.  We may have to update the
                 * UNDO FIFO even if no inodes are queued.
                 */
                for (;;) {
                        while (hmp->flusher.group_lock)
                                tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0);
                        hmp->flusher.act = hmp->flusher.next;
                        ++hmp->flusher.next;
                        hammer_flusher_clean_loose_ios(hmp);
                        hammer_flusher_flush(hmp);
                        hmp->flusher.done = hmp->flusher.act;
                        wakeup(&hmp->flusher.done);
                        flg = TAILQ_FIRST(&hmp->flush_group_list);
                        if (flg == NULL || flg->closed == 0)
                                break;
                        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                                break;
                }

                /*
                 * Wait for activity.
                 */
                if (hmp->flusher.exiting && TAILQ_EMPTY(&hmp->flush_group_list))
                        break;
                while (hmp->flusher.signal == 0)
                        tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0);

                /*
                 * Flush for each count on signal but only allow one extra
                 * flush request to build up.
                 */
                if (--hmp->flusher.signal != 0)
                        hmp->flusher.signal = 1;
        }

        /*
         * And we are done.
         */
        hmp->flusher.td = NULL;
        wakeup(&hmp->flusher.exiting);
        lwkt_exit();
}

/*
 * Flush all inodes in the current flush group.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;
        hammer_flush_group_t flg;
        hammer_reserve_t resv;
        hammer_inode_t ip;
        hammer_inode_t next_ip;
        int slave_index;
        int count;

        /*
         * Just in case there's a flush race on mount
         */
        if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL)
                return;

        /*
         * We only do one flg but we may have to loop/retry.
         */
        count = 0;
        while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
                ++count;
                if (hammer_debug_general & 0x0001) {
                        kprintf("hammer_flush %d ttl=%d recs=%d\n",
                                hmp->flusher.act,
                                flg->total_count, flg->refs);
                }
                if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                        break;
                hammer_start_transaction_fls(&hmp->flusher.trans, hmp);

                /*
                 * If the previous flush cycle just about exhausted our
                 * UNDO space we may have to do a dummy cycle to move the
                 * first_offset up before actually digging into a new cycle,
                 * or the new cycle will not have sufficient undo space.
                 */
                if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
                        hammer_flusher_finalize(&hmp->flusher.trans, 0);

                /*
                 * Ok, we are running this flush group now (this prevents new
                 * additions to it).
                 */
                flg->running = 1;
                if (hmp->next_flush_group == flg)
                        hmp->next_flush_group = TAILQ_NEXT(flg, flush_entry);

                /*
                 * Iterate the inodes in the flg's flush_tree and assign
                 * them to slaves.
                 */
                slave_index = 0;
                info = TAILQ_FIRST(&hmp->flusher.ready_list);
                next_ip = RB_FIRST(hammer_fls_rb_tree, &flg->flush_tree);

                while ((ip = next_ip) != NULL) {
                        next_ip = RB_NEXT(hammer_fls_rb_tree,
                                          &flg->flush_tree, ip);

                        if (++hmp->check_yield > hammer_yield_check) {
                                hmp->check_yield = 0;
                                lwkt_user_yield();
                        }

                        /*
                         * Add ip to the slave's work array.  The slave is
                         * not currently running.
                         */
                        info->work_array[info->count++] = ip;
                        if (info->count != HAMMER_FLUSH_GROUP_SIZE)
                                continue;

                        /*
                         * Get the slave running
                         */
                        TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                        TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
                        info->flg = flg;
                        info->runstate = 1;
                        wakeup(&info->runstate);

                        /*
                         * Get a new slave.  We may have to wait for one to
                         * finish running.
                         */
                        while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) == NULL) {
                                tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
                        }
                }

                /*
                 * Run the current slave if necessary
                 */
                if (info->count) {
                        TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                        TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
                        info->flg = flg;
                        info->runstate = 1;
                        wakeup(&info->runstate);
                }

                /*
                 * Wait for all slaves to finish running
                 */
                while (TAILQ_FIRST(&hmp->flusher.run_list) != NULL)
                        tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);

                /*
                 * Do the final finalization, clean up
                 */
                hammer_flusher_finalize(&hmp->flusher.trans, 1);
                hmp->flusher.tid = hmp->flusher.trans.tid;

                hammer_done_transaction(&hmp->flusher.trans);

                /*
                 * Loop up on the same flg.  If the flg is done clean it up
                 * and break out.  We only flush one flg.
                 */
                if (RB_EMPTY(&flg->flush_tree)) {
                        KKASSERT(flg->refs == 0);
                        TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
                        kfree(flg, hmp->m_misc);
                        break;
                }
        }

        /*
         * We may have pure meta-data to flush, or we may have to finish
         * cycling the UNDO FIFO, even if there were no flush groups.
         */
        if (count == 0 && hammer_flusher_haswork(hmp)) {
                hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
                hammer_flusher_finalize(&hmp->flusher.trans, 1);
                hammer_done_transaction(&hmp->flusher.trans);
        }

        /*
         * Clean up any freed big-blocks (typically zone-2).
         * resv->flush_group is typically set several flush groups ahead
         * of the free to ensure that the freed block is not reused until
         * it can no longer be reused.
         */
        while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
                if (resv->flush_group != hmp->flusher.act)
                        break;
                hammer_reserve_clrdelay(hmp, resv);
        }
}

/*
 * The slave flusher thread pulls work off the master flush list until no
 * work is left.
 */
static void
hammer_flusher_slave_thread(void *arg)
{
        hammer_flush_group_t flg;
        hammer_flusher_info_t info;
        hammer_mount_t hmp;
        hammer_inode_t ip;
        int i;

        info = arg;
        hmp = info->hmp;

        for (;;) {
                while (info->runstate == 0)
                        tsleep(&info->runstate, 0, "hmrssw", 0);
                if (info->runstate < 0)
                        break;
                flg = info->flg;

                for (i = 0; i < info->count; ++i) {
                        ip = info->work_array[i];
                        hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
                        ++hammer_stats_inode_flushes;
                }
                info->count = 0;
                info->runstate = 0;
                TAILQ_REMOVE(&hmp->flusher.run_list, info, entry);
                TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
                wakeup(&hmp->flusher.ready_list);
        }
        info->td = NULL;
        wakeup(&info->td);
        lwkt_exit();
}
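
/*
 * Reap 'loose' buffers - buffers that have lost their backing bp and are
 * no longer tracked by the kernel (see the comment in the body below).
 */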
void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
        hammer_buffer_t buffer;
        hammer_io_t io;

        /*
         * loose ends - buffers without bp's aren't tracked by the kernel
         * and can build up, so clean them out.  This can occur when an
         * IO completes on a buffer with no references left.
         */
        if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                crit_enter();   /* biodone() race */
                while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                        KKASSERT(io->mod_list == &hmp->lose_list);
                        TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
                        io->mod_list = NULL;
                        if (io->lock.refs == 0)
                                ++hammer_count_refedbufs;
                        hammer_ref(&io->lock);
                        buffer = (void *)io;
                        hammer_rel_buffer(buffer, 0);
                }
                crit_exit();
        }
}

/*
 * Flush a single inode that is part of a flush group.
 *
 * Flusher errors are extremely serious, even ENOSPC shouldn't occur because
 * the front-end should have reserved sufficient space on the media.  Any
 * error other than EWOULDBLOCK will force the mount to be read-only.
 */
static
void
hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
{
        hammer_mount_t hmp = ip->hmp;
        int error;

        hammer_flusher_clean_loose_ios(hmp);
        error = hammer_sync_inode(trans, ip);

        /*
         * EWOULDBLOCK can happen under normal operation, all other errors
         * are considered extremely serious.  We must set WOULDBLOCK
         * mechanics to deal with the mess left over from the abort of the
         * previous flush.
         */
        if (error) {
                ip->flags |= HAMMER_INODE_WOULDBLOCK;
                if (error == EWOULDBLOCK)
                        error = 0;
        }
        hammer_flush_inode_done(ip, error);
        while (hmp->flusher.finalize_want)
                tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
        if (hammer_flusher_undo_exhausted(trans, 1)) {
                kprintf("HAMMER: Warning: UNDO area too small!\n");
                hammer_flusher_finalize(trans, 1);
        } else if (hammer_flusher_meta_limit(trans->hmp)) {
                hammer_flusher_finalize(trans, 0);
        }
}

/*
 * Return non-zero if the UNDO area has less than (QUARTER / 4) of its
 * space left.
 *
 * 1/4 - Emergency free undo space level.  Below this point the flusher
 *       will finalize even if directory dependencies have not been resolved.
 *
 * 2/4 - Used by the pruning and reblocking code.  These functions may be
 *       running in parallel with a flush and cannot be allowed to drop
 *       available undo space to emergency levels.
 *
 * 3/4 - Used at the beginning of a flush to force-sync the volume header
 *       to give the flush plenty of runway to work in.
 */
int
hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter)
{
        if (hammer_undo_space(trans) <
            hammer_undo_max(trans->hmp) * quarter / 4) {
                return(1);
        } else {
                return(0);
        }
}

/*
 * Flush all pending UNDOs, wait for write completion, update the volume
 * header with the new UNDO end position, and flush it.  Then
 * asynchronously flush the meta-data.
 *
 * If this is the last finalization in a flush group we also synchronize
 * our cached blockmap and set hmp->flusher_undo_start and our cached undo
 * fifo first_offset so the next flush resets the FIFO pointers.
 *
 * If this is not final it is being called because too many dirty meta-data
 * buffers have built up and must be flushed with UNDO synchronization to
 * avoid a buffer cache deadlock.
 */
void
hammer_flusher_finalize(hammer_transaction_t trans, int final)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t cundomap, dundomap;
        hammer_mount_t hmp;
        hammer_io_t io;
        int count;
        int i;

        hmp = trans->hmp;
        root_volume = trans->rootvol;

        /*
         * Exclusively lock the flusher.  This guarantees that all dirty
         * buffers will be idled (have a mod-count of 0).
         */
        ++hmp->flusher.finalize_want;
        hammer_lock_ex(&hmp->flusher.finalize_lock);

        /*
         * If this isn't the final sync several threads may have hit the
         * meta-limit at the same time and raced.  Only sync if we really
         * have to, after acquiring the lock.
         */
        if (final == 0 && !hammer_flusher_meta_limit(hmp))
                goto done;

        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                goto done;

        /*
         * Flush data buffers.  This can occur asynchronously and at any
         * time.  We must interlock against the frontend direct-data write
         * but do not have to acquire the sync-lock yet.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
                if (io->ioerror)
                        break;
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                hammer_io_write_interlock(io);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io, 0);
                hammer_io_done_interlock(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * The sync-lock is required for the remaining sequence.  This lock
         * prevents meta-data from being modified.
         */
        hammer_sync_lock_ex(trans);

        /*
         * If we have been asked to finalize the volume header sync the
         * cached blockmap to the on-disk blockmap.  Generate an UNDO
         * record for the update.
         */
        if (final) {
                cundomap = &hmp->blockmap[0];
                dundomap = &root_volume->ondisk->vol0_blockmap[0];
                if (root_volume->io.modified) {
                        hammer_modify_volume(trans, root_volume,
                                             dundomap, sizeof(hmp->blockmap));
                        for (i = 0; i < HAMMER_MAX_ZONES; ++i)
                                hammer_crc_set_blockmap(&cundomap[i]);
                        bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
                        hammer_modify_volume_done(root_volume);
                }
        }

        /*
         * Flush UNDOs.  This also waits for I/Os to complete and flushes
         * the cache on the target disk.
         */
        hammer_flusher_flush_undos(hmp, 1);

        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                goto failed;

        /*
         * HAMMER VERSION < 4:
         *      Update the on-disk volume header with new UNDO FIFO end
         *      position (do not generate new UNDO records for this change).
         *      We have to do this for the UNDO FIFO whether (final) is
         *      set or not in order for the UNDOs to be recognized on
         *      recovery.
         *
         * HAMMER VERSION >= 4:
         *      The UNDO FIFO data written above will be recognized on
         *      recovery without us having to sync the volume header.
         *
         * Also update the on-disk next_tid field.  This does not require
         * an UNDO.  However, because our TID is generated before we get
         * the sync lock another sync may have beat us to the punch.
         *
         * This also has the side effect of updating first_offset based on
         * a prior finalization when the first finalization of the next flush
         * cycle occurs, removing any undo info from the prior finalization
         * from consideration.
         *
         * The volume header will be flushed out synchronously.
         */
        dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
        cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

        if (dundomap->first_offset != cundomap->first_offset ||
            dundomap->next_offset != cundomap->next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                dundomap->first_offset = cundomap->first_offset;
                dundomap->next_offset = cundomap->next_offset;
                hammer_crc_set_blockmap(dundomap);
                hammer_modify_volume_done(root_volume);
        }

        /*
         * vol0_next_tid is used for TID selection and is updated without
         * an UNDO so we do not reuse a TID that may have been rolled-back.
         *
         * vol0_last_tid is the highest fully-synchronized TID.  It is
         * set up when the UNDO fifo is fully synced, later on (not here).
         */
        if (root_volume->io.modified) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                if (root_volume->ondisk->vol0_next_tid < trans->tid)
                        root_volume->ondisk->vol0_next_tid = trans->tid;
                hammer_crc_set_volume(root_volume->ondisk);
                hammer_modify_volume_done(root_volume);
                hammer_io_flush(&root_volume->io, 0);
        }

        /*
         * Wait for I/Os to complete.
         *
         * For HAMMER VERSION 4+ filesystems we do not have to wait for
         * the I/O to complete as the new UNDO FIFO entries are recognized
         * even without the volume header update.  This allows the volume
         * header to be flushed along with meta-data, significantly reducing
         * flush overheads.
         */
        hammer_flusher_clean_loose_ios(hmp);
        if (hmp->version < HAMMER_VOL_VERSION_FOUR)
                hammer_io_wait_all(hmp, "hmrfl2");

        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                goto failed;

        /*
         * Flush meta-data.  The meta-data will be undone if we crash
         * so we can safely flush it asynchronously.  There is no need
         * to wait for I/O to complete (or issue a synchronous disk flush).
         *
         * In fact, even if we did wait the meta-data will still be undone
         * by a crash up until the next flush cycle due to the first_offset
         * in the volume header for the UNDO FIFO not being adjusted until
         * the following flush cycle.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
                if (io->ioerror)
                        break;
                KKASSERT(io->modify_refs == 0);
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io, 0);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * If this is the final finalization for the flush group set
         * up for the next sequence by setting a new first_offset in
         * our cached blockmap and clearing the undo history.
         *
         * Even though we have updated our cached first_offset, the on-disk
         * first_offset still governs available-undo-space calculations.
         */
        if (final) {
                cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
                if (cundomap->first_offset == cundomap->next_offset) {
                        hmp->hflags &= ~HMNT_UNDO_DIRTY;
                } else {
                        cundomap->first_offset = cundomap->next_offset;
                        hmp->hflags |= HMNT_UNDO_DIRTY;
                }
                hammer_clear_undo_history(hmp);

                /*
                 * Flush tid sequencing.  flush_tid1 is fully synchronized,
                 * meaning a crash will not roll it back.  flush_tid2 has
                 * been written out asynchronously and a crash will roll
                 * it back.  flush_tid1 is used for all mirroring masters.
                 */
                if (hmp->flush_tid1 != hmp->flush_tid2) {
                        hmp->flush_tid1 = hmp->flush_tid2;
                        wakeup(&hmp->flush_tid1);
                }
                hmp->flush_tid2 = trans->tid;
        }

        /*
         * Cleanup.  Report any critical errors.
         */
failed:
        hammer_sync_unlock(trans);

        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) {
                kprintf("HAMMER(%s): Critical write error during flush, "
                        "refusing to sync UNDO FIFO\n",
                        root_volume->ondisk->vol_name);
        }

done:
        hammer_unlock(&hmp->flusher.finalize_lock);

        if (--hmp->flusher.finalize_want == 0)
                wakeup(&hmp->flusher.finalize_want);
        hammer_stats_commits += final;
}

/*
 * Flush UNDOs.  If already_flushed is non-zero we force a disk sync
 * even if no UNDOs are present.
 */
void
hammer_flusher_flush_undos(hammer_mount_t hmp, int already_flushed)
{
        hammer_io_t io;
        int count;

        if (already_flushed == 0 && TAILQ_EMPTY(&hmp->undo_list))
                return;

        count = 0;
        while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
                if (io->ioerror)
                        break;
                KKASSERT(io->modify_refs == 0);
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io, hammer_undo_reclaim(io));
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }
        hammer_flusher_clean_loose_ios(hmp);
        hammer_io_wait_all(hmp, "hmrfl1");
}

/*
 * Return non-zero if too many dirty meta-data buffers have built up.
 *
 * Since we cannot allow such buffers to flush until we have dealt with
 * the UNDOs, we risk deadlocking the kernel's buffer cache.
 */
int
hammer_flusher_meta_limit(hammer_mount_t hmp)
{
        if (hmp->locked_dirty_space + hmp->io_running_space >
            hammer_limit_dirtybufspace) {
                return(1);
        }
        return(0);
}

/*
 * Return non-zero if too many dirty meta-data buffers have built up.
 *
 * This version is used by background operations (mirror, prune, reblock)
 * to leave room for foreground operations.
 */
int
hammer_flusher_meta_halflimit(hammer_mount_t hmp)
{
        if (hmp->locked_dirty_space + hmp->io_running_space >
            hammer_limit_dirtybufspace / 2) {
                return(1);
        }
        return(0);
}

/*
 * Return non-zero if the flusher still has something to flush.
 */
int
hammer_flusher_haswork(hammer_mount_t hmp)
{
        if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
                return(0);
        if (TAILQ_FIRST(&hmp->flush_group_list) ||      /* dirty inodes */
            TAILQ_FIRST(&hmp->volu_list) ||             /* dirty buffers */
            TAILQ_FIRST(&hmp->undo_list) ||
            TAILQ_FIRST(&hmp->data_list) ||
            TAILQ_FIRST(&hmp->meta_list) ||
            (hmp->hflags & HMNT_UNDO_DIRTY)             /* UNDO FIFO sync */
        ) {
                return(1);
        }
        return(0);
}