1bf686dbeSMatthew Dillon /* 2bf686dbeSMatthew Dillon * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3bf686dbeSMatthew Dillon * 4bf686dbeSMatthew Dillon * This code is derived from software contributed to The DragonFly Project 5bf686dbeSMatthew Dillon * by Matthew Dillon <dillon@backplane.com> 6bf686dbeSMatthew Dillon * 7bf686dbeSMatthew Dillon * Redistribution and use in source and binary forms, with or without 8bf686dbeSMatthew Dillon * modification, are permitted provided that the following conditions 9bf686dbeSMatthew Dillon * are met: 10bf686dbeSMatthew Dillon * 11bf686dbeSMatthew Dillon * 1. Redistributions of source code must retain the above copyright 12bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer. 13bf686dbeSMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 14bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer in 15bf686dbeSMatthew Dillon * the documentation and/or other materials provided with the 16bf686dbeSMatthew Dillon * distribution. 17bf686dbeSMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 18bf686dbeSMatthew Dillon * contributors may be used to endorse or promote products derived 19bf686dbeSMatthew Dillon * from this software without specific, prior written permission. 20bf686dbeSMatthew Dillon * 21bf686dbeSMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22bf686dbeSMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23bf686dbeSMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24bf686dbeSMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25bf686dbeSMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26bf686dbeSMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27bf686dbeSMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28bf686dbeSMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29bf686dbeSMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30bf686dbeSMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31bf686dbeSMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32bf686dbeSMatthew Dillon * SUCH DAMAGE. 33bf686dbeSMatthew Dillon * 3444a83111SMatthew Dillon * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.34 2008/11/13 02:18:43 dillon Exp $ 35bf686dbeSMatthew Dillon */ 36bf686dbeSMatthew Dillon /* 37bf686dbeSMatthew Dillon * HAMMER reblocker - This code frees up fragmented physical space 38bf686dbeSMatthew Dillon * 39bf686dbeSMatthew Dillon * HAMMER only keeps track of free space on a big-block basis. A big-block 40bf686dbeSMatthew Dillon * containing holes can only be freed by migrating the remaining data in 41bf686dbeSMatthew Dillon * that big-block into a new big-block, then freeing the big-block. 42bf686dbeSMatthew Dillon * 43bf686dbeSMatthew Dillon * This function is called from an ioctl or via the hammer support thread. 44bf686dbeSMatthew Dillon */ 45bf686dbeSMatthew Dillon 46bf686dbeSMatthew Dillon #include "hammer.h" 47bf686dbeSMatthew Dillon 4836f82b23SMatthew Dillon static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 49bf686dbeSMatthew Dillon hammer_cursor_t cursor, 50bf686dbeSMatthew Dillon hammer_btree_elm_t elm); 5136f82b23SMatthew Dillon static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, 52bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 532f85fa4dSMatthew Dillon static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 542f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 552f85fa4dSMatthew Dillon static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 56bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 57bf686dbeSMatthew Dillon 58bf686dbeSMatthew Dillon int 5936f82b23SMatthew Dillon hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, 6036f82b23SMatthew Dillon struct hammer_ioc_reblock *reblock) 61bf686dbeSMatthew Dillon { 62bf686dbeSMatthew Dillon struct hammer_cursor cursor; 63bf686dbeSMatthew Dillon hammer_btree_elm_t elm; 64a7e9bef1SMatthew Dillon int checkspace_count; 6593291532SMatthew Dillon int error; 6693291532SMatthew Dillon int seq; 677b6ccb11SMatthew Dillon int slop; 687b6ccb11SMatthew Dillon 697b6ccb11SMatthew Dillon /* 707b6ccb11SMatthew Dillon * A fill level <= 20% is considered an emergency. free_level is 717b6ccb11SMatthew Dillon * inverted from fill_level. 727b6ccb11SMatthew Dillon */ 737b6ccb11SMatthew Dillon if (reblock->free_level >= HAMMER_LARGEBLOCK_SIZE * 8 / 10) 747b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_EMERGENCY; 757b6ccb11SMatthew Dillon else 767b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_REBLOCK; 77bf686dbeSMatthew Dillon 78dd94f1b1SMatthew Dillon if ((reblock->key_beg.localization | reblock->key_end.localization) & 79dd94f1b1SMatthew Dillon HAMMER_LOCALIZE_PSEUDOFS_MASK) { 80dd94f1b1SMatthew Dillon return(EINVAL); 81dd94f1b1SMatthew Dillon } 82dd94f1b1SMatthew Dillon if (reblock->key_beg.obj_id >= reblock->key_end.obj_id) 83bf686dbeSMatthew Dillon return(EINVAL); 84bf686dbeSMatthew Dillon if (reblock->free_level < 0) 85bf686dbeSMatthew Dillon return(EINVAL); 86bf686dbeSMatthew Dillon 87dd94f1b1SMatthew Dillon reblock->key_cur = reblock->key_beg; 88842e7a70SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 89dd94f1b1SMatthew Dillon reblock->key_cur.localization += ip->obj_localization; 90814387f6SMatthew Dillon 91a7e9bef1SMatthew Dillon checkspace_count = 0; 9293291532SMatthew Dillon seq = trans->hmp->flusher.act; 93bf686dbeSMatthew Dillon retry: 944e17f465SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 95bf686dbeSMatthew Dillon if (error) { 96bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 97dd94f1b1SMatthew Dillon goto failed; 98bf686dbeSMatthew Dillon } 99dd94f1b1SMatthew Dillon cursor.key_beg.localization = reblock->key_cur.localization; 100dd94f1b1SMatthew Dillon cursor.key_beg.obj_id = reblock->key_cur.obj_id; 101bf686dbeSMatthew Dillon cursor.key_beg.key = HAMMER_MIN_KEY; 102bf686dbeSMatthew Dillon cursor.key_beg.create_tid = 1; 103bf686dbeSMatthew Dillon cursor.key_beg.delete_tid = 0; 104bf686dbeSMatthew Dillon cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; 105bf686dbeSMatthew Dillon cursor.key_beg.obj_type = 0; 106bf686dbeSMatthew Dillon 107842e7a70SMatthew Dillon cursor.key_end.localization = (reblock->key_end.localization & 108842e7a70SMatthew Dillon HAMMER_LOCALIZE_MASK) + 109dd94f1b1SMatthew Dillon ip->obj_localization; 110dd94f1b1SMatthew Dillon cursor.key_end.obj_id = reblock->key_end.obj_id; 111bf686dbeSMatthew Dillon cursor.key_end.key = HAMMER_MAX_KEY; 112bf686dbeSMatthew Dillon cursor.key_end.create_tid = HAMMER_MAX_TID - 1; 113bf686dbeSMatthew Dillon cursor.key_end.delete_tid = 0; 114bf686dbeSMatthew Dillon cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 115bf686dbeSMatthew Dillon cursor.key_end.obj_type = 0; 116bf686dbeSMatthew Dillon 117bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1189480ff55SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 119bf686dbeSMatthew Dillon 1202f85fa4dSMatthew Dillon /* 1212f85fa4dSMatthew Dillon * This flag allows the btree scan code to return internal nodes, 1222f85fa4dSMatthew Dillon * so we can reblock them in addition to the leafs. Only specify it 1232f85fa4dSMatthew Dillon * if we intend to reblock B-Tree nodes. 1242f85fa4dSMatthew Dillon */ 1252f85fa4dSMatthew Dillon if (reblock->head.flags & HAMMER_IOC_DO_BTREE) 1262f85fa4dSMatthew Dillon cursor.flags |= HAMMER_CURSOR_REBLOCKING; 1272f85fa4dSMatthew Dillon 128bf686dbeSMatthew Dillon error = hammer_btree_first(&cursor); 129bf686dbeSMatthew Dillon while (error == 0) { 1302f85fa4dSMatthew Dillon /* 1312f85fa4dSMatthew Dillon * Internal or Leaf node 1322f85fa4dSMatthew Dillon */ 13307ed04b5SMatthew Dillon KKASSERT(cursor.index < cursor.node->ondisk->count); 134bf686dbeSMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index]; 135dd94f1b1SMatthew Dillon reblock->key_cur.obj_id = elm->base.obj_id; 136dd94f1b1SMatthew Dillon reblock->key_cur.localization = elm->base.localization; 137bf686dbeSMatthew Dillon 1389480ff55SMatthew Dillon /* 1399f5097dcSMatthew Dillon * Yield to more important tasks 1409f5097dcSMatthew Dillon */ 1419f5097dcSMatthew Dillon if ((error = hammer_signal_check(trans->hmp)) != 0) 1429f5097dcSMatthew Dillon break; 143a7e9bef1SMatthew Dillon 144a7e9bef1SMatthew Dillon /* 145a7e9bef1SMatthew Dillon * If there is insufficient free space it may be due to 146a7e9bef1SMatthew Dillon * reserved bigblocks, which flushing might fix. 147c9ce54d6SMatthew Dillon * 14807ed04b5SMatthew Dillon * We must force a retest in case the unlocked cursor is 14907ed04b5SMatthew Dillon * moved to the end of the leaf, or moved to an internal 15007ed04b5SMatthew Dillon * node. 15107ed04b5SMatthew Dillon * 152c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 153a7e9bef1SMatthew Dillon */ 1547b6ccb11SMatthew Dillon if (hammer_checkspace(trans->hmp, slop)) { 155a7e9bef1SMatthew Dillon if (++checkspace_count == 10) { 156a7e9bef1SMatthew Dillon error = ENOSPC; 157a7e9bef1SMatthew Dillon break; 158a7e9bef1SMatthew Dillon } 159982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 16007ed04b5SMatthew Dillon cursor.flags |= HAMMER_CURSOR_RETEST; 16193291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 162982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 1637a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 16407ed04b5SMatthew Dillon goto skip; 16593291532SMatthew Dillon } 166a7e9bef1SMatthew Dillon 167a7e9bef1SMatthew Dillon /* 1689480ff55SMatthew Dillon * Acquiring the sync_lock prevents the operation from 1699480ff55SMatthew Dillon * crossing a synchronization boundary. 17009ac686bSMatthew Dillon * 17109ac686bSMatthew Dillon * NOTE: cursor.node may have changed on return. 172c9ce54d6SMatthew Dillon * 173c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 1749480ff55SMatthew Dillon */ 1752f85fa4dSMatthew Dillon hammer_sync_lock_sh(trans); 17636f82b23SMatthew Dillon error = hammer_reblock_helper(reblock, &cursor, elm); 1772f85fa4dSMatthew Dillon hammer_sync_unlock(trans); 17893291532SMatthew Dillon 17915e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 1807a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 181982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 18293291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 183982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 18415e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 18593291532SMatthew Dillon } 1861b0ab2c3SMatthew Dillon 1871b0ab2c3SMatthew Dillon /* 1881b0ab2c3SMatthew Dillon * Setup for iteration, our cursor flags may be modified by 1891b0ab2c3SMatthew Dillon * other threads while we are unlocked. 1901b0ab2c3SMatthew Dillon */ 191bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 1921b0ab2c3SMatthew Dillon 1931b0ab2c3SMatthew Dillon /* 1941b0ab2c3SMatthew Dillon * We allocate data buffers, which atm we don't track 1951b0ab2c3SMatthew Dillon * dirty levels for because we allow the kernel to write 1961b0ab2c3SMatthew Dillon * them. But if we allocate too many we can still deadlock 1971b0ab2c3SMatthew Dillon * the buffer cache. 1981b0ab2c3SMatthew Dillon * 199c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 2001b0ab2c3SMatthew Dillon * (The cursor's node and element may change!) 2011b0ab2c3SMatthew Dillon */ 2021b0ab2c3SMatthew Dillon if (bd_heatup()) { 203982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 2041b0ab2c3SMatthew Dillon bwillwrite(HAMMER_XBUFSIZE); 205982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 2061b0ab2c3SMatthew Dillon } 20707ed04b5SMatthew Dillon skip: 2081b0ab2c3SMatthew Dillon if (error == 0) { 209bf686dbeSMatthew Dillon error = hammer_btree_iterate(&cursor); 210bf686dbeSMatthew Dillon } 211bf686dbeSMatthew Dillon } 212bf686dbeSMatthew Dillon if (error == ENOENT) 213bf686dbeSMatthew Dillon error = 0; 214bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 21506ad81ffSMatthew Dillon if (error == EWOULDBLOCK) { 21606ad81ffSMatthew Dillon hammer_flusher_sync(trans->hmp); 21706ad81ffSMatthew Dillon goto retry; 21806ad81ffSMatthew Dillon } 219bf686dbeSMatthew Dillon if (error == EDEADLK) 220bf686dbeSMatthew Dillon goto retry; 22119619882SMatthew Dillon if (error == EINTR) { 22219619882SMatthew Dillon reblock->head.flags |= HAMMER_IOC_HEAD_INTR; 22319619882SMatthew Dillon error = 0; 22419619882SMatthew Dillon } 225dd94f1b1SMatthew Dillon failed: 226dd94f1b1SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 227bf686dbeSMatthew Dillon return(error); 228bf686dbeSMatthew Dillon } 229bf686dbeSMatthew Dillon 230bf686dbeSMatthew Dillon /* 231bf686dbeSMatthew Dillon * Reblock the B-Tree (leaf) node, record, and/or data if necessary. 232bf686dbeSMatthew Dillon * 2339480ff55SMatthew Dillon * XXX We have no visibility into internal B-Tree nodes at the moment, 2349480ff55SMatthew Dillon * only leaf nodes. 235bf686dbeSMatthew Dillon */ 236bf686dbeSMatthew Dillon static int 23736f82b23SMatthew Dillon hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 238bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 239bf686dbeSMatthew Dillon { 24043c665aeSMatthew Dillon hammer_mount_t hmp; 241bf686dbeSMatthew Dillon hammer_off_t tmp_offset; 242ebbcfba9SMatthew Dillon hammer_node_ondisk_t ondisk; 24344a83111SMatthew Dillon struct hammer_btree_leaf_elm leaf; 244bf686dbeSMatthew Dillon int error; 245bf686dbeSMatthew Dillon int bytes; 246bf686dbeSMatthew Dillon int cur; 247bf3b416bSMatthew Dillon int iocflags; 248bf686dbeSMatthew Dillon 249bf686dbeSMatthew Dillon error = 0; 25043c665aeSMatthew Dillon hmp = cursor->trans->hmp; 251bf686dbeSMatthew Dillon 252bf686dbeSMatthew Dillon /* 253bf686dbeSMatthew Dillon * Reblock data. Note that data embedded in a record is reblocked 2542f85fa4dSMatthew Dillon * by the record reblock code. Data processing only occurs at leaf 2552f85fa4dSMatthew Dillon * nodes and for RECORD element types. 256bf686dbeSMatthew Dillon */ 2572f85fa4dSMatthew Dillon if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF) 2582f85fa4dSMatthew Dillon goto skip; 2592f85fa4dSMatthew Dillon if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) 2602f85fa4dSMatthew Dillon return(0); 261bf686dbeSMatthew Dillon tmp_offset = elm->leaf.data_offset; 262bf3b416bSMatthew Dillon if (tmp_offset == 0) 263bf3b416bSMatthew Dillon goto skip; 264bf3b416bSMatthew Dillon if (error) 265bf3b416bSMatthew Dillon goto skip; 266bf3b416bSMatthew Dillon 267bf3b416bSMatthew Dillon /* 268bf3b416bSMatthew Dillon * NOTE: Localization restrictions may also have been set-up, we can't 269bf3b416bSMatthew Dillon * just set the match flags willy-nilly here. 270bf3b416bSMatthew Dillon */ 271bf3b416bSMatthew Dillon switch(elm->leaf.base.rec_type) { 272bf3b416bSMatthew Dillon case HAMMER_RECTYPE_INODE: 27383f2a3aaSMatthew Dillon case HAMMER_RECTYPE_SNAPSHOT: 27483f2a3aaSMatthew Dillon case HAMMER_RECTYPE_CONFIG: 275bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_INODES; 276bf3b416bSMatthew Dillon break; 277bf3b416bSMatthew Dillon case HAMMER_RECTYPE_EXT: 278bf3b416bSMatthew Dillon case HAMMER_RECTYPE_FIX: 279ea434b6fSMatthew Dillon case HAMMER_RECTYPE_PFS: 280bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DIRENTRY: 281bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DIRS; 282bf3b416bSMatthew Dillon break; 283bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DATA: 284bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DB: 285bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DATA; 286bf3b416bSMatthew Dillon break; 287bf3b416bSMatthew Dillon default: 288bf3b416bSMatthew Dillon iocflags = 0; 289bf3b416bSMatthew Dillon break; 290bf3b416bSMatthew Dillon } 291bf3b416bSMatthew Dillon if (reblock->head.flags & iocflags) { 292bf686dbeSMatthew Dillon ++reblock->data_count; 293bf686dbeSMatthew Dillon reblock->data_byte_count += elm->leaf.data_len; 29443c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 2956e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 2962f85fa4dSMatthew Dillon kprintf("D %6d/%d\n", bytes, reblock->free_level); 297bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 298bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 29944a83111SMatthew Dillon /* 30044a83111SMatthew Dillon * This is nasty, the uncache code may have to get 30144a83111SMatthew Dillon * vnode locks and because of that we can't hold 30244a83111SMatthew Dillon * the cursor locked. 303c9ce54d6SMatthew Dillon * 304c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() 305c9ce54d6SMatthew Dillon * function. 30644a83111SMatthew Dillon */ 30744a83111SMatthew Dillon leaf = elm->leaf; 308982be4bfSMatthew Dillon hammer_unlock_cursor(cursor); 30944a83111SMatthew Dillon hammer_io_direct_uncache(hmp, &leaf); 310982be4bfSMatthew Dillon hammer_lock_cursor(cursor); 311ebbcfba9SMatthew Dillon 312ebbcfba9SMatthew Dillon /* 313ebbcfba9SMatthew Dillon * elm may have become stale or invalid, reload it. 314ebbcfba9SMatthew Dillon * ondisk variable is temporary only. Note that 315ebbcfba9SMatthew Dillon * cursor->node and thus cursor->node->ondisk may 316ebbcfba9SMatthew Dillon * also changed. 317ebbcfba9SMatthew Dillon */ 318ebbcfba9SMatthew Dillon ondisk = cursor->node->ondisk; 319ebbcfba9SMatthew Dillon elm = &ondisk->elms[cursor->index]; 32044a83111SMatthew Dillon if (cursor->flags & HAMMER_CURSOR_RETEST) { 321ebbcfba9SMatthew Dillon kprintf("hammer: debug: retest on " 322ebbcfba9SMatthew Dillon "reblocker uncache\n"); 32344a83111SMatthew Dillon error = EDEADLK; 324ebbcfba9SMatthew Dillon } else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF || 325ebbcfba9SMatthew Dillon cursor->index >= ondisk->count) { 326ebbcfba9SMatthew Dillon kprintf("hammer: debug: shifted on " 327ebbcfba9SMatthew Dillon "reblocker uncache\n"); 328ebbcfba9SMatthew Dillon error = EDEADLK; 329ebbcfba9SMatthew Dillon } else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) { 330ebbcfba9SMatthew Dillon kprintf("hammer: debug: changed on " 331ebbcfba9SMatthew Dillon "reblocker uncache\n"); 332ebbcfba9SMatthew Dillon error = EDEADLK; 33344a83111SMatthew Dillon } 33444a83111SMatthew Dillon if (error == 0) 335bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 336bf686dbeSMatthew Dillon if (error == 0) { 33707ed04b5SMatthew Dillon KKASSERT(cursor->index < ondisk->count); 33836f82b23SMatthew Dillon error = hammer_reblock_data(reblock, 339bf686dbeSMatthew Dillon cursor, elm); 340bf686dbeSMatthew Dillon } 341bf686dbeSMatthew Dillon if (error == 0) { 342bf686dbeSMatthew Dillon ++reblock->data_moves; 343bf686dbeSMatthew Dillon reblock->data_byte_moves += elm->leaf.data_len; 344bf686dbeSMatthew Dillon } 345bf686dbeSMatthew Dillon } 346bf686dbeSMatthew Dillon } 347bf686dbeSMatthew Dillon 3482f85fa4dSMatthew Dillon skip: 349bf686dbeSMatthew Dillon /* 3501775b6a0SMatthew Dillon * Reblock a B-Tree internal or leaf node. A leaf node is reblocked 3511775b6a0SMatthew Dillon * on initial entry only (element 0). An internal node is reblocked 3521775b6a0SMatthew Dillon * when entered upward from its first leaf node only (also element 0). 3531775b6a0SMatthew Dillon * Further revisits of the internal node (index > 0) are ignored. 354bf686dbeSMatthew Dillon */ 355bf686dbeSMatthew Dillon tmp_offset = cursor->node->node_offset; 356bf3b416bSMatthew Dillon if (cursor->index == 0 && 357814387f6SMatthew Dillon error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { 358bf686dbeSMatthew Dillon ++reblock->btree_count; 35943c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3606e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 3612f85fa4dSMatthew Dillon kprintf("B %6d/%d\n", bytes, reblock->free_level); 362bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 363bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 364bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 365bf686dbeSMatthew Dillon if (error == 0) { 36607ed04b5SMatthew Dillon if (cursor->parent) { 36707ed04b5SMatthew Dillon KKASSERT(cursor->parent_index < 36807ed04b5SMatthew Dillon cursor->parent->ondisk->count); 369bf686dbeSMatthew Dillon elm = &cursor->parent->ondisk->elms[cursor->parent_index]; 37007ed04b5SMatthew Dillon } else { 371bf686dbeSMatthew Dillon elm = NULL; 37207ed04b5SMatthew Dillon } 3732f85fa4dSMatthew Dillon switch(cursor->node->ondisk->type) { 3742f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_LEAF: 3752f85fa4dSMatthew Dillon error = hammer_reblock_leaf_node( 3762f85fa4dSMatthew Dillon reblock, cursor, elm); 3772f85fa4dSMatthew Dillon break; 3782f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_INTERNAL: 3792f85fa4dSMatthew Dillon error = hammer_reblock_int_node( 3802f85fa4dSMatthew Dillon reblock, cursor, elm); 3812f85fa4dSMatthew Dillon break; 3822f85fa4dSMatthew Dillon default: 3832f85fa4dSMatthew Dillon panic("Illegal B-Tree node type"); 3842f85fa4dSMatthew Dillon } 385bf686dbeSMatthew Dillon } 386bf686dbeSMatthew Dillon if (error == 0) { 387bf686dbeSMatthew Dillon ++reblock->btree_moves; 388bf686dbeSMatthew Dillon } 389bf686dbeSMatthew Dillon } 390bf686dbeSMatthew Dillon } 391bf686dbeSMatthew Dillon 392bf686dbeSMatthew Dillon hammer_cursor_downgrade(cursor); 393bf686dbeSMatthew Dillon return(error); 394bf686dbeSMatthew Dillon } 395bf686dbeSMatthew Dillon 396bf686dbeSMatthew Dillon /* 397bf686dbeSMatthew Dillon * Reblock a record's data. Both the B-Tree element and record pointers 398bf686dbeSMatthew Dillon * to the data must be adjusted. 399bf686dbeSMatthew Dillon */ 400bf686dbeSMatthew Dillon static int 40136f82b23SMatthew Dillon hammer_reblock_data(struct hammer_ioc_reblock *reblock, 402bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 403bf686dbeSMatthew Dillon { 404bf686dbeSMatthew Dillon struct hammer_buffer *data_buffer = NULL; 405bf686dbeSMatthew Dillon hammer_off_t ndata_offset; 406bf686dbeSMatthew Dillon int error; 407bf686dbeSMatthew Dillon void *ndata; 408bf686dbeSMatthew Dillon 409bf686dbeSMatthew Dillon error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | 41011ad5adeSMatthew Dillon HAMMER_CURSOR_GET_LEAF); 411bf686dbeSMatthew Dillon if (error) 412bf686dbeSMatthew Dillon return (error); 41336f82b23SMatthew Dillon ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, 414bf3b416bSMatthew Dillon elm->leaf.base.rec_type, 415df2ccbacSMatthew Dillon &ndata_offset, &data_buffer, 416df2ccbacSMatthew Dillon 0, &error); 417bf686dbeSMatthew Dillon if (error) 418bf686dbeSMatthew Dillon goto done; 419b8a41159SMatthew Dillon hammer_io_notmeta(data_buffer); 420bf686dbeSMatthew Dillon 421bf686dbeSMatthew Dillon /* 422b9107f58SMatthew Dillon * Move the data. Note that we must invalidate any cached 423b9107f58SMatthew Dillon * data buffer in the cursor before calling blockmap_free. 424b9107f58SMatthew Dillon * The blockmap_free may free up the entire large-block and 425b9107f58SMatthew Dillon * will not be able to invalidate it if the cursor is holding 426b9107f58SMatthew Dillon * a data buffer cached in that large block. 427bf686dbeSMatthew Dillon */ 42810a5d1baSMatthew Dillon hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0); 429bf686dbeSMatthew Dillon bcopy(cursor->data, ndata, elm->leaf.data_len); 43010a5d1baSMatthew Dillon hammer_modify_buffer_done(data_buffer); 431b9107f58SMatthew Dillon hammer_cursor_invalidate_cache(cursor); 432bf686dbeSMatthew Dillon 43336f82b23SMatthew Dillon hammer_blockmap_free(cursor->trans, 43436f82b23SMatthew Dillon elm->leaf.data_offset, elm->leaf.data_len); 435bf686dbeSMatthew Dillon 43610a5d1baSMatthew Dillon hammer_modify_node(cursor->trans, cursor->node, 43710a5d1baSMatthew Dillon &elm->leaf.data_offset, sizeof(hammer_off_t)); 438bf686dbeSMatthew Dillon elm->leaf.data_offset = ndata_offset; 43910a5d1baSMatthew Dillon hammer_modify_node_done(cursor->node); 440bf686dbeSMatthew Dillon 441bf686dbeSMatthew Dillon done: 442bf686dbeSMatthew Dillon if (data_buffer) 443bf686dbeSMatthew Dillon hammer_rel_buffer(data_buffer, 0); 444bf686dbeSMatthew Dillon return (error); 445bf686dbeSMatthew Dillon } 446bf686dbeSMatthew Dillon 447bf686dbeSMatthew Dillon /* 4482f85fa4dSMatthew Dillon * Reblock a B-Tree leaf node. The parent must be adjusted to point to 4492f85fa4dSMatthew Dillon * the new copy of the leaf node. 450bf686dbeSMatthew Dillon * 4512f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 452bf686dbeSMatthew Dillon */ 453bf686dbeSMatthew Dillon static int 4542f85fa4dSMatthew Dillon hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 455bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 456bf686dbeSMatthew Dillon { 457bf686dbeSMatthew Dillon hammer_node_t onode; 458bf686dbeSMatthew Dillon hammer_node_t nnode; 459bf686dbeSMatthew Dillon int error; 460bf686dbeSMatthew Dillon 461df2ccbacSMatthew Dillon /* 462df2ccbacSMatthew Dillon * Don't supply a hint when allocating the leaf. Fills are done 463df2ccbacSMatthew Dillon * from the leaf upwards. 464df2ccbacSMatthew Dillon */ 465bf686dbeSMatthew Dillon onode = cursor->node; 466df2ccbacSMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 4678d0efe43SMatthew Dillon 468bf686dbeSMatthew Dillon if (nnode == NULL) 469bf686dbeSMatthew Dillon return (error); 470bf686dbeSMatthew Dillon 471bf686dbeSMatthew Dillon /* 472bf686dbeSMatthew Dillon * Move the node 473bf686dbeSMatthew Dillon */ 47409ac686bSMatthew Dillon hammer_lock_ex(&nnode->lock); 47509ac686bSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 476bf686dbeSMatthew Dillon bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 477bf686dbeSMatthew Dillon 478bf686dbeSMatthew Dillon if (elm) { 479bf686dbeSMatthew Dillon /* 480bf686dbeSMatthew Dillon * We are not the root of the B-Tree 481bf686dbeSMatthew Dillon */ 48236f82b23SMatthew Dillon hammer_modify_node(cursor->trans, cursor->parent, 483bf686dbeSMatthew Dillon &elm->internal.subtree_offset, 484bf686dbeSMatthew Dillon sizeof(elm->internal.subtree_offset)); 485bf686dbeSMatthew Dillon elm->internal.subtree_offset = nnode->node_offset; 48610a5d1baSMatthew Dillon hammer_modify_node_done(cursor->parent); 487bf686dbeSMatthew Dillon } else { 488bf686dbeSMatthew Dillon /* 489bf686dbeSMatthew Dillon * We are the root of the B-Tree 490bf686dbeSMatthew Dillon */ 491bf686dbeSMatthew Dillon hammer_volume_t volume; 492bf686dbeSMatthew Dillon 49336f82b23SMatthew Dillon volume = hammer_get_root_volume(cursor->trans->hmp, &error); 494bf686dbeSMatthew Dillon KKASSERT(error == 0); 495bf686dbeSMatthew Dillon 496e8599db1SMatthew Dillon hammer_modify_volume_field(cursor->trans, volume, 497e8599db1SMatthew Dillon vol0_btree_root); 498bf686dbeSMatthew Dillon volume->ondisk->vol0_btree_root = nnode->node_offset; 49910a5d1baSMatthew Dillon hammer_modify_volume_done(volume); 500bf686dbeSMatthew Dillon hammer_rel_volume(volume, 0); 501bf686dbeSMatthew Dillon } 502bf686dbeSMatthew Dillon 503b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 50436f82b23SMatthew Dillon hammer_delete_node(cursor->trans, onode); 505bf686dbeSMatthew Dillon 506b58c6388SMatthew Dillon if (hammer_debug_general & 0x4000) { 5072f85fa4dSMatthew Dillon kprintf("REBLOCK LNODE %016llx -> %016llx\n", 508973c11b9SMatthew Dillon (long long)onode->node_offset, 509973c11b9SMatthew Dillon (long long)nnode->node_offset); 510b58c6388SMatthew Dillon } 5118d0efe43SMatthew Dillon hammer_modify_node_done(nnode); 512bf686dbeSMatthew Dillon cursor->node = nnode; 51309ac686bSMatthew Dillon 51409ac686bSMatthew Dillon hammer_unlock(&onode->lock); 515bf686dbeSMatthew Dillon hammer_rel_node(onode); 516bf686dbeSMatthew Dillon 517bf686dbeSMatthew Dillon return (error); 518bf686dbeSMatthew Dillon } 519bf686dbeSMatthew Dillon 5202f85fa4dSMatthew Dillon /* 5212f85fa4dSMatthew Dillon * Reblock a B-Tree internal node. The parent must be adjusted to point to 5222f85fa4dSMatthew Dillon * the new copy of the internal node, and the node's children's parent 5232f85fa4dSMatthew Dillon * pointers must also be adjusted to point to the new copy. 5242f85fa4dSMatthew Dillon * 5252f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 5262f85fa4dSMatthew Dillon */ 5272f85fa4dSMatthew Dillon static int 5282f85fa4dSMatthew Dillon hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 5292f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 5302f85fa4dSMatthew Dillon { 5311775b6a0SMatthew Dillon struct hammer_node_lock lockroot; 5322f85fa4dSMatthew Dillon hammer_node_t onode; 5332f85fa4dSMatthew Dillon hammer_node_t nnode; 534df2ccbacSMatthew Dillon hammer_off_t hint; 5352f85fa4dSMatthew Dillon int error; 5362f85fa4dSMatthew Dillon int i; 5372f85fa4dSMatthew Dillon 5381775b6a0SMatthew Dillon hammer_node_lock_init(&lockroot, cursor->node); 53924cf83d2SMatthew Dillon error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL); 5402f85fa4dSMatthew Dillon if (error) 5412f85fa4dSMatthew Dillon goto done; 5422f85fa4dSMatthew Dillon 543df2ccbacSMatthew Dillon /* 544df2ccbacSMatthew Dillon * The internal node is visited after recursing through its 545df2ccbacSMatthew Dillon * first element. Use the subtree offset allocated for that 546df2ccbacSMatthew Dillon * element as a hint for allocating the internal node. 547df2ccbacSMatthew Dillon */ 5482f85fa4dSMatthew Dillon onode = cursor->node; 549df2ccbacSMatthew Dillon if (onode->ondisk->count) 550df2ccbacSMatthew Dillon hint = onode->ondisk->elms[0].internal.subtree_offset; 551df2ccbacSMatthew Dillon else 552df2ccbacSMatthew Dillon hint = 0; 553*b4f86ea3SMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5542f85fa4dSMatthew Dillon 5552f85fa4dSMatthew Dillon if (nnode == NULL) 5562f85fa4dSMatthew Dillon goto done; 5572f85fa4dSMatthew Dillon 5582f85fa4dSMatthew Dillon /* 5592f85fa4dSMatthew Dillon * Move the node. Adjust the parent's pointer to us first. 5602f85fa4dSMatthew Dillon */ 5612f85fa4dSMatthew Dillon hammer_lock_ex(&nnode->lock); 5622f85fa4dSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 5632f85fa4dSMatthew Dillon bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 5642f85fa4dSMatthew Dillon 5652f85fa4dSMatthew Dillon if (elm) { 5662f85fa4dSMatthew Dillon /* 5672f85fa4dSMatthew Dillon * We are not the root of the B-Tree 5682f85fa4dSMatthew Dillon */ 5692f85fa4dSMatthew Dillon hammer_modify_node(cursor->trans, cursor->parent, 5702f85fa4dSMatthew Dillon &elm->internal.subtree_offset, 5712f85fa4dSMatthew Dillon sizeof(elm->internal.subtree_offset)); 5722f85fa4dSMatthew Dillon elm->internal.subtree_offset = nnode->node_offset; 5732f85fa4dSMatthew Dillon hammer_modify_node_done(cursor->parent); 5742f85fa4dSMatthew Dillon } else { 5752f85fa4dSMatthew Dillon /* 5762f85fa4dSMatthew Dillon * We are the root of the B-Tree 5772f85fa4dSMatthew Dillon */ 5782f85fa4dSMatthew Dillon hammer_volume_t volume; 5792f85fa4dSMatthew Dillon 5802f85fa4dSMatthew Dillon volume = hammer_get_root_volume(cursor->trans->hmp, &error); 5812f85fa4dSMatthew Dillon KKASSERT(error == 0); 5822f85fa4dSMatthew Dillon 5832f85fa4dSMatthew Dillon hammer_modify_volume_field(cursor->trans, volume, 5842f85fa4dSMatthew Dillon vol0_btree_root); 5852f85fa4dSMatthew Dillon volume->ondisk->vol0_btree_root = nnode->node_offset; 5862f85fa4dSMatthew Dillon hammer_modify_volume_done(volume); 5872f85fa4dSMatthew Dillon hammer_rel_volume(volume, 0); 5882f85fa4dSMatthew Dillon } 5892f85fa4dSMatthew Dillon 5902f85fa4dSMatthew Dillon /* 5912f85fa4dSMatthew Dillon * Now adjust our children's pointers to us. 5922f85fa4dSMatthew Dillon */ 5932f85fa4dSMatthew Dillon for (i = 0; i < nnode->ondisk->count; ++i) { 5942f85fa4dSMatthew Dillon elm = &nnode->ondisk->elms[i]; 5952f85fa4dSMatthew Dillon error = btree_set_parent(cursor->trans, nnode, elm); 5962f85fa4dSMatthew Dillon if (error) 5972f85fa4dSMatthew Dillon panic("reblock internal node: fixup problem"); 5982f85fa4dSMatthew Dillon } 5992f85fa4dSMatthew Dillon 6002f85fa4dSMatthew Dillon /* 6012f85fa4dSMatthew Dillon * Clean up. 6022f85fa4dSMatthew Dillon * 6032f85fa4dSMatthew Dillon * The new node replaces the current node in the cursor. The cursor 6042f85fa4dSMatthew Dillon * expects it to be locked so leave it locked. Discard onode. 6052f85fa4dSMatthew Dillon */ 606b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 6072f85fa4dSMatthew Dillon hammer_delete_node(cursor->trans, onode); 6082f85fa4dSMatthew Dillon 6092f85fa4dSMatthew Dillon if (hammer_debug_general & 0x4000) { 6102f85fa4dSMatthew Dillon kprintf("REBLOCK INODE %016llx -> %016llx\n", 611973c11b9SMatthew Dillon (long long)onode->node_offset, 612973c11b9SMatthew Dillon (long long)nnode->node_offset); 6132f85fa4dSMatthew Dillon } 6142f85fa4dSMatthew Dillon hammer_modify_node_done(nnode); 6152f85fa4dSMatthew Dillon cursor->node = nnode; 6162f85fa4dSMatthew Dillon 6172f85fa4dSMatthew Dillon hammer_unlock(&onode->lock); 6182f85fa4dSMatthew Dillon hammer_rel_node(onode); 6192f85fa4dSMatthew Dillon 6202f85fa4dSMatthew Dillon done: 62124cf83d2SMatthew Dillon hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL); 6222f85fa4dSMatthew Dillon return (error); 6232f85fa4dSMatthew Dillon } 6242f85fa4dSMatthew Dillon 625