1bf686dbeSMatthew Dillon /* 255b50bd5SMatthew Dillon * Copyright (c) 2008-2012 The DragonFly Project. All rights reserved. 3bf686dbeSMatthew Dillon * 4bf686dbeSMatthew Dillon * This code is derived from software contributed to The DragonFly Project 5bf686dbeSMatthew Dillon * by Matthew Dillon <dillon@backplane.com> 6bf686dbeSMatthew Dillon * 7bf686dbeSMatthew Dillon * Redistribution and use in source and binary forms, with or without 8bf686dbeSMatthew Dillon * modification, are permitted provided that the following conditions 9bf686dbeSMatthew Dillon * are met: 10bf686dbeSMatthew Dillon * 11bf686dbeSMatthew Dillon * 1. Redistributions of source code must retain the above copyright 12bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer. 13bf686dbeSMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 14bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer in 15bf686dbeSMatthew Dillon * the documentation and/or other materials provided with the 16bf686dbeSMatthew Dillon * distribution. 17bf686dbeSMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 18bf686dbeSMatthew Dillon * contributors may be used to endorse or promote products derived 19bf686dbeSMatthew Dillon * from this software without specific, prior written permission. 20bf686dbeSMatthew Dillon * 21bf686dbeSMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22bf686dbeSMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23bf686dbeSMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24bf686dbeSMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25bf686dbeSMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26bf686dbeSMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27bf686dbeSMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28bf686dbeSMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29bf686dbeSMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30bf686dbeSMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31bf686dbeSMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32bf686dbeSMatthew Dillon * SUCH DAMAGE. 33bf686dbeSMatthew Dillon */ 34bf686dbeSMatthew Dillon /* 35bf686dbeSMatthew Dillon * HAMMER reblocker - This code frees up fragmented physical space 36bf686dbeSMatthew Dillon * 37bf686dbeSMatthew Dillon * HAMMER only keeps track of free space on a big-block basis. A big-block 38bf686dbeSMatthew Dillon * containing holes can only be freed by migrating the remaining data in 39bf686dbeSMatthew Dillon * that big-block into a new big-block, then freeing the big-block. 40bf686dbeSMatthew Dillon * 41bf686dbeSMatthew Dillon * This function is called from an ioctl or via the hammer support thread. 42bf686dbeSMatthew Dillon */ 43bf686dbeSMatthew Dillon 44bf686dbeSMatthew Dillon #include "hammer.h" 45bf686dbeSMatthew Dillon 4636f82b23SMatthew Dillon static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 47bf686dbeSMatthew Dillon hammer_cursor_t cursor, 48bf686dbeSMatthew Dillon hammer_btree_elm_t elm); 4936f82b23SMatthew Dillon static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, 50bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 512f85fa4dSMatthew Dillon static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 522f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 532f85fa4dSMatthew Dillon static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 54bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 55bf686dbeSMatthew Dillon 56bf686dbeSMatthew Dillon int 5736f82b23SMatthew Dillon hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, 5836f82b23SMatthew Dillon struct hammer_ioc_reblock *reblock) 59bf686dbeSMatthew Dillon { 60bf686dbeSMatthew Dillon struct hammer_cursor cursor; 61bf686dbeSMatthew Dillon hammer_btree_elm_t elm; 62a7e9bef1SMatthew Dillon int checkspace_count; 6393291532SMatthew Dillon int error; 6493291532SMatthew Dillon int seq; 657b6ccb11SMatthew Dillon int slop; 666540d157STomohiro Kusumi u_int32_t key_end_localization; 677b6ccb11SMatthew Dillon 684fa5fb92STomohiro Kusumi if ((reblock->key_beg.localization | reblock->key_end.localization) & 694fa5fb92STomohiro Kusumi HAMMER_LOCALIZE_PSEUDOFS_MASK) { 704fa5fb92STomohiro Kusumi return(EINVAL); 714fa5fb92STomohiro Kusumi } 724fa5fb92STomohiro Kusumi if (reblock->key_beg.obj_id >= reblock->key_end.obj_id) 734fa5fb92STomohiro Kusumi return(EINVAL); 744fa5fb92STomohiro Kusumi if (reblock->free_level < 0 || 754fa5fb92STomohiro Kusumi reblock->free_level > HAMMER_BIGBLOCK_SIZE) 764fa5fb92STomohiro Kusumi return(EINVAL); 774fa5fb92STomohiro Kusumi 787b6ccb11SMatthew Dillon /* 79558a44e2STomohiro Kusumi * A fill_percentage <= 20% is considered an emergency. free_level is 80558a44e2STomohiro Kusumi * inverted from fill_percentage. 817b6ccb11SMatthew Dillon */ 82e04ee2deSTomohiro Kusumi if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10) 837b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_EMERGENCY; 847b6ccb11SMatthew Dillon else 857b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_REBLOCK; 86bf686dbeSMatthew Dillon 876540d157STomohiro Kusumi /* 886540d157STomohiro Kusumi * Ioctl caller has only set localization type to reblock. 896540d157STomohiro Kusumi * Initialize cursor key localization with ip localization. 906540d157STomohiro Kusumi */ 91dd94f1b1SMatthew Dillon reblock->key_cur = reblock->key_beg; 92842e7a70SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 935e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 94dd94f1b1SMatthew Dillon reblock->key_cur.localization += ip->obj_localization; 95814387f6SMatthew Dillon 966540d157STomohiro Kusumi key_end_localization = reblock->key_end.localization; 976540d157STomohiro Kusumi key_end_localization &= HAMMER_LOCALIZE_MASK; 985e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 996540d157STomohiro Kusumi key_end_localization += ip->obj_localization; 1005e1e1454STomohiro Kusumi else 1015e1e1454STomohiro Kusumi key_end_localization += ((HAMMER_MAX_PFS - 1) << 16); 1026540d157STomohiro Kusumi 103a7e9bef1SMatthew Dillon checkspace_count = 0; 104e86903d8SMatthew Dillon seq = trans->hmp->flusher.done; 105bf686dbeSMatthew Dillon retry: 1064e17f465SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 107bf686dbeSMatthew Dillon if (error) { 108bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 109dd94f1b1SMatthew Dillon goto failed; 110bf686dbeSMatthew Dillon } 111dd94f1b1SMatthew Dillon cursor.key_beg.localization = reblock->key_cur.localization; 112dd94f1b1SMatthew Dillon cursor.key_beg.obj_id = reblock->key_cur.obj_id; 113bf686dbeSMatthew Dillon cursor.key_beg.key = HAMMER_MIN_KEY; 114bf686dbeSMatthew Dillon cursor.key_beg.create_tid = 1; 115bf686dbeSMatthew Dillon cursor.key_beg.delete_tid = 0; 116bf686dbeSMatthew Dillon cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; 117bf686dbeSMatthew Dillon cursor.key_beg.obj_type = 0; 118bf686dbeSMatthew Dillon 1196540d157STomohiro Kusumi cursor.key_end.localization = key_end_localization; 120dd94f1b1SMatthew Dillon cursor.key_end.obj_id = reblock->key_end.obj_id; 121bf686dbeSMatthew Dillon cursor.key_end.key = HAMMER_MAX_KEY; 122bf686dbeSMatthew Dillon cursor.key_end.create_tid = HAMMER_MAX_TID - 1; 123bf686dbeSMatthew Dillon cursor.key_end.delete_tid = 0; 124bf686dbeSMatthew Dillon cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 125bf686dbeSMatthew Dillon cursor.key_end.obj_type = 0; 126bf686dbeSMatthew Dillon 127bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1289480ff55SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 12918bee4a2SMatthew Dillon cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; 130bf686dbeSMatthew Dillon 1312f85fa4dSMatthew Dillon /* 1322f85fa4dSMatthew Dillon * This flag allows the btree scan code to return internal nodes, 1332f85fa4dSMatthew Dillon * so we can reblock them in addition to the leafs. Only specify it 1342f85fa4dSMatthew Dillon * if we intend to reblock B-Tree nodes. 1352f85fa4dSMatthew Dillon */ 1362f85fa4dSMatthew Dillon if (reblock->head.flags & HAMMER_IOC_DO_BTREE) 1372f85fa4dSMatthew Dillon cursor.flags |= HAMMER_CURSOR_REBLOCKING; 1382f85fa4dSMatthew Dillon 139bf686dbeSMatthew Dillon error = hammer_btree_first(&cursor); 140bf686dbeSMatthew Dillon while (error == 0) { 1412f85fa4dSMatthew Dillon /* 1422f85fa4dSMatthew Dillon * Internal or Leaf node 1432f85fa4dSMatthew Dillon */ 14407ed04b5SMatthew Dillon KKASSERT(cursor.index < cursor.node->ondisk->count); 145bf686dbeSMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index]; 146dd94f1b1SMatthew Dillon reblock->key_cur.obj_id = elm->base.obj_id; 147dd94f1b1SMatthew Dillon reblock->key_cur.localization = elm->base.localization; 148bf686dbeSMatthew Dillon 1499480ff55SMatthew Dillon /* 1509f5097dcSMatthew Dillon * Yield to more important tasks 1519f5097dcSMatthew Dillon */ 1529f5097dcSMatthew Dillon if ((error = hammer_signal_check(trans->hmp)) != 0) 1539f5097dcSMatthew Dillon break; 154a7e9bef1SMatthew Dillon 155a7e9bef1SMatthew Dillon /* 156a7e9bef1SMatthew Dillon * If there is insufficient free space it may be due to 157a981af19STomohiro Kusumi * reserved big-blocks, which flushing might fix. 158c9ce54d6SMatthew Dillon * 15907ed04b5SMatthew Dillon * We must force a retest in case the unlocked cursor is 16007ed04b5SMatthew Dillon * moved to the end of the leaf, or moved to an internal 16107ed04b5SMatthew Dillon * node. 16207ed04b5SMatthew Dillon * 163c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 164a7e9bef1SMatthew Dillon */ 1657b6ccb11SMatthew Dillon if (hammer_checkspace(trans->hmp, slop)) { 166a7e9bef1SMatthew Dillon if (++checkspace_count == 10) { 167a7e9bef1SMatthew Dillon error = ENOSPC; 168a7e9bef1SMatthew Dillon break; 169a7e9bef1SMatthew Dillon } 170982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 17107ed04b5SMatthew Dillon cursor.flags |= HAMMER_CURSOR_RETEST; 17293291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 173982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 1747a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 17507ed04b5SMatthew Dillon goto skip; 17693291532SMatthew Dillon } 177a7e9bef1SMatthew Dillon 178a7e9bef1SMatthew Dillon /* 1799480ff55SMatthew Dillon * Acquiring the sync_lock prevents the operation from 1809480ff55SMatthew Dillon * crossing a synchronization boundary. 18109ac686bSMatthew Dillon * 18209ac686bSMatthew Dillon * NOTE: cursor.node may have changed on return. 183c9ce54d6SMatthew Dillon * 184c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 1859480ff55SMatthew Dillon */ 1862f85fa4dSMatthew Dillon hammer_sync_lock_sh(trans); 18736f82b23SMatthew Dillon error = hammer_reblock_helper(reblock, &cursor, elm); 1882f85fa4dSMatthew Dillon hammer_sync_unlock(trans); 18993291532SMatthew Dillon 19015e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 1917a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 192982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 19393291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 194982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 19515e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 19693291532SMatthew Dillon } 1971b0ab2c3SMatthew Dillon 1981b0ab2c3SMatthew Dillon /* 1991b0ab2c3SMatthew Dillon * Setup for iteration, our cursor flags may be modified by 2001b0ab2c3SMatthew Dillon * other threads while we are unlocked. 2011b0ab2c3SMatthew Dillon */ 202bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 2031b0ab2c3SMatthew Dillon 2041b0ab2c3SMatthew Dillon /* 2051b0ab2c3SMatthew Dillon * We allocate data buffers, which atm we don't track 2061b0ab2c3SMatthew Dillon * dirty levels for because we allow the kernel to write 2071b0ab2c3SMatthew Dillon * them. But if we allocate too many we can still deadlock 2081b0ab2c3SMatthew Dillon * the buffer cache. 2091b0ab2c3SMatthew Dillon * 210c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 2111b0ab2c3SMatthew Dillon * (The cursor's node and element may change!) 2121b0ab2c3SMatthew Dillon */ 2131b0ab2c3SMatthew Dillon if (bd_heatup()) { 214982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 2151b0ab2c3SMatthew Dillon bwillwrite(HAMMER_XBUFSIZE); 216982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 2171b0ab2c3SMatthew Dillon } 21855b50bd5SMatthew Dillon vm_wait_nominal(); 21907ed04b5SMatthew Dillon skip: 2201b0ab2c3SMatthew Dillon if (error == 0) { 221bf686dbeSMatthew Dillon error = hammer_btree_iterate(&cursor); 222bf686dbeSMatthew Dillon } 223bf686dbeSMatthew Dillon } 224bf686dbeSMatthew Dillon if (error == ENOENT) 225bf686dbeSMatthew Dillon error = 0; 226bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 22706ad81ffSMatthew Dillon if (error == EWOULDBLOCK) { 22806ad81ffSMatthew Dillon hammer_flusher_sync(trans->hmp); 22906ad81ffSMatthew Dillon goto retry; 23006ad81ffSMatthew Dillon } 231bf686dbeSMatthew Dillon if (error == EDEADLK) 232bf686dbeSMatthew Dillon goto retry; 23319619882SMatthew Dillon if (error == EINTR) { 23419619882SMatthew Dillon reblock->head.flags |= HAMMER_IOC_HEAD_INTR; 23519619882SMatthew Dillon error = 0; 23619619882SMatthew Dillon } 237dd94f1b1SMatthew Dillon failed: 238dd94f1b1SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 239bf686dbeSMatthew Dillon return(error); 240bf686dbeSMatthew Dillon } 241bf686dbeSMatthew Dillon 242bf686dbeSMatthew Dillon /* 243bf686dbeSMatthew Dillon * Reblock the B-Tree (leaf) node, record, and/or data if necessary. 244bf686dbeSMatthew Dillon * 2459480ff55SMatthew Dillon * XXX We have no visibility into internal B-Tree nodes at the moment, 2469480ff55SMatthew Dillon * only leaf nodes. 247bf686dbeSMatthew Dillon */ 248bf686dbeSMatthew Dillon static int 24936f82b23SMatthew Dillon hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 250bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 251bf686dbeSMatthew Dillon { 25243c665aeSMatthew Dillon hammer_mount_t hmp; 253bf686dbeSMatthew Dillon hammer_off_t tmp_offset; 254ebbcfba9SMatthew Dillon hammer_node_ondisk_t ondisk; 25544a83111SMatthew Dillon struct hammer_btree_leaf_elm leaf; 256bf686dbeSMatthew Dillon int error; 257bf686dbeSMatthew Dillon int bytes; 258bf686dbeSMatthew Dillon int cur; 259bf3b416bSMatthew Dillon int iocflags; 260bf686dbeSMatthew Dillon 261bf686dbeSMatthew Dillon error = 0; 26243c665aeSMatthew Dillon hmp = cursor->trans->hmp; 263bf686dbeSMatthew Dillon 264bf686dbeSMatthew Dillon /* 265bf686dbeSMatthew Dillon * Reblock data. Note that data embedded in a record is reblocked 2662f85fa4dSMatthew Dillon * by the record reblock code. Data processing only occurs at leaf 2672f85fa4dSMatthew Dillon * nodes and for RECORD element types. 268bf686dbeSMatthew Dillon */ 2692f85fa4dSMatthew Dillon if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF) 2702f85fa4dSMatthew Dillon goto skip; 2712f85fa4dSMatthew Dillon if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) 2722f85fa4dSMatthew Dillon return(0); 273bf686dbeSMatthew Dillon tmp_offset = elm->leaf.data_offset; 274bf3b416bSMatthew Dillon if (tmp_offset == 0) 275bf3b416bSMatthew Dillon goto skip; 276bf3b416bSMatthew Dillon if (error) 277bf3b416bSMatthew Dillon goto skip; 278bf3b416bSMatthew Dillon 279bf3b416bSMatthew Dillon /* 280bf3b416bSMatthew Dillon * NOTE: Localization restrictions may also have been set-up, we can't 281bf3b416bSMatthew Dillon * just set the match flags willy-nilly here. 282bf3b416bSMatthew Dillon */ 283bf3b416bSMatthew Dillon switch(elm->leaf.base.rec_type) { 284bf3b416bSMatthew Dillon case HAMMER_RECTYPE_INODE: 28583f2a3aaSMatthew Dillon case HAMMER_RECTYPE_SNAPSHOT: 28683f2a3aaSMatthew Dillon case HAMMER_RECTYPE_CONFIG: 287bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_INODES; 288bf3b416bSMatthew Dillon break; 289bf3b416bSMatthew Dillon case HAMMER_RECTYPE_EXT: 290bf3b416bSMatthew Dillon case HAMMER_RECTYPE_FIX: 291ea434b6fSMatthew Dillon case HAMMER_RECTYPE_PFS: 292bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DIRENTRY: 293bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DIRS; 294bf3b416bSMatthew Dillon break; 295bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DATA: 296bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DB: 297bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DATA; 298bf3b416bSMatthew Dillon break; 299bf3b416bSMatthew Dillon default: 300bf3b416bSMatthew Dillon iocflags = 0; 301bf3b416bSMatthew Dillon break; 302bf3b416bSMatthew Dillon } 303bf3b416bSMatthew Dillon if (reblock->head.flags & iocflags) { 304bf686dbeSMatthew Dillon ++reblock->data_count; 305bf686dbeSMatthew Dillon reblock->data_byte_count += elm->leaf.data_len; 30643c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3076e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 3082f85fa4dSMatthew Dillon kprintf("D %6d/%d\n", bytes, reblock->free_level); 309bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 310bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 31144a83111SMatthew Dillon /* 31244a83111SMatthew Dillon * This is nasty, the uncache code may have to get 31344a83111SMatthew Dillon * vnode locks and because of that we can't hold 31444a83111SMatthew Dillon * the cursor locked. 315c9ce54d6SMatthew Dillon * 316c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() 317c9ce54d6SMatthew Dillon * function. 31844a83111SMatthew Dillon */ 31944a83111SMatthew Dillon leaf = elm->leaf; 320982be4bfSMatthew Dillon hammer_unlock_cursor(cursor); 32144a83111SMatthew Dillon hammer_io_direct_uncache(hmp, &leaf); 322982be4bfSMatthew Dillon hammer_lock_cursor(cursor); 323ebbcfba9SMatthew Dillon 324ebbcfba9SMatthew Dillon /* 325ebbcfba9SMatthew Dillon * elm may have become stale or invalid, reload it. 326ebbcfba9SMatthew Dillon * ondisk variable is temporary only. Note that 327ebbcfba9SMatthew Dillon * cursor->node and thus cursor->node->ondisk may 328ebbcfba9SMatthew Dillon * also changed. 329ebbcfba9SMatthew Dillon */ 330ebbcfba9SMatthew Dillon ondisk = cursor->node->ondisk; 331ebbcfba9SMatthew Dillon elm = &ondisk->elms[cursor->index]; 33244a83111SMatthew Dillon if (cursor->flags & HAMMER_CURSOR_RETEST) { 333ebbcfba9SMatthew Dillon kprintf("hammer: debug: retest on " 334ebbcfba9SMatthew Dillon "reblocker uncache\n"); 33544a83111SMatthew Dillon error = EDEADLK; 336ebbcfba9SMatthew Dillon } else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF || 337ebbcfba9SMatthew Dillon cursor->index >= ondisk->count) { 338ebbcfba9SMatthew Dillon kprintf("hammer: debug: shifted on " 339ebbcfba9SMatthew Dillon "reblocker uncache\n"); 340ebbcfba9SMatthew Dillon error = EDEADLK; 341ebbcfba9SMatthew Dillon } else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) { 342ebbcfba9SMatthew Dillon kprintf("hammer: debug: changed on " 343ebbcfba9SMatthew Dillon "reblocker uncache\n"); 344ebbcfba9SMatthew Dillon error = EDEADLK; 34544a83111SMatthew Dillon } 34644a83111SMatthew Dillon if (error == 0) 347bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 348bf686dbeSMatthew Dillon if (error == 0) { 34907ed04b5SMatthew Dillon KKASSERT(cursor->index < ondisk->count); 35036f82b23SMatthew Dillon error = hammer_reblock_data(reblock, 351bf686dbeSMatthew Dillon cursor, elm); 352bf686dbeSMatthew Dillon } 353bf686dbeSMatthew Dillon if (error == 0) { 354bf686dbeSMatthew Dillon ++reblock->data_moves; 355bf686dbeSMatthew Dillon reblock->data_byte_moves += elm->leaf.data_len; 356bf686dbeSMatthew Dillon } 357bf686dbeSMatthew Dillon } 358bf686dbeSMatthew Dillon } 359bf686dbeSMatthew Dillon 3602f85fa4dSMatthew Dillon skip: 361bf686dbeSMatthew Dillon /* 3621775b6a0SMatthew Dillon * Reblock a B-Tree internal or leaf node. A leaf node is reblocked 3631775b6a0SMatthew Dillon * on initial entry only (element 0). An internal node is reblocked 3641775b6a0SMatthew Dillon * when entered upward from its first leaf node only (also element 0). 3651775b6a0SMatthew Dillon * Further revisits of the internal node (index > 0) are ignored. 366bf686dbeSMatthew Dillon */ 367bf686dbeSMatthew Dillon tmp_offset = cursor->node->node_offset; 368bf3b416bSMatthew Dillon if (cursor->index == 0 && 369814387f6SMatthew Dillon error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { 370bf686dbeSMatthew Dillon ++reblock->btree_count; 37143c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3726e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 3732f85fa4dSMatthew Dillon kprintf("B %6d/%d\n", bytes, reblock->free_level); 374bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 375bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 376bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 377bf686dbeSMatthew Dillon if (error == 0) { 37807ed04b5SMatthew Dillon if (cursor->parent) { 37907ed04b5SMatthew Dillon KKASSERT(cursor->parent_index < 38007ed04b5SMatthew Dillon cursor->parent->ondisk->count); 381bf686dbeSMatthew Dillon elm = &cursor->parent->ondisk->elms[cursor->parent_index]; 38207ed04b5SMatthew Dillon } else { 383bf686dbeSMatthew Dillon elm = NULL; 38407ed04b5SMatthew Dillon } 3852f85fa4dSMatthew Dillon switch(cursor->node->ondisk->type) { 3862f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_LEAF: 3872f85fa4dSMatthew Dillon error = hammer_reblock_leaf_node( 3882f85fa4dSMatthew Dillon reblock, cursor, elm); 3892f85fa4dSMatthew Dillon break; 3902f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_INTERNAL: 3912f85fa4dSMatthew Dillon error = hammer_reblock_int_node( 3922f85fa4dSMatthew Dillon reblock, cursor, elm); 3932f85fa4dSMatthew Dillon break; 3942f85fa4dSMatthew Dillon default: 3952f85fa4dSMatthew Dillon panic("Illegal B-Tree node type"); 3962f85fa4dSMatthew Dillon } 397bf686dbeSMatthew Dillon } 398bf686dbeSMatthew Dillon if (error == 0) { 399bf686dbeSMatthew Dillon ++reblock->btree_moves; 400bf686dbeSMatthew Dillon } 401bf686dbeSMatthew Dillon } 402bf686dbeSMatthew Dillon } 403bf686dbeSMatthew Dillon 404bf686dbeSMatthew Dillon hammer_cursor_downgrade(cursor); 405bf686dbeSMatthew Dillon return(error); 406bf686dbeSMatthew Dillon } 407bf686dbeSMatthew Dillon 408bf686dbeSMatthew Dillon /* 409bf686dbeSMatthew Dillon * Reblock a record's data. Both the B-Tree element and record pointers 410bf686dbeSMatthew Dillon * to the data must be adjusted. 411bf686dbeSMatthew Dillon */ 412bf686dbeSMatthew Dillon static int 41336f82b23SMatthew Dillon hammer_reblock_data(struct hammer_ioc_reblock *reblock, 414bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 415bf686dbeSMatthew Dillon { 416bf686dbeSMatthew Dillon struct hammer_buffer *data_buffer = NULL; 417*bc996e65STomohiro Kusumi hammer_off_t odata_offset; 418bf686dbeSMatthew Dillon hammer_off_t ndata_offset; 419bf686dbeSMatthew Dillon int error; 420bf686dbeSMatthew Dillon void *ndata; 421bf686dbeSMatthew Dillon 422bf686dbeSMatthew Dillon error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | 42311ad5adeSMatthew Dillon HAMMER_CURSOR_GET_LEAF); 424bf686dbeSMatthew Dillon if (error) 425bf686dbeSMatthew Dillon return (error); 42636f82b23SMatthew Dillon ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, 427bf3b416bSMatthew Dillon elm->leaf.base.rec_type, 428df2ccbacSMatthew Dillon &ndata_offset, &data_buffer, 429df2ccbacSMatthew Dillon 0, &error); 430bf686dbeSMatthew Dillon if (error) 431bf686dbeSMatthew Dillon goto done; 432b8a41159SMatthew Dillon hammer_io_notmeta(data_buffer); 433bf686dbeSMatthew Dillon 434bf686dbeSMatthew Dillon /* 435b9107f58SMatthew Dillon * Move the data. Note that we must invalidate any cached 436b9107f58SMatthew Dillon * data buffer in the cursor before calling blockmap_free. 437e04ee2deSTomohiro Kusumi * The blockmap_free may free up the entire big-block and 438b9107f58SMatthew Dillon * will not be able to invalidate it if the cursor is holding 439d165c90aSTomohiro Kusumi * a data buffer cached in that big-block. 440bf686dbeSMatthew Dillon */ 441f1c0ae53STomohiro Kusumi hammer_modify_buffer_noundo(cursor->trans, data_buffer); 442bf686dbeSMatthew Dillon bcopy(cursor->data, ndata, elm->leaf.data_len); 44310a5d1baSMatthew Dillon hammer_modify_buffer_done(data_buffer); 444b9107f58SMatthew Dillon hammer_cursor_invalidate_cache(cursor); 445bf686dbeSMatthew Dillon 44636f82b23SMatthew Dillon hammer_blockmap_free(cursor->trans, 44736f82b23SMatthew Dillon elm->leaf.data_offset, elm->leaf.data_len); 448bf686dbeSMatthew Dillon 44910a5d1baSMatthew Dillon hammer_modify_node(cursor->trans, cursor->node, 45010a5d1baSMatthew Dillon &elm->leaf.data_offset, sizeof(hammer_off_t)); 451*bc996e65STomohiro Kusumi odata_offset = elm->leaf.data_offset; 452bf686dbeSMatthew Dillon elm->leaf.data_offset = ndata_offset; 45310a5d1baSMatthew Dillon hammer_modify_node_done(cursor->node); 454bf686dbeSMatthew Dillon 455*bc996e65STomohiro Kusumi if (hammer_debug_general & 0x4000) { 456*bc996e65STomohiro Kusumi kprintf("REBLOCK DATA %08x %016llx -> %016llx\n", 457*bc996e65STomohiro Kusumi elm->base.localization, 458*bc996e65STomohiro Kusumi (long long)odata_offset, 459*bc996e65STomohiro Kusumi (long long)ndata_offset); 460*bc996e65STomohiro Kusumi } 461bf686dbeSMatthew Dillon done: 462bf686dbeSMatthew Dillon if (data_buffer) 463bf686dbeSMatthew Dillon hammer_rel_buffer(data_buffer, 0); 464bf686dbeSMatthew Dillon return (error); 465bf686dbeSMatthew Dillon } 466bf686dbeSMatthew Dillon 467bf686dbeSMatthew Dillon /* 4682f85fa4dSMatthew Dillon * Reblock a B-Tree leaf node. The parent must be adjusted to point to 4692f85fa4dSMatthew Dillon * the new copy of the leaf node. 470bf686dbeSMatthew Dillon * 4712f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 472bf686dbeSMatthew Dillon */ 473bf686dbeSMatthew Dillon static int 4742f85fa4dSMatthew Dillon hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 475bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 476bf686dbeSMatthew Dillon { 477bf686dbeSMatthew Dillon hammer_node_t onode; 478bf686dbeSMatthew Dillon hammer_node_t nnode; 479bf686dbeSMatthew Dillon int error; 480bf686dbeSMatthew Dillon 481df2ccbacSMatthew Dillon /* 482df2ccbacSMatthew Dillon * Don't supply a hint when allocating the leaf. Fills are done 483df2ccbacSMatthew Dillon * from the leaf upwards. 484df2ccbacSMatthew Dillon */ 485bf686dbeSMatthew Dillon onode = cursor->node; 486df2ccbacSMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 4878d0efe43SMatthew Dillon 488bf686dbeSMatthew Dillon if (nnode == NULL) 489bf686dbeSMatthew Dillon return (error); 490bf686dbeSMatthew Dillon 491bf686dbeSMatthew Dillon /* 492bf686dbeSMatthew Dillon * Move the node 493bf686dbeSMatthew Dillon */ 49409ac686bSMatthew Dillon hammer_lock_ex(&nnode->lock); 49509ac686bSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 496bf686dbeSMatthew Dillon bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 497bf686dbeSMatthew Dillon 498bf686dbeSMatthew Dillon if (elm) { 499bf686dbeSMatthew Dillon /* 500bf686dbeSMatthew Dillon * We are not the root of the B-Tree 501bf686dbeSMatthew Dillon */ 50236f82b23SMatthew Dillon hammer_modify_node(cursor->trans, cursor->parent, 503bf686dbeSMatthew Dillon &elm->internal.subtree_offset, 504bf686dbeSMatthew Dillon sizeof(elm->internal.subtree_offset)); 505bf686dbeSMatthew Dillon elm->internal.subtree_offset = nnode->node_offset; 50610a5d1baSMatthew Dillon hammer_modify_node_done(cursor->parent); 507bf686dbeSMatthew Dillon } else { 508bf686dbeSMatthew Dillon /* 509bf686dbeSMatthew Dillon * We are the root of the B-Tree 510bf686dbeSMatthew Dillon */ 511bf686dbeSMatthew Dillon hammer_volume_t volume; 51236f82b23SMatthew Dillon volume = hammer_get_root_volume(cursor->trans->hmp, &error); 513bf686dbeSMatthew Dillon KKASSERT(error == 0); 514bf686dbeSMatthew Dillon 515e8599db1SMatthew Dillon hammer_modify_volume_field(cursor->trans, volume, 516e8599db1SMatthew Dillon vol0_btree_root); 517bf686dbeSMatthew Dillon volume->ondisk->vol0_btree_root = nnode->node_offset; 51810a5d1baSMatthew Dillon hammer_modify_volume_done(volume); 519bf686dbeSMatthew Dillon hammer_rel_volume(volume, 0); 520bf686dbeSMatthew Dillon } 521bf686dbeSMatthew Dillon 522b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 52336f82b23SMatthew Dillon hammer_delete_node(cursor->trans, onode); 524bf686dbeSMatthew Dillon 525b58c6388SMatthew Dillon if (hammer_debug_general & 0x4000) { 526*bc996e65STomohiro Kusumi kprintf("REBLOCK LNODE %08x %016llx -> %016llx\n", 527*bc996e65STomohiro Kusumi elm->base.localization, 528973c11b9SMatthew Dillon (long long)onode->node_offset, 529973c11b9SMatthew Dillon (long long)nnode->node_offset); 530b58c6388SMatthew Dillon } 5318d0efe43SMatthew Dillon hammer_modify_node_done(nnode); 532bf686dbeSMatthew Dillon cursor->node = nnode; 53309ac686bSMatthew Dillon 53409ac686bSMatthew Dillon hammer_unlock(&onode->lock); 535bf686dbeSMatthew Dillon hammer_rel_node(onode); 536bf686dbeSMatthew Dillon 537bf686dbeSMatthew Dillon return (error); 538bf686dbeSMatthew Dillon } 539bf686dbeSMatthew Dillon 5402f85fa4dSMatthew Dillon /* 5412f85fa4dSMatthew Dillon * Reblock a B-Tree internal node. The parent must be adjusted to point to 5422f85fa4dSMatthew Dillon * the new copy of the internal node, and the node's children's parent 5432f85fa4dSMatthew Dillon * pointers must also be adjusted to point to the new copy. 5442f85fa4dSMatthew Dillon * 5452f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 5462f85fa4dSMatthew Dillon */ 5472f85fa4dSMatthew Dillon static int 5482f85fa4dSMatthew Dillon hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 5492f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 5502f85fa4dSMatthew Dillon { 5511775b6a0SMatthew Dillon struct hammer_node_lock lockroot; 5522f85fa4dSMatthew Dillon hammer_node_t onode; 5532f85fa4dSMatthew Dillon hammer_node_t nnode; 5542f85fa4dSMatthew Dillon int error; 5552f85fa4dSMatthew Dillon int i; 5562f85fa4dSMatthew Dillon 5571775b6a0SMatthew Dillon hammer_node_lock_init(&lockroot, cursor->node); 55824cf83d2SMatthew Dillon error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL); 5592f85fa4dSMatthew Dillon if (error) 5602f85fa4dSMatthew Dillon goto done; 5612f85fa4dSMatthew Dillon 5622f85fa4dSMatthew Dillon onode = cursor->node; 563b4f86ea3SMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5642f85fa4dSMatthew Dillon 5652f85fa4dSMatthew Dillon if (nnode == NULL) 5662f85fa4dSMatthew Dillon goto done; 5672f85fa4dSMatthew Dillon 5682f85fa4dSMatthew Dillon /* 5692f85fa4dSMatthew Dillon * Move the node. Adjust the parent's pointer to us first. 5702f85fa4dSMatthew Dillon */ 5712f85fa4dSMatthew Dillon hammer_lock_ex(&nnode->lock); 5722f85fa4dSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 5732f85fa4dSMatthew Dillon bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 5742f85fa4dSMatthew Dillon 5752f85fa4dSMatthew Dillon if (elm) { 5762f85fa4dSMatthew Dillon /* 5772f85fa4dSMatthew Dillon * We are not the root of the B-Tree 5782f85fa4dSMatthew Dillon */ 5792f85fa4dSMatthew Dillon hammer_modify_node(cursor->trans, cursor->parent, 5802f85fa4dSMatthew Dillon &elm->internal.subtree_offset, 5812f85fa4dSMatthew Dillon sizeof(elm->internal.subtree_offset)); 5822f85fa4dSMatthew Dillon elm->internal.subtree_offset = nnode->node_offset; 5832f85fa4dSMatthew Dillon hammer_modify_node_done(cursor->parent); 5842f85fa4dSMatthew Dillon } else { 5852f85fa4dSMatthew Dillon /* 5862f85fa4dSMatthew Dillon * We are the root of the B-Tree 5872f85fa4dSMatthew Dillon */ 5882f85fa4dSMatthew Dillon hammer_volume_t volume; 5892f85fa4dSMatthew Dillon volume = hammer_get_root_volume(cursor->trans->hmp, &error); 5902f85fa4dSMatthew Dillon KKASSERT(error == 0); 5912f85fa4dSMatthew Dillon 5922f85fa4dSMatthew Dillon hammer_modify_volume_field(cursor->trans, volume, 5932f85fa4dSMatthew Dillon vol0_btree_root); 5942f85fa4dSMatthew Dillon volume->ondisk->vol0_btree_root = nnode->node_offset; 5952f85fa4dSMatthew Dillon hammer_modify_volume_done(volume); 5962f85fa4dSMatthew Dillon hammer_rel_volume(volume, 0); 5972f85fa4dSMatthew Dillon } 5982f85fa4dSMatthew Dillon 5992f85fa4dSMatthew Dillon /* 6002f85fa4dSMatthew Dillon * Now adjust our children's pointers to us. 6012f85fa4dSMatthew Dillon */ 6022f85fa4dSMatthew Dillon for (i = 0; i < nnode->ondisk->count; ++i) { 6032f85fa4dSMatthew Dillon elm = &nnode->ondisk->elms[i]; 6042f85fa4dSMatthew Dillon error = btree_set_parent(cursor->trans, nnode, elm); 6052f85fa4dSMatthew Dillon if (error) 6062f85fa4dSMatthew Dillon panic("reblock internal node: fixup problem"); 6072f85fa4dSMatthew Dillon } 6082f85fa4dSMatthew Dillon 6092f85fa4dSMatthew Dillon /* 6102f85fa4dSMatthew Dillon * Clean up. 6112f85fa4dSMatthew Dillon * 6122f85fa4dSMatthew Dillon * The new node replaces the current node in the cursor. The cursor 6132f85fa4dSMatthew Dillon * expects it to be locked so leave it locked. Discard onode. 6142f85fa4dSMatthew Dillon */ 615b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 6162f85fa4dSMatthew Dillon hammer_delete_node(cursor->trans, onode); 6172f85fa4dSMatthew Dillon 6182f85fa4dSMatthew Dillon if (hammer_debug_general & 0x4000) { 619*bc996e65STomohiro Kusumi kprintf("REBLOCK INODE %08x %016llx -> %016llx\n", 620*bc996e65STomohiro Kusumi elm->base.localization, 621973c11b9SMatthew Dillon (long long)onode->node_offset, 622973c11b9SMatthew Dillon (long long)nnode->node_offset); 6232f85fa4dSMatthew Dillon } 6242f85fa4dSMatthew Dillon hammer_modify_node_done(nnode); 6252f85fa4dSMatthew Dillon cursor->node = nnode; 6262f85fa4dSMatthew Dillon 6272f85fa4dSMatthew Dillon hammer_unlock(&onode->lock); 6282f85fa4dSMatthew Dillon hammer_rel_node(onode); 6292f85fa4dSMatthew Dillon 6302f85fa4dSMatthew Dillon done: 63124cf83d2SMatthew Dillon hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL); 6322f85fa4dSMatthew Dillon return (error); 6332f85fa4dSMatthew Dillon } 6342f85fa4dSMatthew Dillon 635