/*
 * Copyright (c) 2008-2012 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER reblocker - This code frees up fragmented physical space
 *
 * HAMMER only keeps track of free space on a big-block basis. A big-block
 * containing holes can only be freed by migrating the remaining data in
 * that big-block into a new big-block, then freeing the big-block.
 *
 * This function is called from an ioctl or via the hammer support thread.
42bf686dbeSMatthew Dillon */ 43bf686dbeSMatthew Dillon 44bf686dbeSMatthew Dillon #include "hammer.h" 45bf686dbeSMatthew Dillon 4636f82b23SMatthew Dillon static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 47bf686dbeSMatthew Dillon hammer_cursor_t cursor, 48bf686dbeSMatthew Dillon hammer_btree_elm_t elm); 4936f82b23SMatthew Dillon static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, 50bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 512f85fa4dSMatthew Dillon static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 522f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 532f85fa4dSMatthew Dillon static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 54bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 5526748b87STomohiro Kusumi static void hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm, 5626748b87STomohiro Kusumi hammer_node_t onode, hammer_node_t nnode); 57bf686dbeSMatthew Dillon 58bf686dbeSMatthew Dillon int 5936f82b23SMatthew Dillon hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, 6036f82b23SMatthew Dillon struct hammer_ioc_reblock *reblock) 61bf686dbeSMatthew Dillon { 62bf686dbeSMatthew Dillon struct hammer_cursor cursor; 63bf686dbeSMatthew Dillon hammer_btree_elm_t elm; 64a7e9bef1SMatthew Dillon int checkspace_count; 6593291532SMatthew Dillon int error; 6693291532SMatthew Dillon int seq; 677b6ccb11SMatthew Dillon int slop; 6846137e17STomohiro Kusumi uint32_t key_end_localization; 697b6ccb11SMatthew Dillon 704fa5fb92STomohiro Kusumi if ((reblock->key_beg.localization | reblock->key_end.localization) & 714fa5fb92STomohiro Kusumi HAMMER_LOCALIZE_PSEUDOFS_MASK) { 724fa5fb92STomohiro Kusumi return(EINVAL); 734fa5fb92STomohiro Kusumi } 744fa5fb92STomohiro Kusumi if (reblock->key_beg.obj_id >= reblock->key_end.obj_id) 754fa5fb92STomohiro Kusumi return(EINVAL); 764fa5fb92STomohiro Kusumi if (reblock->free_level 
< 0 || 774fa5fb92STomohiro Kusumi reblock->free_level > HAMMER_BIGBLOCK_SIZE) 784fa5fb92STomohiro Kusumi return(EINVAL); 794fa5fb92STomohiro Kusumi 807b6ccb11SMatthew Dillon /* 81558a44e2STomohiro Kusumi * A fill_percentage <= 20% is considered an emergency. free_level is 82558a44e2STomohiro Kusumi * inverted from fill_percentage. 837b6ccb11SMatthew Dillon */ 84e04ee2deSTomohiro Kusumi if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10) 857b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_EMERGENCY; 867b6ccb11SMatthew Dillon else 877b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_REBLOCK; 88bf686dbeSMatthew Dillon 896540d157STomohiro Kusumi /* 906540d157STomohiro Kusumi * Ioctl caller has only set localization type to reblock. 916540d157STomohiro Kusumi * Initialize cursor key localization with ip localization. 926540d157STomohiro Kusumi */ 93dd94f1b1SMatthew Dillon reblock->key_cur = reblock->key_beg; 94842e7a70SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 955e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 967e52af60STomohiro Kusumi reblock->key_cur.localization |= ip->obj_localization; 97814387f6SMatthew Dillon 986540d157STomohiro Kusumi key_end_localization = reblock->key_end.localization; 996540d157STomohiro Kusumi key_end_localization &= HAMMER_LOCALIZE_MASK; 1005e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 1017e52af60STomohiro Kusumi key_end_localization |= ip->obj_localization; 1025e1e1454STomohiro Kusumi else 10320cf2291STomohiro Kusumi key_end_localization |= pfs_to_lo(HAMMER_MAX_PFSID); 1046540d157STomohiro Kusumi 105a7e9bef1SMatthew Dillon checkspace_count = 0; 106e86903d8SMatthew Dillon seq = trans->hmp->flusher.done; 107bf686dbeSMatthew Dillon retry: 1084e17f465SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 109bf686dbeSMatthew Dillon if (error) { 110bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 111dd94f1b1SMatthew Dillon goto failed; 112bf686dbeSMatthew Dillon } 113dd94f1b1SMatthew Dillon 
cursor.key_beg.localization = reblock->key_cur.localization; 114dd94f1b1SMatthew Dillon cursor.key_beg.obj_id = reblock->key_cur.obj_id; 115bf686dbeSMatthew Dillon cursor.key_beg.key = HAMMER_MIN_KEY; 116bf686dbeSMatthew Dillon cursor.key_beg.create_tid = 1; 117bf686dbeSMatthew Dillon cursor.key_beg.delete_tid = 0; 118bf686dbeSMatthew Dillon cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; 119bf686dbeSMatthew Dillon cursor.key_beg.obj_type = 0; 120bf686dbeSMatthew Dillon 1216540d157STomohiro Kusumi cursor.key_end.localization = key_end_localization; 122dd94f1b1SMatthew Dillon cursor.key_end.obj_id = reblock->key_end.obj_id; 123bf686dbeSMatthew Dillon cursor.key_end.key = HAMMER_MAX_KEY; 124bf686dbeSMatthew Dillon cursor.key_end.create_tid = HAMMER_MAX_TID - 1; 125bf686dbeSMatthew Dillon cursor.key_end.delete_tid = 0; 126bf686dbeSMatthew Dillon cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 127bf686dbeSMatthew Dillon cursor.key_end.obj_type = 0; 128bf686dbeSMatthew Dillon 129bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1309480ff55SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 13118bee4a2SMatthew Dillon cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; 132bf686dbeSMatthew Dillon 1332f85fa4dSMatthew Dillon /* 1342f85fa4dSMatthew Dillon * This flag allows the btree scan code to return internal nodes, 1352f85fa4dSMatthew Dillon * so we can reblock them in addition to the leafs. Only specify it 1362f85fa4dSMatthew Dillon * if we intend to reblock B-Tree nodes. 
1372f85fa4dSMatthew Dillon */ 1382f85fa4dSMatthew Dillon if (reblock->head.flags & HAMMER_IOC_DO_BTREE) 1392f85fa4dSMatthew Dillon cursor.flags |= HAMMER_CURSOR_REBLOCKING; 1402f85fa4dSMatthew Dillon 141bf686dbeSMatthew Dillon error = hammer_btree_first(&cursor); 142bf686dbeSMatthew Dillon while (error == 0) { 1432f85fa4dSMatthew Dillon /* 1442f85fa4dSMatthew Dillon * Internal or Leaf node 1452f85fa4dSMatthew Dillon */ 14607ed04b5SMatthew Dillon KKASSERT(cursor.index < cursor.node->ondisk->count); 147bf686dbeSMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index]; 148dd94f1b1SMatthew Dillon reblock->key_cur.obj_id = elm->base.obj_id; 149dd94f1b1SMatthew Dillon reblock->key_cur.localization = elm->base.localization; 150bf686dbeSMatthew Dillon 1519480ff55SMatthew Dillon /* 1529f5097dcSMatthew Dillon * Yield to more important tasks 1539f5097dcSMatthew Dillon */ 1549f5097dcSMatthew Dillon if ((error = hammer_signal_check(trans->hmp)) != 0) 1559f5097dcSMatthew Dillon break; 156a7e9bef1SMatthew Dillon 157a7e9bef1SMatthew Dillon /* 158a7e9bef1SMatthew Dillon * If there is insufficient free space it may be due to 159a981af19STomohiro Kusumi * reserved big-blocks, which flushing might fix. 160c9ce54d6SMatthew Dillon * 16107ed04b5SMatthew Dillon * We must force a retest in case the unlocked cursor is 16207ed04b5SMatthew Dillon * moved to the end of the leaf, or moved to an internal 16307ed04b5SMatthew Dillon * node. 16407ed04b5SMatthew Dillon * 165c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 
166a7e9bef1SMatthew Dillon */ 1677b6ccb11SMatthew Dillon if (hammer_checkspace(trans->hmp, slop)) { 168a7e9bef1SMatthew Dillon if (++checkspace_count == 10) { 169a7e9bef1SMatthew Dillon error = ENOSPC; 170a7e9bef1SMatthew Dillon break; 171a7e9bef1SMatthew Dillon } 172982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 17307ed04b5SMatthew Dillon cursor.flags |= HAMMER_CURSOR_RETEST; 17493291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 175982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 1767a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 17707ed04b5SMatthew Dillon goto skip; 17893291532SMatthew Dillon } 179a7e9bef1SMatthew Dillon 180a7e9bef1SMatthew Dillon /* 1819480ff55SMatthew Dillon * Acquiring the sync_lock prevents the operation from 1829480ff55SMatthew Dillon * crossing a synchronization boundary. 18309ac686bSMatthew Dillon * 18409ac686bSMatthew Dillon * NOTE: cursor.node may have changed on return. 185c9ce54d6SMatthew Dillon * 186c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 1879480ff55SMatthew Dillon */ 1882f85fa4dSMatthew Dillon hammer_sync_lock_sh(trans); 18936f82b23SMatthew Dillon error = hammer_reblock_helper(reblock, &cursor, elm); 1902f85fa4dSMatthew Dillon hammer_sync_unlock(trans); 19193291532SMatthew Dillon 19215e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 1937a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 194982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 19593291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 196982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 19715e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 19893291532SMatthew Dillon } 1991b0ab2c3SMatthew Dillon 2001b0ab2c3SMatthew Dillon /* 2011b0ab2c3SMatthew Dillon * Setup for iteration, our cursor flags may be modified by 2021b0ab2c3SMatthew Dillon * other threads while we are unlocked. 
2031b0ab2c3SMatthew Dillon */ 204bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 2051b0ab2c3SMatthew Dillon 2061b0ab2c3SMatthew Dillon /* 2071b0ab2c3SMatthew Dillon * We allocate data buffers, which atm we don't track 2081b0ab2c3SMatthew Dillon * dirty levels for because we allow the kernel to write 2091b0ab2c3SMatthew Dillon * them. But if we allocate too many we can still deadlock 2101b0ab2c3SMatthew Dillon * the buffer cache. 2111b0ab2c3SMatthew Dillon * 212c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 2131b0ab2c3SMatthew Dillon * (The cursor's node and element may change!) 2141b0ab2c3SMatthew Dillon */ 2151b0ab2c3SMatthew Dillon if (bd_heatup()) { 216982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 2171b0ab2c3SMatthew Dillon bwillwrite(HAMMER_XBUFSIZE); 218982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 2191b0ab2c3SMatthew Dillon } 22055b50bd5SMatthew Dillon vm_wait_nominal(); 22107ed04b5SMatthew Dillon skip: 2221b0ab2c3SMatthew Dillon if (error == 0) { 223bf686dbeSMatthew Dillon error = hammer_btree_iterate(&cursor); 224bf686dbeSMatthew Dillon } 225bf686dbeSMatthew Dillon } 226bf686dbeSMatthew Dillon if (error == ENOENT) 227bf686dbeSMatthew Dillon error = 0; 228bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 22906ad81ffSMatthew Dillon if (error == EWOULDBLOCK) { 23006ad81ffSMatthew Dillon hammer_flusher_sync(trans->hmp); 23106ad81ffSMatthew Dillon goto retry; 23206ad81ffSMatthew Dillon } 233bf686dbeSMatthew Dillon if (error == EDEADLK) 234bf686dbeSMatthew Dillon goto retry; 23519619882SMatthew Dillon if (error == EINTR) { 23619619882SMatthew Dillon reblock->head.flags |= HAMMER_IOC_HEAD_INTR; 23719619882SMatthew Dillon error = 0; 23819619882SMatthew Dillon } 239dd94f1b1SMatthew Dillon failed: 240dd94f1b1SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 241bf686dbeSMatthew Dillon return(error); 242bf686dbeSMatthew Dillon } 243bf686dbeSMatthew Dillon 244bf686dbeSMatthew 
Dillon /* 245bf686dbeSMatthew Dillon * Reblock the B-Tree (leaf) node, record, and/or data if necessary. 246bf686dbeSMatthew Dillon * 2479480ff55SMatthew Dillon * XXX We have no visibility into internal B-Tree nodes at the moment, 2489480ff55SMatthew Dillon * only leaf nodes. 249bf686dbeSMatthew Dillon */ 250bf686dbeSMatthew Dillon static int 25136f82b23SMatthew Dillon hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 252bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 253bf686dbeSMatthew Dillon { 25443c665aeSMatthew Dillon hammer_mount_t hmp; 255bf686dbeSMatthew Dillon hammer_off_t tmp_offset; 256ebbcfba9SMatthew Dillon hammer_node_ondisk_t ondisk; 25744a83111SMatthew Dillon struct hammer_btree_leaf_elm leaf; 258bf686dbeSMatthew Dillon int error; 259bf686dbeSMatthew Dillon int bytes; 260bf686dbeSMatthew Dillon int cur; 261bf3b416bSMatthew Dillon int iocflags; 262bf686dbeSMatthew Dillon 263bf686dbeSMatthew Dillon error = 0; 26443c665aeSMatthew Dillon hmp = cursor->trans->hmp; 265bf686dbeSMatthew Dillon 266bf686dbeSMatthew Dillon /* 267bf686dbeSMatthew Dillon * Reblock data. Note that data embedded in a record is reblocked 2682f85fa4dSMatthew Dillon * by the record reblock code. Data processing only occurs at leaf 2692f85fa4dSMatthew Dillon * nodes and for RECORD element types. 270bf686dbeSMatthew Dillon */ 2712f85fa4dSMatthew Dillon if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF) 2722f85fa4dSMatthew Dillon goto skip; 2732f85fa4dSMatthew Dillon if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) 274c9d1310cSTomohiro Kusumi return(EINVAL); 275bf686dbeSMatthew Dillon tmp_offset = elm->leaf.data_offset; 276bf3b416bSMatthew Dillon if (tmp_offset == 0) 277bf3b416bSMatthew Dillon goto skip; 278bf3b416bSMatthew Dillon 279bf3b416bSMatthew Dillon /* 2807ef2d7b3STomohiro Kusumi * If reblock->vol_no is specified we only want to reblock data 2817ef2d7b3STomohiro Kusumi * in that volume, but ignore everything else. 
2827ef2d7b3STomohiro Kusumi */ 2837ef2d7b3STomohiro Kusumi if (reblock->vol_no != -1 && 2847ef2d7b3STomohiro Kusumi reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset)) 2857ef2d7b3STomohiro Kusumi goto skip; 2867ef2d7b3STomohiro Kusumi 2877ef2d7b3STomohiro Kusumi /* 288bf3b416bSMatthew Dillon * NOTE: Localization restrictions may also have been set-up, we can't 289bf3b416bSMatthew Dillon * just set the match flags willy-nilly here. 290bf3b416bSMatthew Dillon */ 291bf3b416bSMatthew Dillon switch(elm->leaf.base.rec_type) { 292bf3b416bSMatthew Dillon case HAMMER_RECTYPE_INODE: 29383f2a3aaSMatthew Dillon case HAMMER_RECTYPE_SNAPSHOT: 29483f2a3aaSMatthew Dillon case HAMMER_RECTYPE_CONFIG: 295bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_INODES; 296bf3b416bSMatthew Dillon break; 297bf3b416bSMatthew Dillon case HAMMER_RECTYPE_EXT: 298bf3b416bSMatthew Dillon case HAMMER_RECTYPE_FIX: 299ea434b6fSMatthew Dillon case HAMMER_RECTYPE_PFS: 300bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DIRENTRY: 301bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DIRS; 302bf3b416bSMatthew Dillon break; 303bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DATA: 304bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DB: 305bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DATA; 306bf3b416bSMatthew Dillon break; 307bf3b416bSMatthew Dillon default: 308bf3b416bSMatthew Dillon iocflags = 0; 309bf3b416bSMatthew Dillon break; 310bf3b416bSMatthew Dillon } 311bf3b416bSMatthew Dillon if (reblock->head.flags & iocflags) { 312bf686dbeSMatthew Dillon ++reblock->data_count; 313bf686dbeSMatthew Dillon reblock->data_byte_count += elm->leaf.data_len; 31443c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3156e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 31633234d14STomohiro Kusumi hdkprintf("D %6d/%d\n", bytes, reblock->free_level); 3174af7f537STomohiro Kusumi /* 3184af7f537STomohiro Kusumi * Start data reblock if 3194af7f537STomohiro Kusumi * 1. 
there is no error 3204af7f537STomohiro Kusumi * 2. the data and allocator offset are not in the same 3214af7f537STomohiro Kusumi * big-block, or free level threshold is 0 3224af7f537STomohiro Kusumi * 3. free bytes in the data's big-block is larger than 3234af7f537STomohiro Kusumi * free level threshold (means if threshold is 0 then 3244af7f537STomohiro Kusumi * do reblock no matter what). 3254af7f537STomohiro Kusumi */ 326bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 327bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 32844a83111SMatthew Dillon /* 32944a83111SMatthew Dillon * This is nasty, the uncache code may have to get 33044a83111SMatthew Dillon * vnode locks and because of that we can't hold 33144a83111SMatthew Dillon * the cursor locked. 332c9ce54d6SMatthew Dillon * 333c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() 334c9ce54d6SMatthew Dillon * function. 33544a83111SMatthew Dillon */ 33644a83111SMatthew Dillon leaf = elm->leaf; 337982be4bfSMatthew Dillon hammer_unlock_cursor(cursor); 33844a83111SMatthew Dillon hammer_io_direct_uncache(hmp, &leaf); 339982be4bfSMatthew Dillon hammer_lock_cursor(cursor); 340ebbcfba9SMatthew Dillon 341ebbcfba9SMatthew Dillon /* 342ebbcfba9SMatthew Dillon * elm may have become stale or invalid, reload it. 343ebbcfba9SMatthew Dillon * ondisk variable is temporary only. Note that 344ebbcfba9SMatthew Dillon * cursor->node and thus cursor->node->ondisk may 345ebbcfba9SMatthew Dillon * also changed. 
346ebbcfba9SMatthew Dillon */ 347ebbcfba9SMatthew Dillon ondisk = cursor->node->ondisk; 348ebbcfba9SMatthew Dillon elm = &ondisk->elms[cursor->index]; 34944a83111SMatthew Dillon if (cursor->flags & HAMMER_CURSOR_RETEST) { 350d053aa8aSTomohiro Kusumi hkprintf("debug: retest on reblocker uncache\n"); 35144a83111SMatthew Dillon error = EDEADLK; 352ebbcfba9SMatthew Dillon } else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF || 353ebbcfba9SMatthew Dillon cursor->index >= ondisk->count) { 354d053aa8aSTomohiro Kusumi hkprintf("debug: shifted on reblocker uncache\n"); 355ebbcfba9SMatthew Dillon error = EDEADLK; 356ebbcfba9SMatthew Dillon } else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) { 357d053aa8aSTomohiro Kusumi hkprintf("debug: changed on reblocker uncache\n"); 358ebbcfba9SMatthew Dillon error = EDEADLK; 35944a83111SMatthew Dillon } 36044a83111SMatthew Dillon if (error == 0) 361bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 362bf686dbeSMatthew Dillon if (error == 0) { 36307ed04b5SMatthew Dillon KKASSERT(cursor->index < ondisk->count); 36436f82b23SMatthew Dillon error = hammer_reblock_data(reblock, 365bf686dbeSMatthew Dillon cursor, elm); 366bf686dbeSMatthew Dillon } 367bf686dbeSMatthew Dillon if (error == 0) { 368bf686dbeSMatthew Dillon ++reblock->data_moves; 369bf686dbeSMatthew Dillon reblock->data_byte_moves += elm->leaf.data_len; 370bf686dbeSMatthew Dillon } 371bf686dbeSMatthew Dillon } 372bf686dbeSMatthew Dillon } 373bf686dbeSMatthew Dillon 3742f85fa4dSMatthew Dillon skip: 375bf686dbeSMatthew Dillon /* 3761775b6a0SMatthew Dillon * Reblock a B-Tree internal or leaf node. A leaf node is reblocked 3771775b6a0SMatthew Dillon * on initial entry only (element 0). An internal node is reblocked 378525fa6bbSTomohiro Kusumi * when entered upward from its first leaf node only (also element 0, 379525fa6bbSTomohiro Kusumi * see hammer_btree_iterate() where cursor moves up and may return). 
3801775b6a0SMatthew Dillon * Further revisits of the internal node (index > 0) are ignored. 381bf686dbeSMatthew Dillon */ 382bf686dbeSMatthew Dillon tmp_offset = cursor->node->node_offset; 3837ef2d7b3STomohiro Kusumi 3847ef2d7b3STomohiro Kusumi /* 3857ef2d7b3STomohiro Kusumi * If reblock->vol_no is specified we only want to reblock data 3867ef2d7b3STomohiro Kusumi * in that volume, but ignore everything else. 3877ef2d7b3STomohiro Kusumi */ 3887ef2d7b3STomohiro Kusumi if (reblock->vol_no != -1 && 3897ef2d7b3STomohiro Kusumi reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset)) 3907ef2d7b3STomohiro Kusumi goto end; 3917ef2d7b3STomohiro Kusumi 392bf3b416bSMatthew Dillon if (cursor->index == 0 && 393814387f6SMatthew Dillon error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { 394bf686dbeSMatthew Dillon ++reblock->btree_count; 39543c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3966e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 39733234d14STomohiro Kusumi hdkprintf("B %6d/%d\n", bytes, reblock->free_level); 3984af7f537STomohiro Kusumi /* 3994af7f537STomohiro Kusumi * Start node reblock if 4004af7f537STomohiro Kusumi * 1. there is no error 4014af7f537STomohiro Kusumi * 2. the node and allocator offset are not in the same 4024af7f537STomohiro Kusumi * big-block, or free level threshold is 0 4034af7f537STomohiro Kusumi * 3. free bytes in the node's big-block is larger than 4044af7f537STomohiro Kusumi * free level threshold (means if threshold is 0 then 4054af7f537STomohiro Kusumi * do reblock no matter what). 
4064af7f537STomohiro Kusumi */ 407bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 408bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 409bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 410bf686dbeSMatthew Dillon if (error == 0) { 41107ed04b5SMatthew Dillon if (cursor->parent) { 41207ed04b5SMatthew Dillon KKASSERT(cursor->parent_index < 41307ed04b5SMatthew Dillon cursor->parent->ondisk->count); 414bf686dbeSMatthew Dillon elm = &cursor->parent->ondisk->elms[cursor->parent_index]; 41507ed04b5SMatthew Dillon } else { 416bf686dbeSMatthew Dillon elm = NULL; 41707ed04b5SMatthew Dillon } 4182f85fa4dSMatthew Dillon switch(cursor->node->ondisk->type) { 4192f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_LEAF: 4202f85fa4dSMatthew Dillon error = hammer_reblock_leaf_node( 4212f85fa4dSMatthew Dillon reblock, cursor, elm); 4222f85fa4dSMatthew Dillon break; 4232f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_INTERNAL: 4242f85fa4dSMatthew Dillon error = hammer_reblock_int_node( 4252f85fa4dSMatthew Dillon reblock, cursor, elm); 4262f85fa4dSMatthew Dillon break; 4272f85fa4dSMatthew Dillon default: 4285134aacdSTomohiro Kusumi hpanic("Illegal B-Tree node type"); 4292f85fa4dSMatthew Dillon } 430bf686dbeSMatthew Dillon } 431bf686dbeSMatthew Dillon if (error == 0) { 432bf686dbeSMatthew Dillon ++reblock->btree_moves; 433bf686dbeSMatthew Dillon } 434bf686dbeSMatthew Dillon } 435bf686dbeSMatthew Dillon } 4367ef2d7b3STomohiro Kusumi end: 437ebc5d79eSTomohiro Kusumi hammer_cursor_downgrade(cursor); 438bf686dbeSMatthew Dillon return(error); 439bf686dbeSMatthew Dillon } 440bf686dbeSMatthew Dillon 441bf686dbeSMatthew Dillon /* 442bf686dbeSMatthew Dillon * Reblock a record's data. Both the B-Tree element and record pointers 443bf686dbeSMatthew Dillon * to the data must be adjusted. 
444bf686dbeSMatthew Dillon */ 445bf686dbeSMatthew Dillon static int 44636f82b23SMatthew Dillon hammer_reblock_data(struct hammer_ioc_reblock *reblock, 447bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 448bf686dbeSMatthew Dillon { 449bf686dbeSMatthew Dillon struct hammer_buffer *data_buffer = NULL; 450bc996e65STomohiro Kusumi hammer_off_t odata_offset; 451bf686dbeSMatthew Dillon hammer_off_t ndata_offset; 452bf686dbeSMatthew Dillon int error; 453bf686dbeSMatthew Dillon void *ndata; 454bf686dbeSMatthew Dillon 455*0a6fabdbSTomohiro Kusumi error = hammer_btree_extract_data(cursor); 456bf686dbeSMatthew Dillon if (error) 457bf686dbeSMatthew Dillon return (error); 45836f82b23SMatthew Dillon ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, 459bf3b416bSMatthew Dillon elm->leaf.base.rec_type, 460df2ccbacSMatthew Dillon &ndata_offset, &data_buffer, 461df2ccbacSMatthew Dillon 0, &error); 462bf686dbeSMatthew Dillon if (error) 463bf686dbeSMatthew Dillon goto done; 464b8a41159SMatthew Dillon hammer_io_notmeta(data_buffer); 465bf686dbeSMatthew Dillon 466bf686dbeSMatthew Dillon /* 467b9107f58SMatthew Dillon * Move the data. Note that we must invalidate any cached 468b9107f58SMatthew Dillon * data buffer in the cursor before calling blockmap_free. 469e04ee2deSTomohiro Kusumi * The blockmap_free may free up the entire big-block and 470b9107f58SMatthew Dillon * will not be able to invalidate it if the cursor is holding 471d165c90aSTomohiro Kusumi * a data buffer cached in that big-block. 
472bf686dbeSMatthew Dillon */ 473f1c0ae53STomohiro Kusumi hammer_modify_buffer_noundo(cursor->trans, data_buffer); 474bf686dbeSMatthew Dillon bcopy(cursor->data, ndata, elm->leaf.data_len); 47510a5d1baSMatthew Dillon hammer_modify_buffer_done(data_buffer); 476b9107f58SMatthew Dillon hammer_cursor_invalidate_cache(cursor); 477bf686dbeSMatthew Dillon 47836f82b23SMatthew Dillon hammer_blockmap_free(cursor->trans, 47936f82b23SMatthew Dillon elm->leaf.data_offset, elm->leaf.data_len); 480bf686dbeSMatthew Dillon 48110a5d1baSMatthew Dillon hammer_modify_node(cursor->trans, cursor->node, 48210a5d1baSMatthew Dillon &elm->leaf.data_offset, sizeof(hammer_off_t)); 483bc996e65STomohiro Kusumi odata_offset = elm->leaf.data_offset; 484bf686dbeSMatthew Dillon elm->leaf.data_offset = ndata_offset; 48510a5d1baSMatthew Dillon hammer_modify_node_done(cursor->node); 486bf686dbeSMatthew Dillon 487bc996e65STomohiro Kusumi if (hammer_debug_general & 0x4000) { 48833234d14STomohiro Kusumi hdkprintf("%08x %016llx -> %016llx\n", 489a6af8eaeSTomohiro Kusumi (elm ? elm->base.localization : -1), 490bc996e65STomohiro Kusumi (long long)odata_offset, 491bc996e65STomohiro Kusumi (long long)ndata_offset); 492bc996e65STomohiro Kusumi } 493bf686dbeSMatthew Dillon done: 494bf686dbeSMatthew Dillon if (data_buffer) 495bf686dbeSMatthew Dillon hammer_rel_buffer(data_buffer, 0); 496bf686dbeSMatthew Dillon return (error); 497bf686dbeSMatthew Dillon } 498bf686dbeSMatthew Dillon 499bf686dbeSMatthew Dillon /* 5002f85fa4dSMatthew Dillon * Reblock a B-Tree leaf node. The parent must be adjusted to point to 5012f85fa4dSMatthew Dillon * the new copy of the leaf node. 502bf686dbeSMatthew Dillon * 5032f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 
504bf686dbeSMatthew Dillon */ 505bf686dbeSMatthew Dillon static int 5062f85fa4dSMatthew Dillon hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 507bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 508bf686dbeSMatthew Dillon { 509bf686dbeSMatthew Dillon hammer_node_t onode; 510bf686dbeSMatthew Dillon hammer_node_t nnode; 511bf686dbeSMatthew Dillon int error; 512bf686dbeSMatthew Dillon 513df2ccbacSMatthew Dillon /* 514df2ccbacSMatthew Dillon * Don't supply a hint when allocating the leaf. Fills are done 515df2ccbacSMatthew Dillon * from the leaf upwards. 516df2ccbacSMatthew Dillon */ 517bf686dbeSMatthew Dillon onode = cursor->node; 518df2ccbacSMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5198d0efe43SMatthew Dillon 520bf686dbeSMatthew Dillon if (nnode == NULL) 521bf686dbeSMatthew Dillon return (error); 522bf686dbeSMatthew Dillon 52309ac686bSMatthew Dillon hammer_lock_ex(&nnode->lock); 52409ac686bSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 525bf686dbeSMatthew Dillon 52626748b87STomohiro Kusumi hammer_move_node(cursor, elm, onode, nnode); 52726748b87STomohiro Kusumi 528bf686dbeSMatthew Dillon /* 52926748b87STomohiro Kusumi * Clean up. 53026748b87STomohiro Kusumi * 53126748b87STomohiro Kusumi * The new node replaces the current node in the cursor. The cursor 53226748b87STomohiro Kusumi * expects it to be locked so leave it locked. Discard onode. 533bf686dbeSMatthew Dillon */ 534b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 53536f82b23SMatthew Dillon hammer_delete_node(cursor->trans, onode); 536bf686dbeSMatthew Dillon 537b58c6388SMatthew Dillon if (hammer_debug_general & 0x4000) { 53833234d14STomohiro Kusumi hdkprintf("%08x %016llx -> %016llx\n", 539a6af8eaeSTomohiro Kusumi (elm ? 
elm->base.localization : -1), 540973c11b9SMatthew Dillon (long long)onode->node_offset, 541973c11b9SMatthew Dillon (long long)nnode->node_offset); 542b58c6388SMatthew Dillon } 5438d0efe43SMatthew Dillon hammer_modify_node_done(nnode); 544bf686dbeSMatthew Dillon cursor->node = nnode; 54509ac686bSMatthew Dillon 54609ac686bSMatthew Dillon hammer_unlock(&onode->lock); 547bf686dbeSMatthew Dillon hammer_rel_node(onode); 548bf686dbeSMatthew Dillon 549bf686dbeSMatthew Dillon return (error); 550bf686dbeSMatthew Dillon } 551bf686dbeSMatthew Dillon 5522f85fa4dSMatthew Dillon /* 5532f85fa4dSMatthew Dillon * Reblock a B-Tree internal node. The parent must be adjusted to point to 5542f85fa4dSMatthew Dillon * the new copy of the internal node, and the node's children's parent 5552f85fa4dSMatthew Dillon * pointers must also be adjusted to point to the new copy. 5562f85fa4dSMatthew Dillon * 5572f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 5582f85fa4dSMatthew Dillon */ 5592f85fa4dSMatthew Dillon static int 5602f85fa4dSMatthew Dillon hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 5612f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 5622f85fa4dSMatthew Dillon { 5631775b6a0SMatthew Dillon struct hammer_node_lock lockroot; 5642f85fa4dSMatthew Dillon hammer_node_t onode; 5652f85fa4dSMatthew Dillon hammer_node_t nnode; 5662f85fa4dSMatthew Dillon int error; 5672f85fa4dSMatthew Dillon 5681775b6a0SMatthew Dillon hammer_node_lock_init(&lockroot, cursor->node); 56924cf83d2SMatthew Dillon error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL); 5702f85fa4dSMatthew Dillon if (error) 5712f85fa4dSMatthew Dillon goto done; 5722f85fa4dSMatthew Dillon 573525fa6bbSTomohiro Kusumi /* 574525fa6bbSTomohiro Kusumi * Don't supply a hint when allocating the leaf. Fills are done 575525fa6bbSTomohiro Kusumi * from the leaf upwards. 
576525fa6bbSTomohiro Kusumi */ 5772f85fa4dSMatthew Dillon onode = cursor->node; 578b4f86ea3SMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5792f85fa4dSMatthew Dillon 5802f85fa4dSMatthew Dillon if (nnode == NULL) 5812f85fa4dSMatthew Dillon goto done; 5822f85fa4dSMatthew Dillon 5832f85fa4dSMatthew Dillon hammer_lock_ex(&nnode->lock); 5842f85fa4dSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 5852f85fa4dSMatthew Dillon 58626748b87STomohiro Kusumi hammer_move_node(cursor, elm, onode, nnode); 5872f85fa4dSMatthew Dillon 5882f85fa4dSMatthew Dillon /* 5892f85fa4dSMatthew Dillon * Clean up. 5902f85fa4dSMatthew Dillon * 5912f85fa4dSMatthew Dillon * The new node replaces the current node in the cursor. The cursor 5922f85fa4dSMatthew Dillon * expects it to be locked so leave it locked. Discard onode. 5932f85fa4dSMatthew Dillon */ 594b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 5952f85fa4dSMatthew Dillon hammer_delete_node(cursor->trans, onode); 5962f85fa4dSMatthew Dillon 5972f85fa4dSMatthew Dillon if (hammer_debug_general & 0x4000) { 59833234d14STomohiro Kusumi hdkprintf("%08x %016llx -> %016llx\n", 599a6af8eaeSTomohiro Kusumi (elm ? 
elm->base.localization : -1), 600973c11b9SMatthew Dillon (long long)onode->node_offset, 601973c11b9SMatthew Dillon (long long)nnode->node_offset); 6022f85fa4dSMatthew Dillon } 6032f85fa4dSMatthew Dillon hammer_modify_node_done(nnode); 6042f85fa4dSMatthew Dillon cursor->node = nnode; 6052f85fa4dSMatthew Dillon 6062f85fa4dSMatthew Dillon hammer_unlock(&onode->lock); 6072f85fa4dSMatthew Dillon hammer_rel_node(onode); 6082f85fa4dSMatthew Dillon 6092f85fa4dSMatthew Dillon done: 61024cf83d2SMatthew Dillon hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL); 6112f85fa4dSMatthew Dillon return (error); 6122f85fa4dSMatthew Dillon } 6132f85fa4dSMatthew Dillon 61426748b87STomohiro Kusumi /* 61526748b87STomohiro Kusumi * nnode is a newly allocated node, and now elm becomes the node 61626748b87STomohiro Kusumi * element within nnode's parent that represents a pointer to nnode, 61726748b87STomohiro Kusumi * or nnode becomes the root node if elm does not exist. 61826748b87STomohiro Kusumi */ 61926748b87STomohiro Kusumi static void 62026748b87STomohiro Kusumi hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm, 62126748b87STomohiro Kusumi hammer_node_t onode, hammer_node_t nnode) 62226748b87STomohiro Kusumi { 62326748b87STomohiro Kusumi int error, i; 62426748b87STomohiro Kusumi 62526748b87STomohiro Kusumi bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 62626748b87STomohiro Kusumi 62726748b87STomohiro Kusumi /* 62826748b87STomohiro Kusumi * Adjust the parent's pointer to us first. 
62926748b87STomohiro Kusumi */ 63026748b87STomohiro Kusumi if (elm) { 63126748b87STomohiro Kusumi /* 63226748b87STomohiro Kusumi * We are not the root of the B-Tree 63326748b87STomohiro Kusumi */ 6341424c922STomohiro Kusumi KKASSERT(hammer_is_internal_node_elm(elm)); 63526748b87STomohiro Kusumi hammer_modify_node(cursor->trans, cursor->parent, 63626748b87STomohiro Kusumi &elm->internal.subtree_offset, 63726748b87STomohiro Kusumi sizeof(elm->internal.subtree_offset)); 63826748b87STomohiro Kusumi elm->internal.subtree_offset = nnode->node_offset; 63926748b87STomohiro Kusumi hammer_modify_node_done(cursor->parent); 64026748b87STomohiro Kusumi } else { 64126748b87STomohiro Kusumi /* 64226748b87STomohiro Kusumi * We are the root of the B-Tree 64326748b87STomohiro Kusumi */ 64426748b87STomohiro Kusumi hammer_volume_t volume; 64526748b87STomohiro Kusumi volume = hammer_get_root_volume(cursor->trans->hmp, &error); 64626748b87STomohiro Kusumi KKASSERT(error == 0); 64726748b87STomohiro Kusumi 64826748b87STomohiro Kusumi hammer_modify_volume_field(cursor->trans, volume, 64926748b87STomohiro Kusumi vol0_btree_root); 65026748b87STomohiro Kusumi volume->ondisk->vol0_btree_root = nnode->node_offset; 65126748b87STomohiro Kusumi hammer_modify_volume_done(volume); 65226748b87STomohiro Kusumi hammer_rel_volume(volume, 0); 65326748b87STomohiro Kusumi } 65426748b87STomohiro Kusumi 65526748b87STomohiro Kusumi /* 65626748b87STomohiro Kusumi * Now adjust our children's pointers to us 65726748b87STomohiro Kusumi * if we are an internal node. 
65826748b87STomohiro Kusumi */ 65926748b87STomohiro Kusumi if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) { 66026748b87STomohiro Kusumi for (i = 0; i < nnode->ondisk->count; ++i) { 66190f96c37STomohiro Kusumi error = btree_set_parent_of_child(cursor->trans, nnode, 66226748b87STomohiro Kusumi &nnode->ondisk->elms[i]); 66326748b87STomohiro Kusumi if (error) 6645134aacdSTomohiro Kusumi hpanic("reblock internal node: fixup problem"); 66526748b87STomohiro Kusumi } 66626748b87STomohiro Kusumi } 66726748b87STomohiro Kusumi } 668