1bf686dbeSMatthew Dillon /* 255b50bd5SMatthew Dillon * Copyright (c) 2008-2012 The DragonFly Project. All rights reserved. 3bf686dbeSMatthew Dillon * 4bf686dbeSMatthew Dillon * This code is derived from software contributed to The DragonFly Project 5bf686dbeSMatthew Dillon * by Matthew Dillon <dillon@backplane.com> 6bf686dbeSMatthew Dillon * 7bf686dbeSMatthew Dillon * Redistribution and use in source and binary forms, with or without 8bf686dbeSMatthew Dillon * modification, are permitted provided that the following conditions 9bf686dbeSMatthew Dillon * are met: 10bf686dbeSMatthew Dillon * 11bf686dbeSMatthew Dillon * 1. Redistributions of source code must retain the above copyright 12bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer. 13bf686dbeSMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 14bf686dbeSMatthew Dillon * notice, this list of conditions and the following disclaimer in 15bf686dbeSMatthew Dillon * the documentation and/or other materials provided with the 16bf686dbeSMatthew Dillon * distribution. 17bf686dbeSMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 18bf686dbeSMatthew Dillon * contributors may be used to endorse or promote products derived 19bf686dbeSMatthew Dillon * from this software without specific, prior written permission. 20bf686dbeSMatthew Dillon * 21bf686dbeSMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22bf686dbeSMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23bf686dbeSMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24bf686dbeSMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25bf686dbeSMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26bf686dbeSMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27bf686dbeSMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28bf686dbeSMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29bf686dbeSMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30bf686dbeSMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31bf686dbeSMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32bf686dbeSMatthew Dillon * SUCH DAMAGE. 33bf686dbeSMatthew Dillon */ 34bf686dbeSMatthew Dillon /* 35bf686dbeSMatthew Dillon * HAMMER reblocker - This code frees up fragmented physical space 36bf686dbeSMatthew Dillon * 37bf686dbeSMatthew Dillon * HAMMER only keeps track of free space on a big-block basis. A big-block 38bf686dbeSMatthew Dillon * containing holes can only be freed by migrating the remaining data in 39bf686dbeSMatthew Dillon * that big-block into a new big-block, then freeing the big-block. 40bf686dbeSMatthew Dillon * 41bf686dbeSMatthew Dillon * This function is called from an ioctl or via the hammer support thread. 42bf686dbeSMatthew Dillon */ 43bf686dbeSMatthew Dillon 44bf686dbeSMatthew Dillon #include "hammer.h" 45bf686dbeSMatthew Dillon 4636f82b23SMatthew Dillon static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 47bf686dbeSMatthew Dillon hammer_cursor_t cursor, 48bf686dbeSMatthew Dillon hammer_btree_elm_t elm); 4936f82b23SMatthew Dillon static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, 50bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 512f85fa4dSMatthew Dillon static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 522f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 532f85fa4dSMatthew Dillon static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 54bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm); 5526748b87STomohiro Kusumi static void hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm, 5626748b87STomohiro Kusumi hammer_node_t onode, hammer_node_t nnode); 57bf686dbeSMatthew Dillon 58bf686dbeSMatthew Dillon int 5936f82b23SMatthew Dillon hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, 6036f82b23SMatthew Dillon struct hammer_ioc_reblock *reblock) 61bf686dbeSMatthew Dillon { 62bf686dbeSMatthew Dillon struct hammer_cursor cursor; 63bf686dbeSMatthew Dillon hammer_btree_elm_t elm; 64a7e9bef1SMatthew Dillon int checkspace_count; 6593291532SMatthew Dillon int error; 6693291532SMatthew Dillon int seq; 677b6ccb11SMatthew Dillon int slop; 686540d157STomohiro Kusumi u_int32_t key_end_localization; 697b6ccb11SMatthew Dillon 704fa5fb92STomohiro Kusumi if ((reblock->key_beg.localization | reblock->key_end.localization) & 714fa5fb92STomohiro Kusumi HAMMER_LOCALIZE_PSEUDOFS_MASK) { 724fa5fb92STomohiro Kusumi return(EINVAL); 734fa5fb92STomohiro Kusumi } 744fa5fb92STomohiro Kusumi if (reblock->key_beg.obj_id >= reblock->key_end.obj_id) 754fa5fb92STomohiro Kusumi return(EINVAL); 764fa5fb92STomohiro Kusumi if (reblock->free_level < 0 || 774fa5fb92STomohiro Kusumi reblock->free_level > HAMMER_BIGBLOCK_SIZE) 784fa5fb92STomohiro Kusumi return(EINVAL); 794fa5fb92STomohiro Kusumi 807b6ccb11SMatthew Dillon /* 81558a44e2STomohiro Kusumi * A fill_percentage <= 20% is considered an emergency. free_level is 82558a44e2STomohiro Kusumi * inverted from fill_percentage. 837b6ccb11SMatthew Dillon */ 84e04ee2deSTomohiro Kusumi if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10) 857b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_EMERGENCY; 867b6ccb11SMatthew Dillon else 877b6ccb11SMatthew Dillon slop = HAMMER_CHKSPC_REBLOCK; 88bf686dbeSMatthew Dillon 896540d157STomohiro Kusumi /* 906540d157STomohiro Kusumi * Ioctl caller has only set localization type to reblock. 916540d157STomohiro Kusumi * Initialize cursor key localization with ip localization. 926540d157STomohiro Kusumi */ 93dd94f1b1SMatthew Dillon reblock->key_cur = reblock->key_beg; 94842e7a70SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 955e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 96dd94f1b1SMatthew Dillon reblock->key_cur.localization += ip->obj_localization; 97814387f6SMatthew Dillon 986540d157STomohiro Kusumi key_end_localization = reblock->key_end.localization; 996540d157STomohiro Kusumi key_end_localization &= HAMMER_LOCALIZE_MASK; 1005e1e1454STomohiro Kusumi if (reblock->allpfs == 0) 1016540d157STomohiro Kusumi key_end_localization += ip->obj_localization; 1025e1e1454STomohiro Kusumi else 1035e1e1454STomohiro Kusumi key_end_localization += ((HAMMER_MAX_PFS - 1) << 16); 1046540d157STomohiro Kusumi 105a7e9bef1SMatthew Dillon checkspace_count = 0; 106e86903d8SMatthew Dillon seq = trans->hmp->flusher.done; 107bf686dbeSMatthew Dillon retry: 1084e17f465SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 109bf686dbeSMatthew Dillon if (error) { 110bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 111dd94f1b1SMatthew Dillon goto failed; 112bf686dbeSMatthew Dillon } 113dd94f1b1SMatthew Dillon cursor.key_beg.localization = reblock->key_cur.localization; 114dd94f1b1SMatthew Dillon cursor.key_beg.obj_id = reblock->key_cur.obj_id; 115bf686dbeSMatthew Dillon cursor.key_beg.key = HAMMER_MIN_KEY; 116bf686dbeSMatthew Dillon cursor.key_beg.create_tid = 1; 117bf686dbeSMatthew Dillon cursor.key_beg.delete_tid = 0; 118bf686dbeSMatthew Dillon cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; 119bf686dbeSMatthew Dillon cursor.key_beg.obj_type = 0; 120bf686dbeSMatthew Dillon 1216540d157STomohiro Kusumi cursor.key_end.localization = key_end_localization; 122dd94f1b1SMatthew Dillon cursor.key_end.obj_id = reblock->key_end.obj_id; 123bf686dbeSMatthew Dillon cursor.key_end.key = HAMMER_MAX_KEY; 124bf686dbeSMatthew Dillon cursor.key_end.create_tid = HAMMER_MAX_TID - 1; 125bf686dbeSMatthew Dillon cursor.key_end.delete_tid = 0; 126bf686dbeSMatthew Dillon cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 127bf686dbeSMatthew Dillon cursor.key_end.obj_type = 0; 128bf686dbeSMatthew Dillon 129bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1309480ff55SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 13118bee4a2SMatthew Dillon cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; 132bf686dbeSMatthew Dillon 1332f85fa4dSMatthew Dillon /* 1342f85fa4dSMatthew Dillon * This flag allows the btree scan code to return internal nodes, 1352f85fa4dSMatthew Dillon * so we can reblock them in addition to the leafs. Only specify it 1362f85fa4dSMatthew Dillon * if we intend to reblock B-Tree nodes. 1372f85fa4dSMatthew Dillon */ 1382f85fa4dSMatthew Dillon if (reblock->head.flags & HAMMER_IOC_DO_BTREE) 1392f85fa4dSMatthew Dillon cursor.flags |= HAMMER_CURSOR_REBLOCKING; 1402f85fa4dSMatthew Dillon 141bf686dbeSMatthew Dillon error = hammer_btree_first(&cursor); 142bf686dbeSMatthew Dillon while (error == 0) { 1432f85fa4dSMatthew Dillon /* 1442f85fa4dSMatthew Dillon * Internal or Leaf node 1452f85fa4dSMatthew Dillon */ 14607ed04b5SMatthew Dillon KKASSERT(cursor.index < cursor.node->ondisk->count); 147bf686dbeSMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index]; 148dd94f1b1SMatthew Dillon reblock->key_cur.obj_id = elm->base.obj_id; 149dd94f1b1SMatthew Dillon reblock->key_cur.localization = elm->base.localization; 150bf686dbeSMatthew Dillon 1519480ff55SMatthew Dillon /* 1529f5097dcSMatthew Dillon * Yield to more important tasks 1539f5097dcSMatthew Dillon */ 1549f5097dcSMatthew Dillon if ((error = hammer_signal_check(trans->hmp)) != 0) 1559f5097dcSMatthew Dillon break; 156a7e9bef1SMatthew Dillon 157a7e9bef1SMatthew Dillon /* 158a7e9bef1SMatthew Dillon * If there is insufficient free space it may be due to 159a981af19STomohiro Kusumi * reserved big-blocks, which flushing might fix. 160c9ce54d6SMatthew Dillon * 16107ed04b5SMatthew Dillon * We must force a retest in case the unlocked cursor is 16207ed04b5SMatthew Dillon * moved to the end of the leaf, or moved to an internal 16307ed04b5SMatthew Dillon * node. 16407ed04b5SMatthew Dillon * 165c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 166a7e9bef1SMatthew Dillon */ 1677b6ccb11SMatthew Dillon if (hammer_checkspace(trans->hmp, slop)) { 168a7e9bef1SMatthew Dillon if (++checkspace_count == 10) { 169a7e9bef1SMatthew Dillon error = ENOSPC; 170a7e9bef1SMatthew Dillon break; 171a7e9bef1SMatthew Dillon } 172982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 17307ed04b5SMatthew Dillon cursor.flags |= HAMMER_CURSOR_RETEST; 17493291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 175982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 1767a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 17707ed04b5SMatthew Dillon goto skip; 17893291532SMatthew Dillon } 179a7e9bef1SMatthew Dillon 180a7e9bef1SMatthew Dillon /* 1819480ff55SMatthew Dillon * Acquiring the sync_lock prevents the operation from 1829480ff55SMatthew Dillon * crossing a synchronization boundary. 18309ac686bSMatthew Dillon * 18409ac686bSMatthew Dillon * NOTE: cursor.node may have changed on return. 185c9ce54d6SMatthew Dillon * 186c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 1879480ff55SMatthew Dillon */ 1882f85fa4dSMatthew Dillon hammer_sync_lock_sh(trans); 18936f82b23SMatthew Dillon error = hammer_reblock_helper(reblock, &cursor, elm); 1902f85fa4dSMatthew Dillon hammer_sync_unlock(trans); 19193291532SMatthew Dillon 19215e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 1937a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 194982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 19593291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 196982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 19715e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 19893291532SMatthew Dillon } 1991b0ab2c3SMatthew Dillon 2001b0ab2c3SMatthew Dillon /* 2011b0ab2c3SMatthew Dillon * Setup for iteration, our cursor flags may be modified by 2021b0ab2c3SMatthew Dillon * other threads while we are unlocked. 2031b0ab2c3SMatthew Dillon */ 204bf686dbeSMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 2051b0ab2c3SMatthew Dillon 2061b0ab2c3SMatthew Dillon /* 2071b0ab2c3SMatthew Dillon * We allocate data buffers, which atm we don't track 2081b0ab2c3SMatthew Dillon * dirty levels for because we allow the kernel to write 2091b0ab2c3SMatthew Dillon * them. But if we allocate too many we can still deadlock 2101b0ab2c3SMatthew Dillon * the buffer cache. 2111b0ab2c3SMatthew Dillon * 212c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 2131b0ab2c3SMatthew Dillon * (The cursor's node and element may change!) 2141b0ab2c3SMatthew Dillon */ 2151b0ab2c3SMatthew Dillon if (bd_heatup()) { 216982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 2171b0ab2c3SMatthew Dillon bwillwrite(HAMMER_XBUFSIZE); 218982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 2191b0ab2c3SMatthew Dillon } 22055b50bd5SMatthew Dillon vm_wait_nominal(); 22107ed04b5SMatthew Dillon skip: 2221b0ab2c3SMatthew Dillon if (error == 0) { 223bf686dbeSMatthew Dillon error = hammer_btree_iterate(&cursor); 224bf686dbeSMatthew Dillon } 225bf686dbeSMatthew Dillon } 226bf686dbeSMatthew Dillon if (error == ENOENT) 227bf686dbeSMatthew Dillon error = 0; 228bf686dbeSMatthew Dillon hammer_done_cursor(&cursor); 22906ad81ffSMatthew Dillon if (error == EWOULDBLOCK) { 23006ad81ffSMatthew Dillon hammer_flusher_sync(trans->hmp); 23106ad81ffSMatthew Dillon goto retry; 23206ad81ffSMatthew Dillon } 233bf686dbeSMatthew Dillon if (error == EDEADLK) 234bf686dbeSMatthew Dillon goto retry; 23519619882SMatthew Dillon if (error == EINTR) { 23619619882SMatthew Dillon reblock->head.flags |= HAMMER_IOC_HEAD_INTR; 23719619882SMatthew Dillon error = 0; 23819619882SMatthew Dillon } 239dd94f1b1SMatthew Dillon failed: 240dd94f1b1SMatthew Dillon reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 241bf686dbeSMatthew Dillon return(error); 242bf686dbeSMatthew Dillon } 243bf686dbeSMatthew Dillon 244bf686dbeSMatthew Dillon /* 245bf686dbeSMatthew Dillon * Reblock the B-Tree (leaf) node, record, and/or data if necessary. 246bf686dbeSMatthew Dillon * 2479480ff55SMatthew Dillon * XXX We have no visibility into internal B-Tree nodes at the moment, 2489480ff55SMatthew Dillon * only leaf nodes. 249bf686dbeSMatthew Dillon */ 250bf686dbeSMatthew Dillon static int 25136f82b23SMatthew Dillon hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 252bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 253bf686dbeSMatthew Dillon { 25443c665aeSMatthew Dillon hammer_mount_t hmp; 255bf686dbeSMatthew Dillon hammer_off_t tmp_offset; 256ebbcfba9SMatthew Dillon hammer_node_ondisk_t ondisk; 25744a83111SMatthew Dillon struct hammer_btree_leaf_elm leaf; 258bf686dbeSMatthew Dillon int error; 259bf686dbeSMatthew Dillon int bytes; 260bf686dbeSMatthew Dillon int cur; 261bf3b416bSMatthew Dillon int iocflags; 262bf686dbeSMatthew Dillon 263bf686dbeSMatthew Dillon error = 0; 26443c665aeSMatthew Dillon hmp = cursor->trans->hmp; 265bf686dbeSMatthew Dillon 266bf686dbeSMatthew Dillon /* 267bf686dbeSMatthew Dillon * Reblock data. Note that data embedded in a record is reblocked 2682f85fa4dSMatthew Dillon * by the record reblock code. Data processing only occurs at leaf 2692f85fa4dSMatthew Dillon * nodes and for RECORD element types. 270bf686dbeSMatthew Dillon */ 2712f85fa4dSMatthew Dillon if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF) 2722f85fa4dSMatthew Dillon goto skip; 2732f85fa4dSMatthew Dillon if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) 2742f85fa4dSMatthew Dillon return(0); 275bf686dbeSMatthew Dillon tmp_offset = elm->leaf.data_offset; 276bf3b416bSMatthew Dillon if (tmp_offset == 0) 277bf3b416bSMatthew Dillon goto skip; 278bf3b416bSMatthew Dillon 279bf3b416bSMatthew Dillon /* 2807ef2d7b3STomohiro Kusumi * If reblock->vol_no is specified we only want to reblock data 2817ef2d7b3STomohiro Kusumi * in that volume, but ignore everything else. 2827ef2d7b3STomohiro Kusumi */ 2837ef2d7b3STomohiro Kusumi if (reblock->vol_no != -1 && 2847ef2d7b3STomohiro Kusumi reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset)) 2857ef2d7b3STomohiro Kusumi goto skip; 2867ef2d7b3STomohiro Kusumi 2877ef2d7b3STomohiro Kusumi /* 288bf3b416bSMatthew Dillon * NOTE: Localization restrictions may also have been set-up, we can't 289bf3b416bSMatthew Dillon * just set the match flags willy-nilly here. 290bf3b416bSMatthew Dillon */ 291bf3b416bSMatthew Dillon switch(elm->leaf.base.rec_type) { 292bf3b416bSMatthew Dillon case HAMMER_RECTYPE_INODE: 29383f2a3aaSMatthew Dillon case HAMMER_RECTYPE_SNAPSHOT: 29483f2a3aaSMatthew Dillon case HAMMER_RECTYPE_CONFIG: 295bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_INODES; 296bf3b416bSMatthew Dillon break; 297bf3b416bSMatthew Dillon case HAMMER_RECTYPE_EXT: 298bf3b416bSMatthew Dillon case HAMMER_RECTYPE_FIX: 299ea434b6fSMatthew Dillon case HAMMER_RECTYPE_PFS: 300bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DIRENTRY: 301bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DIRS; 302bf3b416bSMatthew Dillon break; 303bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DATA: 304bf3b416bSMatthew Dillon case HAMMER_RECTYPE_DB: 305bf3b416bSMatthew Dillon iocflags = HAMMER_IOC_DO_DATA; 306bf3b416bSMatthew Dillon break; 307bf3b416bSMatthew Dillon default: 308bf3b416bSMatthew Dillon iocflags = 0; 309bf3b416bSMatthew Dillon break; 310bf3b416bSMatthew Dillon } 311bf3b416bSMatthew Dillon if (reblock->head.flags & iocflags) { 312bf686dbeSMatthew Dillon ++reblock->data_count; 313bf686dbeSMatthew Dillon reblock->data_byte_count += elm->leaf.data_len; 31443c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3156e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 3162f85fa4dSMatthew Dillon kprintf("D %6d/%d\n", bytes, reblock->free_level); 3174af7f537STomohiro Kusumi /* 3184af7f537STomohiro Kusumi * Start data reblock if 3194af7f537STomohiro Kusumi * 1. there is no error 3204af7f537STomohiro Kusumi * 2. the data and allocator offset are not in the same 3214af7f537STomohiro Kusumi * big-block, or free level threshold is 0 3224af7f537STomohiro Kusumi * 3. free bytes in the data's big-block is larger than 3234af7f537STomohiro Kusumi * free level threshold (means if threshold is 0 then 3244af7f537STomohiro Kusumi * do reblock no matter what). 3254af7f537STomohiro Kusumi */ 326bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 327bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 32844a83111SMatthew Dillon /* 32944a83111SMatthew Dillon * This is nasty, the uncache code may have to get 33044a83111SMatthew Dillon * vnode locks and because of that we can't hold 33144a83111SMatthew Dillon * the cursor locked. 332c9ce54d6SMatthew Dillon * 333c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() 334c9ce54d6SMatthew Dillon * function. 33544a83111SMatthew Dillon */ 33644a83111SMatthew Dillon leaf = elm->leaf; 337982be4bfSMatthew Dillon hammer_unlock_cursor(cursor); 33844a83111SMatthew Dillon hammer_io_direct_uncache(hmp, &leaf); 339982be4bfSMatthew Dillon hammer_lock_cursor(cursor); 340ebbcfba9SMatthew Dillon 341ebbcfba9SMatthew Dillon /* 342ebbcfba9SMatthew Dillon * elm may have become stale or invalid, reload it. 343ebbcfba9SMatthew Dillon * ondisk variable is temporary only. Note that 344ebbcfba9SMatthew Dillon * cursor->node and thus cursor->node->ondisk may 345ebbcfba9SMatthew Dillon * also changed. 346ebbcfba9SMatthew Dillon */ 347ebbcfba9SMatthew Dillon ondisk = cursor->node->ondisk; 348ebbcfba9SMatthew Dillon elm = &ondisk->elms[cursor->index]; 34944a83111SMatthew Dillon if (cursor->flags & HAMMER_CURSOR_RETEST) { 350c54975d5STomohiro Kusumi kprintf("HAMMER: debug: retest on " 351ebbcfba9SMatthew Dillon "reblocker uncache\n"); 35244a83111SMatthew Dillon error = EDEADLK; 353ebbcfba9SMatthew Dillon } else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF || 354ebbcfba9SMatthew Dillon cursor->index >= ondisk->count) { 355c54975d5STomohiro Kusumi kprintf("HAMMER: debug: shifted on " 356ebbcfba9SMatthew Dillon "reblocker uncache\n"); 357ebbcfba9SMatthew Dillon error = EDEADLK; 358ebbcfba9SMatthew Dillon } else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) { 359c54975d5STomohiro Kusumi kprintf("HAMMER: debug: changed on " 360ebbcfba9SMatthew Dillon "reblocker uncache\n"); 361ebbcfba9SMatthew Dillon error = EDEADLK; 36244a83111SMatthew Dillon } 36344a83111SMatthew Dillon if (error == 0) 364bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 365bf686dbeSMatthew Dillon if (error == 0) { 36607ed04b5SMatthew Dillon KKASSERT(cursor->index < ondisk->count); 36736f82b23SMatthew Dillon error = hammer_reblock_data(reblock, 368bf686dbeSMatthew Dillon cursor, elm); 369bf686dbeSMatthew Dillon } 370bf686dbeSMatthew Dillon if (error == 0) { 371bf686dbeSMatthew Dillon ++reblock->data_moves; 372bf686dbeSMatthew Dillon reblock->data_byte_moves += elm->leaf.data_len; 373bf686dbeSMatthew Dillon } 374bf686dbeSMatthew Dillon } 375bf686dbeSMatthew Dillon } 376bf686dbeSMatthew Dillon 3772f85fa4dSMatthew Dillon skip: 378bf686dbeSMatthew Dillon /* 3791775b6a0SMatthew Dillon * Reblock a B-Tree internal or leaf node. A leaf node is reblocked 3801775b6a0SMatthew Dillon * on initial entry only (element 0). An internal node is reblocked 381525fa6bbSTomohiro Kusumi * when entered upward from its first leaf node only (also element 0, 382525fa6bbSTomohiro Kusumi * see hammer_btree_iterate() where cursor moves up and may return). 3831775b6a0SMatthew Dillon * Further revisits of the internal node (index > 0) are ignored. 384bf686dbeSMatthew Dillon */ 385bf686dbeSMatthew Dillon tmp_offset = cursor->node->node_offset; 3867ef2d7b3STomohiro Kusumi 3877ef2d7b3STomohiro Kusumi /* 3887ef2d7b3STomohiro Kusumi * If reblock->vol_no is specified we only want to reblock data 3897ef2d7b3STomohiro Kusumi * in that volume, but ignore everything else. 3907ef2d7b3STomohiro Kusumi */ 3917ef2d7b3STomohiro Kusumi if (reblock->vol_no != -1 && 3927ef2d7b3STomohiro Kusumi reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset)) 3937ef2d7b3STomohiro Kusumi goto end; 3947ef2d7b3STomohiro Kusumi 395bf3b416bSMatthew Dillon if (cursor->index == 0 && 396814387f6SMatthew Dillon error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { 397bf686dbeSMatthew Dillon ++reblock->btree_count; 39843c665aeSMatthew Dillon bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 3996e1e8b6dSMatthew Dillon if (hammer_debug_general & 0x4000) 4002f85fa4dSMatthew Dillon kprintf("B %6d/%d\n", bytes, reblock->free_level); 4014af7f537STomohiro Kusumi /* 4024af7f537STomohiro Kusumi * Start node reblock if 4034af7f537STomohiro Kusumi * 1. there is no error 4044af7f537STomohiro Kusumi * 2. the node and allocator offset are not in the same 4054af7f537STomohiro Kusumi * big-block, or free level threshold is 0 4064af7f537STomohiro Kusumi * 3. free bytes in the node's big-block is larger than 4074af7f537STomohiro Kusumi * free level threshold (means if threshold is 0 then 4084af7f537STomohiro Kusumi * do reblock no matter what). 4094af7f537STomohiro Kusumi */ 410bf3b416bSMatthew Dillon if (error == 0 && (cur == 0 || reblock->free_level == 0) && 411bf3b416bSMatthew Dillon bytes >= reblock->free_level) { 412bf686dbeSMatthew Dillon error = hammer_cursor_upgrade(cursor); 413bf686dbeSMatthew Dillon if (error == 0) { 41407ed04b5SMatthew Dillon if (cursor->parent) { 41507ed04b5SMatthew Dillon KKASSERT(cursor->parent_index < 41607ed04b5SMatthew Dillon cursor->parent->ondisk->count); 417bf686dbeSMatthew Dillon elm = &cursor->parent->ondisk->elms[cursor->parent_index]; 41807ed04b5SMatthew Dillon } else { 419bf686dbeSMatthew Dillon elm = NULL; 42007ed04b5SMatthew Dillon } 4212f85fa4dSMatthew Dillon switch(cursor->node->ondisk->type) { 4222f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_LEAF: 4232f85fa4dSMatthew Dillon error = hammer_reblock_leaf_node( 4242f85fa4dSMatthew Dillon reblock, cursor, elm); 4252f85fa4dSMatthew Dillon break; 4262f85fa4dSMatthew Dillon case HAMMER_BTREE_TYPE_INTERNAL: 4272f85fa4dSMatthew Dillon error = hammer_reblock_int_node( 4282f85fa4dSMatthew Dillon reblock, cursor, elm); 4292f85fa4dSMatthew Dillon break; 4302f85fa4dSMatthew Dillon default: 4312f85fa4dSMatthew Dillon panic("Illegal B-Tree node type"); 4322f85fa4dSMatthew Dillon } 433bf686dbeSMatthew Dillon } 434bf686dbeSMatthew Dillon if (error == 0) { 435bf686dbeSMatthew Dillon ++reblock->btree_moves; 436bf686dbeSMatthew Dillon } 437bf686dbeSMatthew Dillon } 438bf686dbeSMatthew Dillon } 4397ef2d7b3STomohiro Kusumi end: 440*ebc5d79eSTomohiro Kusumi hammer_cursor_downgrade(cursor); 441bf686dbeSMatthew Dillon return(error); 442bf686dbeSMatthew Dillon } 443bf686dbeSMatthew Dillon 444bf686dbeSMatthew Dillon /* 445bf686dbeSMatthew Dillon * Reblock a record's data. Both the B-Tree element and record pointers 446bf686dbeSMatthew Dillon * to the data must be adjusted. 447bf686dbeSMatthew Dillon */ 448bf686dbeSMatthew Dillon static int 44936f82b23SMatthew Dillon hammer_reblock_data(struct hammer_ioc_reblock *reblock, 450bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 451bf686dbeSMatthew Dillon { 452bf686dbeSMatthew Dillon struct hammer_buffer *data_buffer = NULL; 453bc996e65STomohiro Kusumi hammer_off_t odata_offset; 454bf686dbeSMatthew Dillon hammer_off_t ndata_offset; 455bf686dbeSMatthew Dillon int error; 456bf686dbeSMatthew Dillon void *ndata; 457bf686dbeSMatthew Dillon 458bf686dbeSMatthew Dillon error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | 45911ad5adeSMatthew Dillon HAMMER_CURSOR_GET_LEAF); 460bf686dbeSMatthew Dillon if (error) 461bf686dbeSMatthew Dillon return (error); 46236f82b23SMatthew Dillon ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, 463bf3b416bSMatthew Dillon elm->leaf.base.rec_type, 464df2ccbacSMatthew Dillon &ndata_offset, &data_buffer, 465df2ccbacSMatthew Dillon 0, &error); 466bf686dbeSMatthew Dillon if (error) 467bf686dbeSMatthew Dillon goto done; 468b8a41159SMatthew Dillon hammer_io_notmeta(data_buffer); 469bf686dbeSMatthew Dillon 470bf686dbeSMatthew Dillon /* 471b9107f58SMatthew Dillon * Move the data. Note that we must invalidate any cached 472b9107f58SMatthew Dillon * data buffer in the cursor before calling blockmap_free. 473e04ee2deSTomohiro Kusumi * The blockmap_free may free up the entire big-block and 474b9107f58SMatthew Dillon * will not be able to invalidate it if the cursor is holding 475d165c90aSTomohiro Kusumi * a data buffer cached in that big-block. 476bf686dbeSMatthew Dillon */ 477f1c0ae53STomohiro Kusumi hammer_modify_buffer_noundo(cursor->trans, data_buffer); 478bf686dbeSMatthew Dillon bcopy(cursor->data, ndata, elm->leaf.data_len); 47910a5d1baSMatthew Dillon hammer_modify_buffer_done(data_buffer); 480b9107f58SMatthew Dillon hammer_cursor_invalidate_cache(cursor); 481bf686dbeSMatthew Dillon 48236f82b23SMatthew Dillon hammer_blockmap_free(cursor->trans, 48336f82b23SMatthew Dillon elm->leaf.data_offset, elm->leaf.data_len); 484bf686dbeSMatthew Dillon 48510a5d1baSMatthew Dillon hammer_modify_node(cursor->trans, cursor->node, 48610a5d1baSMatthew Dillon &elm->leaf.data_offset, sizeof(hammer_off_t)); 487bc996e65STomohiro Kusumi odata_offset = elm->leaf.data_offset; 488bf686dbeSMatthew Dillon elm->leaf.data_offset = ndata_offset; 48910a5d1baSMatthew Dillon hammer_modify_node_done(cursor->node); 490bf686dbeSMatthew Dillon 491bc996e65STomohiro Kusumi if (hammer_debug_general & 0x4000) { 492bc996e65STomohiro Kusumi kprintf("REBLOCK DATA %08x %016llx -> %016llx\n", 493a6af8eaeSTomohiro Kusumi (elm ? elm->base.localization : -1), 494bc996e65STomohiro Kusumi (long long)odata_offset, 495bc996e65STomohiro Kusumi (long long)ndata_offset); 496bc996e65STomohiro Kusumi } 497bf686dbeSMatthew Dillon done: 498bf686dbeSMatthew Dillon if (data_buffer) 499bf686dbeSMatthew Dillon hammer_rel_buffer(data_buffer, 0); 500bf686dbeSMatthew Dillon return (error); 501bf686dbeSMatthew Dillon } 502bf686dbeSMatthew Dillon 503bf686dbeSMatthew Dillon /* 5042f85fa4dSMatthew Dillon * Reblock a B-Tree leaf node. The parent must be adjusted to point to 5052f85fa4dSMatthew Dillon * the new copy of the leaf node. 506bf686dbeSMatthew Dillon * 5072f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 508bf686dbeSMatthew Dillon */ 509bf686dbeSMatthew Dillon static int 5102f85fa4dSMatthew Dillon hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 511bf686dbeSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 512bf686dbeSMatthew Dillon { 513bf686dbeSMatthew Dillon hammer_node_t onode; 514bf686dbeSMatthew Dillon hammer_node_t nnode; 515bf686dbeSMatthew Dillon int error; 516bf686dbeSMatthew Dillon 517df2ccbacSMatthew Dillon /* 518df2ccbacSMatthew Dillon * Don't supply a hint when allocating the leaf. Fills are done 519df2ccbacSMatthew Dillon * from the leaf upwards. 520df2ccbacSMatthew Dillon */ 521bf686dbeSMatthew Dillon onode = cursor->node; 522df2ccbacSMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5238d0efe43SMatthew Dillon 524bf686dbeSMatthew Dillon if (nnode == NULL) 525bf686dbeSMatthew Dillon return (error); 526bf686dbeSMatthew Dillon 52709ac686bSMatthew Dillon hammer_lock_ex(&nnode->lock); 52809ac686bSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 529bf686dbeSMatthew Dillon 53026748b87STomohiro Kusumi hammer_move_node(cursor, elm, onode, nnode); 53126748b87STomohiro Kusumi 532bf686dbeSMatthew Dillon /* 53326748b87STomohiro Kusumi * Clean up. 53426748b87STomohiro Kusumi * 53526748b87STomohiro Kusumi * The new node replaces the current node in the cursor. The cursor 53626748b87STomohiro Kusumi * expects it to be locked so leave it locked. Discard onode. 537bf686dbeSMatthew Dillon */ 538b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 53936f82b23SMatthew Dillon hammer_delete_node(cursor->trans, onode); 540bf686dbeSMatthew Dillon 541b58c6388SMatthew Dillon if (hammer_debug_general & 0x4000) { 542525fa6bbSTomohiro Kusumi kprintf("REBLOCK %cNODE %08x %016llx -> %016llx\n", 543525fa6bbSTomohiro Kusumi nnode->ondisk->type, 544a6af8eaeSTomohiro Kusumi (elm ? elm->base.localization : -1), 545973c11b9SMatthew Dillon (long long)onode->node_offset, 546973c11b9SMatthew Dillon (long long)nnode->node_offset); 547b58c6388SMatthew Dillon } 5488d0efe43SMatthew Dillon hammer_modify_node_done(nnode); 549bf686dbeSMatthew Dillon cursor->node = nnode; 55009ac686bSMatthew Dillon 55109ac686bSMatthew Dillon hammer_unlock(&onode->lock); 552bf686dbeSMatthew Dillon hammer_rel_node(onode); 553bf686dbeSMatthew Dillon 554bf686dbeSMatthew Dillon return (error); 555bf686dbeSMatthew Dillon } 556bf686dbeSMatthew Dillon 5572f85fa4dSMatthew Dillon /* 5582f85fa4dSMatthew Dillon * Reblock a B-Tree internal node. The parent must be adjusted to point to 5592f85fa4dSMatthew Dillon * the new copy of the internal node, and the node's children's parent 5602f85fa4dSMatthew Dillon * pointers must also be adjusted to point to the new copy. 5612f85fa4dSMatthew Dillon * 5622f85fa4dSMatthew Dillon * elm is a pointer to the parent element pointing at cursor.node. 5632f85fa4dSMatthew Dillon */ 5642f85fa4dSMatthew Dillon static int 5652f85fa4dSMatthew Dillon hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 5662f85fa4dSMatthew Dillon hammer_cursor_t cursor, hammer_btree_elm_t elm) 5672f85fa4dSMatthew Dillon { 5681775b6a0SMatthew Dillon struct hammer_node_lock lockroot; 5692f85fa4dSMatthew Dillon hammer_node_t onode; 5702f85fa4dSMatthew Dillon hammer_node_t nnode; 5712f85fa4dSMatthew Dillon int error; 5722f85fa4dSMatthew Dillon 5731775b6a0SMatthew Dillon hammer_node_lock_init(&lockroot, cursor->node); 57424cf83d2SMatthew Dillon error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL); 5752f85fa4dSMatthew Dillon if (error) 5762f85fa4dSMatthew Dillon goto done; 5772f85fa4dSMatthew Dillon 578525fa6bbSTomohiro Kusumi /* 579525fa6bbSTomohiro Kusumi * Don't supply a hint when allocating the leaf. Fills are done 580525fa6bbSTomohiro Kusumi * from the leaf upwards. 581525fa6bbSTomohiro Kusumi */ 5822f85fa4dSMatthew Dillon onode = cursor->node; 583b4f86ea3SMatthew Dillon nnode = hammer_alloc_btree(cursor->trans, 0, &error); 5842f85fa4dSMatthew Dillon 5852f85fa4dSMatthew Dillon if (nnode == NULL) 5862f85fa4dSMatthew Dillon goto done; 5872f85fa4dSMatthew Dillon 5882f85fa4dSMatthew Dillon hammer_lock_ex(&nnode->lock); 5892f85fa4dSMatthew Dillon hammer_modify_node_noundo(cursor->trans, nnode); 5902f85fa4dSMatthew Dillon 59126748b87STomohiro Kusumi hammer_move_node(cursor, elm, onode, nnode); 5922f85fa4dSMatthew Dillon 5932f85fa4dSMatthew Dillon /* 5942f85fa4dSMatthew Dillon * Clean up. 5952f85fa4dSMatthew Dillon * 5962f85fa4dSMatthew Dillon * The new node replaces the current node in the cursor. The cursor 5972f85fa4dSMatthew Dillon * expects it to be locked so leave it locked. Discard onode. 5982f85fa4dSMatthew Dillon */ 599b3bad96fSMatthew Dillon hammer_cursor_replaced_node(onode, nnode); 6002f85fa4dSMatthew Dillon hammer_delete_node(cursor->trans, onode); 6012f85fa4dSMatthew Dillon 6022f85fa4dSMatthew Dillon if (hammer_debug_general & 0x4000) { 603525fa6bbSTomohiro Kusumi kprintf("REBLOCK %cNODE %08x %016llx -> %016llx\n", 604525fa6bbSTomohiro Kusumi nnode->ondisk->type, 605a6af8eaeSTomohiro Kusumi (elm ? elm->base.localization : -1), 606973c11b9SMatthew Dillon (long long)onode->node_offset, 607973c11b9SMatthew Dillon (long long)nnode->node_offset); 6082f85fa4dSMatthew Dillon } 6092f85fa4dSMatthew Dillon hammer_modify_node_done(nnode); 6102f85fa4dSMatthew Dillon cursor->node = nnode; 6112f85fa4dSMatthew Dillon 6122f85fa4dSMatthew Dillon hammer_unlock(&onode->lock); 6132f85fa4dSMatthew Dillon hammer_rel_node(onode); 6142f85fa4dSMatthew Dillon 6152f85fa4dSMatthew Dillon done: 61624cf83d2SMatthew Dillon hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL); 6172f85fa4dSMatthew Dillon return (error); 6182f85fa4dSMatthew Dillon } 6192f85fa4dSMatthew Dillon 62026748b87STomohiro Kusumi /* 62126748b87STomohiro Kusumi * nnode is a newly allocated node, and now elm becomes the node 62226748b87STomohiro Kusumi * element within nnode's parent that represents a pointer to nnode, 62326748b87STomohiro Kusumi * or nnode becomes the root node if elm does not exist. 62426748b87STomohiro Kusumi */ 62526748b87STomohiro Kusumi static void 62626748b87STomohiro Kusumi hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm, 62726748b87STomohiro Kusumi hammer_node_t onode, hammer_node_t nnode) 62826748b87STomohiro Kusumi { 62926748b87STomohiro Kusumi int error, i; 63026748b87STomohiro Kusumi 63126748b87STomohiro Kusumi bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 63226748b87STomohiro Kusumi 63326748b87STomohiro Kusumi /* 63426748b87STomohiro Kusumi * Adjust the parent's pointer to us first. 63526748b87STomohiro Kusumi */ 63626748b87STomohiro Kusumi if (elm) { 63726748b87STomohiro Kusumi /* 63826748b87STomohiro Kusumi * We are not the root of the B-Tree 63926748b87STomohiro Kusumi */ 64026748b87STomohiro Kusumi hammer_modify_node(cursor->trans, cursor->parent, 64126748b87STomohiro Kusumi &elm->internal.subtree_offset, 64226748b87STomohiro Kusumi sizeof(elm->internal.subtree_offset)); 64326748b87STomohiro Kusumi elm->internal.subtree_offset = nnode->node_offset; 64426748b87STomohiro Kusumi hammer_modify_node_done(cursor->parent); 64526748b87STomohiro Kusumi } else { 64626748b87STomohiro Kusumi /* 64726748b87STomohiro Kusumi * We are the root of the B-Tree 64826748b87STomohiro Kusumi */ 64926748b87STomohiro Kusumi hammer_volume_t volume; 65026748b87STomohiro Kusumi volume = hammer_get_root_volume(cursor->trans->hmp, &error); 65126748b87STomohiro Kusumi KKASSERT(error == 0); 65226748b87STomohiro Kusumi 65326748b87STomohiro Kusumi hammer_modify_volume_field(cursor->trans, volume, 65426748b87STomohiro Kusumi vol0_btree_root); 65526748b87STomohiro Kusumi volume->ondisk->vol0_btree_root = nnode->node_offset; 65626748b87STomohiro Kusumi hammer_modify_volume_done(volume); 65726748b87STomohiro Kusumi hammer_rel_volume(volume, 0); 65826748b87STomohiro Kusumi } 65926748b87STomohiro Kusumi 66026748b87STomohiro Kusumi /* 66126748b87STomohiro Kusumi * Now adjust our children's pointers to us 66226748b87STomohiro Kusumi * if we are an internal node. 66326748b87STomohiro Kusumi */ 66426748b87STomohiro Kusumi if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) { 66526748b87STomohiro Kusumi for (i = 0; i < nnode->ondisk->count; ++i) { 66626748b87STomohiro Kusumi error = btree_set_parent(cursor->trans, nnode, 66726748b87STomohiro Kusumi &nnode->ondisk->elms[i]); 66826748b87STomohiro Kusumi if (error) 66926748b87STomohiro Kusumi panic("reblock internal node: fixup problem"); 67026748b87STomohiro Kusumi } 67126748b87STomohiro Kusumi } 67226748b87STomohiro Kusumi } 673