18cd0a023SMatthew Dillon /* 2b84de5afSMatthew Dillon * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. 38cd0a023SMatthew Dillon * 48cd0a023SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 58cd0a023SMatthew Dillon * by Matthew Dillon <dillon@backplane.com> 68cd0a023SMatthew Dillon * 78cd0a023SMatthew Dillon * Redistribution and use in source and binary forms, with or without 88cd0a023SMatthew Dillon * modification, are permitted provided that the following conditions 98cd0a023SMatthew Dillon * are met: 108cd0a023SMatthew Dillon * 118cd0a023SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 128cd0a023SMatthew Dillon * notice, this list of conditions and the following disclaimer. 138cd0a023SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 148cd0a023SMatthew Dillon * notice, this list of conditions and the following disclaimer in 158cd0a023SMatthew Dillon * the documentation and/or other materials provided with the 168cd0a023SMatthew Dillon * distribution. 178cd0a023SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 188cd0a023SMatthew Dillon * contributors may be used to endorse or promote products derived 198cd0a023SMatthew Dillon * from this software without specific, prior written permission. 208cd0a023SMatthew Dillon * 218cd0a023SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 228cd0a023SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 238cd0a023SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 248cd0a023SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 258cd0a023SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 268cd0a023SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 278cd0a023SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 288cd0a023SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 298cd0a023SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 308cd0a023SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 318cd0a023SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 328cd0a023SMatthew Dillon * SUCH DAMAGE. 338cd0a023SMatthew Dillon * 345c8d05e2SMatthew Dillon * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.42 2008/08/06 15:38:58 dillon Exp $ 358cd0a023SMatthew Dillon */ 368cd0a023SMatthew Dillon 378cd0a023SMatthew Dillon /* 388cd0a023SMatthew Dillon * HAMMER B-Tree index - cursor support routines 398cd0a023SMatthew Dillon */ 408cd0a023SMatthew Dillon #include "hammer.h" 418cd0a023SMatthew Dillon 42f36a9737SMatthew Dillon static int hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive); 438cd0a023SMatthew Dillon 448cd0a023SMatthew Dillon /* 4561aeeb33SMatthew Dillon * Initialize a fresh cursor using the B-Tree node cache. If the cache 4647197d71SMatthew Dillon * is not available initialize a fresh cursor at the root of the filesystem. 478cd0a023SMatthew Dillon */ 488cd0a023SMatthew Dillon int 4936f82b23SMatthew Dillon hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, 50bcac4bbbSMatthew Dillon hammer_node_cache_t cache, hammer_inode_t ip) 518cd0a023SMatthew Dillon { 5247197d71SMatthew Dillon hammer_volume_t volume; 5361aeeb33SMatthew Dillon hammer_node_t node; 548cd0a023SMatthew Dillon int error; 558cd0a023SMatthew Dillon 568cd0a023SMatthew Dillon bzero(cursor, sizeof(*cursor)); 5761aeeb33SMatthew Dillon 5836f82b23SMatthew Dillon cursor->trans = trans; 5936f82b23SMatthew Dillon 6061aeeb33SMatthew Dillon /* 614e17f465SMatthew Dillon * If the cursor operation is on behalf of an inode, lock 624e17f465SMatthew Dillon * the inode. 634e17f465SMatthew Dillon */ 644e17f465SMatthew Dillon if ((cursor->ip = ip) != NULL) { 654e17f465SMatthew Dillon ++ip->cursor_ip_refs; 664e17f465SMatthew Dillon if (trans->type == HAMMER_TRANS_FLS) 674e17f465SMatthew Dillon hammer_lock_ex(&ip->lock); 684e17f465SMatthew Dillon else 694e17f465SMatthew Dillon hammer_lock_sh(&ip->lock); 704e17f465SMatthew Dillon } 714e17f465SMatthew Dillon 724e17f465SMatthew Dillon /* 7361aeeb33SMatthew Dillon * Step 1 - acquire a locked node from the cache if possible 7461aeeb33SMatthew Dillon */ 75bcac4bbbSMatthew Dillon if (cache && cache->node) { 764c286c36SMatthew Dillon node = hammer_ref_node_safe(trans, cache, &error); 778cd0a023SMatthew Dillon if (error == 0) { 7898da6d8cSMatthew Dillon hammer_lock_sh(&node->lock); 7961aeeb33SMatthew Dillon if (node->flags & HAMMER_NODE_DELETED) { 8061aeeb33SMatthew Dillon hammer_unlock(&node->lock); 8161aeeb33SMatthew Dillon hammer_rel_node(node); 8261aeeb33SMatthew Dillon node = NULL; 8361aeeb33SMatthew Dillon } 8461aeeb33SMatthew Dillon } 8539d8fd63SMatthew Dillon if (node == NULL) 8639d8fd63SMatthew Dillon ++hammer_stats_btree_root_iterations; 8761aeeb33SMatthew Dillon } else { 8861aeeb33SMatthew Dillon node = NULL; 8939d8fd63SMatthew Dillon ++hammer_stats_btree_root_iterations; 9061aeeb33SMatthew Dillon } 9161aeeb33SMatthew Dillon 9261aeeb33SMatthew Dillon /* 9361aeeb33SMatthew Dillon * Step 2 - If we couldn't get a node from the cache, get 9447197d71SMatthew Dillon * the one from the root of the filesystem. 9561aeeb33SMatthew Dillon */ 9661aeeb33SMatthew Dillon while (node == NULL) { 9736f82b23SMatthew Dillon volume = hammer_get_root_volume(trans->hmp, &error); 9861aeeb33SMatthew Dillon if (error) 9961aeeb33SMatthew Dillon break; 10082010f9fSMatthew Dillon node = hammer_get_node(trans, volume->ondisk->vol0_btree_root, 10119619882SMatthew Dillon 0, &error); 10247197d71SMatthew Dillon hammer_rel_volume(volume, 0); 10361aeeb33SMatthew Dillon if (error) 10461aeeb33SMatthew Dillon break; 10598da6d8cSMatthew Dillon hammer_lock_sh(&node->lock); 10611ad5adeSMatthew Dillon 10711ad5adeSMatthew Dillon /* 10811ad5adeSMatthew Dillon * If someone got in before we could lock the node, retry. 10911ad5adeSMatthew Dillon */ 11061aeeb33SMatthew Dillon if (node->flags & HAMMER_NODE_DELETED) { 11161aeeb33SMatthew Dillon hammer_unlock(&node->lock); 11261aeeb33SMatthew Dillon hammer_rel_node(node); 11361aeeb33SMatthew Dillon node = NULL; 11411ad5adeSMatthew Dillon continue; 11511ad5adeSMatthew Dillon } 11611ad5adeSMatthew Dillon if (volume->ondisk->vol0_btree_root != node->node_offset) { 11711ad5adeSMatthew Dillon hammer_unlock(&node->lock); 11811ad5adeSMatthew Dillon hammer_rel_node(node); 11911ad5adeSMatthew Dillon node = NULL; 12011ad5adeSMatthew Dillon continue; 1218cd0a023SMatthew Dillon } 12261aeeb33SMatthew Dillon } 12361aeeb33SMatthew Dillon 12461aeeb33SMatthew Dillon /* 12561aeeb33SMatthew Dillon * Step 3 - finish initializing the cursor by acquiring the parent 12661aeeb33SMatthew Dillon */ 12761aeeb33SMatthew Dillon cursor->node = node; 1288cd0a023SMatthew Dillon if (error == 0) 129f36a9737SMatthew Dillon error = hammer_load_cursor_parent(cursor, 0); 1300b075555SMatthew Dillon KKASSERT(error == 0); 131a84a197dSMatthew Dillon /* if (error) hammer_done_cursor(cursor); */ 1328cd0a023SMatthew Dillon return(error); 1338cd0a023SMatthew Dillon } 1348cd0a023SMatthew Dillon 1354e17f465SMatthew Dillon /* 1364e17f465SMatthew Dillon * Normalize a cursor. Sometimes cursors can be left in a state 1374e17f465SMatthew Dillon * where node is NULL. If the cursor is in this state, cursor up. 1384e17f465SMatthew Dillon */ 1394e17f465SMatthew Dillon void 1404e17f465SMatthew Dillon hammer_normalize_cursor(hammer_cursor_t cursor) 1414e17f465SMatthew Dillon { 1424e17f465SMatthew Dillon if (cursor->node == NULL) { 1434e17f465SMatthew Dillon KKASSERT(cursor->parent != NULL); 1444e17f465SMatthew Dillon hammer_cursor_up(cursor); 1454e17f465SMatthew Dillon } 1464e17f465SMatthew Dillon } 1474e17f465SMatthew Dillon 1484e17f465SMatthew Dillon 1498cd0a023SMatthew Dillon /* 1508cd0a023SMatthew Dillon * We are finished with a cursor. We NULL out various fields as sanity 1518cd0a023SMatthew Dillon * check, in case the structure is inappropriately used afterwords. 1528cd0a023SMatthew Dillon */ 1538cd0a023SMatthew Dillon void 1548cd0a023SMatthew Dillon hammer_done_cursor(hammer_cursor_t cursor) 1558cd0a023SMatthew Dillon { 1564e17f465SMatthew Dillon hammer_inode_t ip; 1574e17f465SMatthew Dillon 158adf01747SMatthew Dillon KKASSERT((cursor->flags & HAMMER_CURSOR_TRACKED) == 0); 1598cd0a023SMatthew Dillon if (cursor->parent) { 1608cd0a023SMatthew Dillon hammer_unlock(&cursor->parent->lock); 1618cd0a023SMatthew Dillon hammer_rel_node(cursor->parent); 1628cd0a023SMatthew Dillon cursor->parent = NULL; 1638cd0a023SMatthew Dillon } 1648cd0a023SMatthew Dillon if (cursor->node) { 1658cd0a023SMatthew Dillon hammer_unlock(&cursor->node->lock); 1668cd0a023SMatthew Dillon hammer_rel_node(cursor->node); 1678cd0a023SMatthew Dillon cursor->node = NULL; 1688cd0a023SMatthew Dillon } 1698cd0a023SMatthew Dillon if (cursor->data_buffer) { 1708cd0a023SMatthew Dillon hammer_rel_buffer(cursor->data_buffer, 0); 1718cd0a023SMatthew Dillon cursor->data_buffer = NULL; 1728cd0a023SMatthew Dillon } 1734e17f465SMatthew Dillon if ((ip = cursor->ip) != NULL) { 1744e17f465SMatthew Dillon KKASSERT(ip->cursor_ip_refs > 0); 1754e17f465SMatthew Dillon --ip->cursor_ip_refs; 1764e17f465SMatthew Dillon hammer_unlock(&ip->lock); 1774e17f465SMatthew Dillon cursor->ip = NULL; 1784e17f465SMatthew Dillon } 1793f43fb33SMatthew Dillon if (cursor->iprec) { 1803f43fb33SMatthew Dillon hammer_rel_mem_record(cursor->iprec); 1813f43fb33SMatthew Dillon cursor->iprec = NULL; 1823f43fb33SMatthew Dillon } 1834e17f465SMatthew Dillon 1846a37e7e4SMatthew Dillon /* 1856a37e7e4SMatthew Dillon * If we deadlocked this node will be referenced. Do a quick 1866a37e7e4SMatthew Dillon * lock/unlock to wait for the deadlock condition to clear. 1876a37e7e4SMatthew Dillon */ 1886a37e7e4SMatthew Dillon if (cursor->deadlk_node) { 189af209b0fSMatthew Dillon hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk"); 1906a37e7e4SMatthew Dillon hammer_unlock(&cursor->deadlk_node->lock); 1916a37e7e4SMatthew Dillon hammer_rel_node(cursor->deadlk_node); 1926a37e7e4SMatthew Dillon cursor->deadlk_node = NULL; 1936a37e7e4SMatthew Dillon } 194b84de5afSMatthew Dillon if (cursor->deadlk_rec) { 195a99b9ea2SMatthew Dillon hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr"); 196b84de5afSMatthew Dillon hammer_rel_mem_record(cursor->deadlk_rec); 197b84de5afSMatthew Dillon cursor->deadlk_rec = NULL; 198b84de5afSMatthew Dillon } 1996a37e7e4SMatthew Dillon 20040043e7fSMatthew Dillon cursor->data = NULL; 20111ad5adeSMatthew Dillon cursor->leaf = NULL; 2028cd0a023SMatthew Dillon cursor->left_bound = NULL; 2038cd0a023SMatthew Dillon cursor->right_bound = NULL; 20436f82b23SMatthew Dillon cursor->trans = NULL; 2058cd0a023SMatthew Dillon } 2068cd0a023SMatthew Dillon 2078cd0a023SMatthew Dillon /* 2086a37e7e4SMatthew Dillon * Upgrade cursor->node and cursor->parent to exclusive locks. This 2096a37e7e4SMatthew Dillon * function can return EDEADLK. 2106a37e7e4SMatthew Dillon * 2117aa3b8a6SMatthew Dillon * The lock must already be either held shared or already held exclusively 2127aa3b8a6SMatthew Dillon * by us. 2137aa3b8a6SMatthew Dillon * 2146a37e7e4SMatthew Dillon * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 2156a37e7e4SMatthew Dillon * we add another reference to the node that failed and set 2166a37e7e4SMatthew Dillon * cursor->deadlk_node so hammer_done_cursor() can block on it. 2176a37e7e4SMatthew Dillon */ 2186a37e7e4SMatthew Dillon int 2196a37e7e4SMatthew Dillon hammer_cursor_upgrade(hammer_cursor_t cursor) 2206a37e7e4SMatthew Dillon { 2216a37e7e4SMatthew Dillon int error; 2226a37e7e4SMatthew Dillon 2236a37e7e4SMatthew Dillon error = hammer_lock_upgrade(&cursor->node->lock); 2246a37e7e4SMatthew Dillon if (error && cursor->deadlk_node == NULL) { 2256a37e7e4SMatthew Dillon cursor->deadlk_node = cursor->node; 2266a37e7e4SMatthew Dillon hammer_ref_node(cursor->deadlk_node); 2277aa3b8a6SMatthew Dillon } else if (error == 0 && cursor->parent) { 2286a37e7e4SMatthew Dillon error = hammer_lock_upgrade(&cursor->parent->lock); 2296a37e7e4SMatthew Dillon if (error && cursor->deadlk_node == NULL) { 2306a37e7e4SMatthew Dillon cursor->deadlk_node = cursor->parent; 2316a37e7e4SMatthew Dillon hammer_ref_node(cursor->deadlk_node); 2326a37e7e4SMatthew Dillon } 2336a37e7e4SMatthew Dillon } 2346a37e7e4SMatthew Dillon return(error); 2356a37e7e4SMatthew Dillon } 2366a37e7e4SMatthew Dillon 2377bc5b8c2SMatthew Dillon int 2387bc5b8c2SMatthew Dillon hammer_cursor_upgrade_node(hammer_cursor_t cursor) 2397bc5b8c2SMatthew Dillon { 2407bc5b8c2SMatthew Dillon int error; 2417bc5b8c2SMatthew Dillon 2427bc5b8c2SMatthew Dillon error = hammer_lock_upgrade(&cursor->node->lock); 2437bc5b8c2SMatthew Dillon if (error && cursor->deadlk_node == NULL) { 2447bc5b8c2SMatthew Dillon cursor->deadlk_node = cursor->node; 2457bc5b8c2SMatthew Dillon hammer_ref_node(cursor->deadlk_node); 2467bc5b8c2SMatthew Dillon } 2477bc5b8c2SMatthew Dillon return(error); 2487bc5b8c2SMatthew Dillon } 2497bc5b8c2SMatthew Dillon 2506a37e7e4SMatthew Dillon /* 2516a37e7e4SMatthew Dillon * Downgrade cursor->node and cursor->parent to shared locks. This 2526a37e7e4SMatthew Dillon * function can return EDEADLK. 2536a37e7e4SMatthew Dillon */ 2546a37e7e4SMatthew Dillon void 2556a37e7e4SMatthew Dillon hammer_cursor_downgrade(hammer_cursor_t cursor) 2566a37e7e4SMatthew Dillon { 2577aa3b8a6SMatthew Dillon if (hammer_lock_excl_owned(&cursor->node->lock, curthread)) 2586a37e7e4SMatthew Dillon hammer_lock_downgrade(&cursor->node->lock); 2597aa3b8a6SMatthew Dillon if (cursor->parent && 2607aa3b8a6SMatthew Dillon hammer_lock_excl_owned(&cursor->parent->lock, curthread)) { 2616a37e7e4SMatthew Dillon hammer_lock_downgrade(&cursor->parent->lock); 2626a37e7e4SMatthew Dillon } 2637aa3b8a6SMatthew Dillon } 2646a37e7e4SMatthew Dillon 26532c90105SMatthew Dillon /* 26632c90105SMatthew Dillon * Seek the cursor to the specified node and index. 267cb51be26SMatthew Dillon * 268cb51be26SMatthew Dillon * The caller must ref the node prior to calling this routine and release 269cb51be26SMatthew Dillon * it after it returns. If the seek succeeds the cursor will gain its own 270cb51be26SMatthew Dillon * ref on the node. 27132c90105SMatthew Dillon */ 27232c90105SMatthew Dillon int 27332c90105SMatthew Dillon hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index) 27432c90105SMatthew Dillon { 27532c90105SMatthew Dillon int error; 27632c90105SMatthew Dillon 27732c90105SMatthew Dillon hammer_cursor_downgrade(cursor); 27832c90105SMatthew Dillon error = 0; 27932c90105SMatthew Dillon 28032c90105SMatthew Dillon if (cursor->node != node) { 28132c90105SMatthew Dillon hammer_unlock(&cursor->node->lock); 28232c90105SMatthew Dillon hammer_rel_node(cursor->node); 28332c90105SMatthew Dillon cursor->node = node; 28432c90105SMatthew Dillon hammer_ref_node(node); 28532c90105SMatthew Dillon hammer_lock_sh(&node->lock); 286f36a9737SMatthew Dillon KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0); 28732c90105SMatthew Dillon 28832c90105SMatthew Dillon if (cursor->parent) { 28932c90105SMatthew Dillon hammer_unlock(&cursor->parent->lock); 29032c90105SMatthew Dillon hammer_rel_node(cursor->parent); 29132c90105SMatthew Dillon cursor->parent = NULL; 29232c90105SMatthew Dillon cursor->parent_index = 0; 29332c90105SMatthew Dillon } 294f36a9737SMatthew Dillon error = hammer_load_cursor_parent(cursor, 0); 29532c90105SMatthew Dillon } 296a84a197dSMatthew Dillon cursor->index = index; 29732c90105SMatthew Dillon return (error); 29832c90105SMatthew Dillon } 2996a37e7e4SMatthew Dillon 3006a37e7e4SMatthew Dillon /* 30147197d71SMatthew Dillon * Load the parent of cursor->node into cursor->parent. 3028cd0a023SMatthew Dillon */ 3038cd0a023SMatthew Dillon static 3048cd0a023SMatthew Dillon int 305f36a9737SMatthew Dillon hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive) 3068cd0a023SMatthew Dillon { 30747197d71SMatthew Dillon hammer_mount_t hmp; 3088cd0a023SMatthew Dillon hammer_node_t parent; 30947197d71SMatthew Dillon hammer_node_t node; 3108cd0a023SMatthew Dillon hammer_btree_elm_t elm; 3118cd0a023SMatthew Dillon int error; 312c82af904SMatthew Dillon int parent_index; 3138cd0a023SMatthew Dillon 31436f82b23SMatthew Dillon hmp = cursor->trans->hmp; 31547197d71SMatthew Dillon 31647197d71SMatthew Dillon if (cursor->node->ondisk->parent) { 3178cd0a023SMatthew Dillon node = cursor->node; 31882010f9fSMatthew Dillon parent = hammer_btree_get_parent(cursor->trans, node, 31982010f9fSMatthew Dillon &parent_index, 320c82af904SMatthew Dillon &error, try_exclusive); 321c82af904SMatthew Dillon if (error == 0) { 322c82af904SMatthew Dillon elm = &parent->ondisk->elms[parent_index]; 3238cd0a023SMatthew Dillon cursor->parent = parent; 324c82af904SMatthew Dillon cursor->parent_index = parent_index; 3258cd0a023SMatthew Dillon cursor->left_bound = &elm[0].internal.base; 3268cd0a023SMatthew Dillon cursor->right_bound = &elm[1].internal.base; 327c82af904SMatthew Dillon } 32847197d71SMatthew Dillon } else { 32947197d71SMatthew Dillon cursor->parent = NULL; 33047197d71SMatthew Dillon cursor->parent_index = 0; 33147197d71SMatthew Dillon cursor->left_bound = &hmp->root_btree_beg; 33247197d71SMatthew Dillon cursor->right_bound = &hmp->root_btree_end; 33347197d71SMatthew Dillon error = 0; 3348cd0a023SMatthew Dillon } 3358cd0a023SMatthew Dillon return(error); 3368cd0a023SMatthew Dillon } 3378cd0a023SMatthew Dillon 3388cd0a023SMatthew Dillon /* 3398cd0a023SMatthew Dillon * Cursor up to our parent node. Return ENOENT if we are at the root of 34047197d71SMatthew Dillon * the filesystem. 3418cd0a023SMatthew Dillon */ 3428cd0a023SMatthew Dillon int 3436a37e7e4SMatthew Dillon hammer_cursor_up(hammer_cursor_t cursor) 3448cd0a023SMatthew Dillon { 3458cd0a023SMatthew Dillon int error; 3468cd0a023SMatthew Dillon 3476a37e7e4SMatthew Dillon hammer_cursor_downgrade(cursor); 348195c19a1SMatthew Dillon 349195c19a1SMatthew Dillon /* 3504e17f465SMatthew Dillon * If the parent is NULL we are at the root of the B-Tree and 3514e17f465SMatthew Dillon * return ENOENT. 3524e17f465SMatthew Dillon */ 3534e17f465SMatthew Dillon if (cursor->parent == NULL) 3544e17f465SMatthew Dillon return (ENOENT); 3554e17f465SMatthew Dillon 3564e17f465SMatthew Dillon /* 3574e17f465SMatthew Dillon * Set the node to its parent. 3588cd0a023SMatthew Dillon */ 3598cd0a023SMatthew Dillon hammer_unlock(&cursor->node->lock); 3608cd0a023SMatthew Dillon hammer_rel_node(cursor->node); 3618cd0a023SMatthew Dillon cursor->node = cursor->parent; 3628cd0a023SMatthew Dillon cursor->index = cursor->parent_index; 3638cd0a023SMatthew Dillon cursor->parent = NULL; 3648cd0a023SMatthew Dillon cursor->parent_index = 0; 3658cd0a023SMatthew Dillon 366f36a9737SMatthew Dillon error = hammer_load_cursor_parent(cursor, 0); 3678cd0a023SMatthew Dillon return(error); 3688cd0a023SMatthew Dillon } 3698cd0a023SMatthew Dillon 3708cd0a023SMatthew Dillon /* 371f36a9737SMatthew Dillon * Special cursor up given a locked cursor. The orignal node is not 372b3bad96fSMatthew Dillon * unlocked or released and the cursor is not downgraded. 373b3bad96fSMatthew Dillon * 374*f3a4893bSMatthew Dillon * This function can fail with EDEADLK. 375*f3a4893bSMatthew Dillon * 376*f3a4893bSMatthew Dillon * This function is only run when recursively deleting parent nodes 377*f3a4893bSMatthew Dillon * to get rid of an empty leaf. 378f36a9737SMatthew Dillon */ 379f36a9737SMatthew Dillon int 380f36a9737SMatthew Dillon hammer_cursor_up_locked(hammer_cursor_t cursor) 381f36a9737SMatthew Dillon { 382f36a9737SMatthew Dillon hammer_node_t save; 383f36a9737SMatthew Dillon int error; 384*f3a4893bSMatthew Dillon int save_index; 385f36a9737SMatthew Dillon 386f36a9737SMatthew Dillon /* 387f36a9737SMatthew Dillon * If the parent is NULL we are at the root of the B-Tree and 388f36a9737SMatthew Dillon * return ENOENT. 389f36a9737SMatthew Dillon */ 390f36a9737SMatthew Dillon if (cursor->parent == NULL) 391f36a9737SMatthew Dillon return (ENOENT); 392f36a9737SMatthew Dillon 393f36a9737SMatthew Dillon save = cursor->node; 394*f3a4893bSMatthew Dillon save_index = cursor->index; 395f36a9737SMatthew Dillon 396f36a9737SMatthew Dillon /* 397f36a9737SMatthew Dillon * Set the node to its parent. 398f36a9737SMatthew Dillon */ 399f36a9737SMatthew Dillon cursor->node = cursor->parent; 400f36a9737SMatthew Dillon cursor->index = cursor->parent_index; 401f36a9737SMatthew Dillon cursor->parent = NULL; 402f36a9737SMatthew Dillon cursor->parent_index = 0; 403f36a9737SMatthew Dillon 404f36a9737SMatthew Dillon /* 405f36a9737SMatthew Dillon * load the new parent, attempt to exclusively lock it. Note that 406f36a9737SMatthew Dillon * we are still holding the old parent (now cursor->node) exclusively 407*f3a4893bSMatthew Dillon * locked. 408*f3a4893bSMatthew Dillon * 409*f3a4893bSMatthew Dillon * This can return EDEADLK. Undo the operation on any error. These 410*f3a4893bSMatthew Dillon * up sequences can occur during iterations so be sure to restore 411*f3a4893bSMatthew Dillon * the index. 412f36a9737SMatthew Dillon */ 413f36a9737SMatthew Dillon error = hammer_load_cursor_parent(cursor, 1); 414f36a9737SMatthew Dillon if (error) { 415f36a9737SMatthew Dillon cursor->parent = cursor->node; 416f36a9737SMatthew Dillon cursor->parent_index = cursor->index; 417f36a9737SMatthew Dillon cursor->node = save; 418*f3a4893bSMatthew Dillon cursor->index = save_index; 419f36a9737SMatthew Dillon } 420f36a9737SMatthew Dillon return(error); 421f36a9737SMatthew Dillon } 422f36a9737SMatthew Dillon 423f36a9737SMatthew Dillon 424f36a9737SMatthew Dillon /* 4258cd0a023SMatthew Dillon * Cursor down through the current node, which must be an internal node. 4268cd0a023SMatthew Dillon * 4278cd0a023SMatthew Dillon * This routine adjusts the cursor and sets index to 0. 4288cd0a023SMatthew Dillon */ 4298cd0a023SMatthew Dillon int 4308cd0a023SMatthew Dillon hammer_cursor_down(hammer_cursor_t cursor) 4318cd0a023SMatthew Dillon { 4328cd0a023SMatthew Dillon hammer_node_t node; 4338cd0a023SMatthew Dillon hammer_btree_elm_t elm; 4348cd0a023SMatthew Dillon int error; 4358cd0a023SMatthew Dillon 4368cd0a023SMatthew Dillon /* 4378cd0a023SMatthew Dillon * The current node becomes the current parent 4388cd0a023SMatthew Dillon */ 4396a37e7e4SMatthew Dillon hammer_cursor_downgrade(cursor); 4408cd0a023SMatthew Dillon node = cursor->node; 4418cd0a023SMatthew Dillon KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count); 4428cd0a023SMatthew Dillon if (cursor->parent) { 4438cd0a023SMatthew Dillon hammer_unlock(&cursor->parent->lock); 4448cd0a023SMatthew Dillon hammer_rel_node(cursor->parent); 4458cd0a023SMatthew Dillon } 4468cd0a023SMatthew Dillon cursor->parent = node; 4478cd0a023SMatthew Dillon cursor->parent_index = cursor->index; 4488cd0a023SMatthew Dillon cursor->node = NULL; 4498cd0a023SMatthew Dillon cursor->index = 0; 4508cd0a023SMatthew Dillon 4518cd0a023SMatthew Dillon /* 45276376933SMatthew Dillon * Extract element to push into at (node,index), set bounds. 4538cd0a023SMatthew Dillon */ 4548cd0a023SMatthew Dillon elm = &node->ondisk->elms[cursor->parent_index]; 4558cd0a023SMatthew Dillon 4568cd0a023SMatthew Dillon /* 457fe7678eeSMatthew Dillon * Ok, push down into elm. If elm specifies an internal or leaf 458fe7678eeSMatthew Dillon * node the current node must be an internal node. If elm specifies 459fe7678eeSMatthew Dillon * a spike then the current node must be a leaf node. 4608cd0a023SMatthew Dillon */ 461fe7678eeSMatthew Dillon switch(elm->base.btype) { 462fe7678eeSMatthew Dillon case HAMMER_BTREE_TYPE_INTERNAL: 463fe7678eeSMatthew Dillon case HAMMER_BTREE_TYPE_LEAF: 464fe7678eeSMatthew Dillon KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL); 465fe7678eeSMatthew Dillon KKASSERT(elm->internal.subtree_offset != 0); 466fe7678eeSMatthew Dillon cursor->left_bound = &elm[0].internal.base; 467fe7678eeSMatthew Dillon cursor->right_bound = &elm[1].internal.base; 46882010f9fSMatthew Dillon node = hammer_get_node(cursor->trans, 46919619882SMatthew Dillon elm->internal.subtree_offset, 0, &error); 470fe7678eeSMatthew Dillon if (error == 0) { 47140043e7fSMatthew Dillon KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node)); 472fe7678eeSMatthew Dillon if (node->ondisk->parent != cursor->parent->node_offset) 473973c11b9SMatthew Dillon panic("node %p %016llx vs %016llx\n", node, (long long)node->ondisk->parent, (long long)cursor->parent->node_offset); 474fe7678eeSMatthew Dillon KKASSERT(node->ondisk->parent == cursor->parent->node_offset); 475fe7678eeSMatthew Dillon } 476fe7678eeSMatthew Dillon break; 477fe7678eeSMatthew Dillon default: 478fe7678eeSMatthew Dillon panic("hammer_cursor_down: illegal btype %02x (%c)\n", 479fe7678eeSMatthew Dillon elm->base.btype, 480fe7678eeSMatthew Dillon (elm->base.btype ? elm->base.btype : '?')); 481fe7678eeSMatthew Dillon break; 4828cd0a023SMatthew Dillon } 4838cd0a023SMatthew Dillon if (error == 0) { 4846a37e7e4SMatthew Dillon hammer_lock_sh(&node->lock); 485f36a9737SMatthew Dillon KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0); 4868cd0a023SMatthew Dillon cursor->node = node; 4878cd0a023SMatthew Dillon cursor->index = 0; 4888cd0a023SMatthew Dillon } 4898cd0a023SMatthew Dillon return(error); 4908cd0a023SMatthew Dillon } 4918cd0a023SMatthew Dillon 492b3bad96fSMatthew Dillon /************************************************************************ 493b3bad96fSMatthew Dillon * DEADLOCK RECOVERY * 494b3bad96fSMatthew Dillon ************************************************************************ 495b3bad96fSMatthew Dillon * 496b3bad96fSMatthew Dillon * These are the new deadlock recovery functions. Currently they are only 497b3bad96fSMatthew Dillon * used for the mirror propagation and physical node removal cases but 498b3bad96fSMatthew Dillon * ultimately the intention is to use them for all deadlock recovery 499b3bad96fSMatthew Dillon * operations. 500c9ce54d6SMatthew Dillon * 501c9ce54d6SMatthew Dillon * WARNING! The contents of the cursor may be modified while unlocked. 502c9ce54d6SMatthew Dillon * passive modifications including adjusting the node, parent, 503c9ce54d6SMatthew Dillon * indexes, and leaf pointer. 504c9ce54d6SMatthew Dillon * 505c9ce54d6SMatthew Dillon * An outright removal of the element the cursor was pointing at 506c9ce54d6SMatthew Dillon * will cause the HAMMER_CURSOR_TRACKED_RIPOUT flag to be set, 507c9ce54d6SMatthew Dillon * which chains to causing the HAMMER_CURSOR_RETEST to be set 508c9ce54d6SMatthew Dillon * when the cursor is locked again. 509b3bad96fSMatthew Dillon */ 510adf01747SMatthew Dillon void 511982be4bfSMatthew Dillon hammer_unlock_cursor(hammer_cursor_t cursor) 512b3bad96fSMatthew Dillon { 513b3bad96fSMatthew Dillon hammer_node_t node; 514b3bad96fSMatthew Dillon 515adf01747SMatthew Dillon KKASSERT((cursor->flags & HAMMER_CURSOR_TRACKED) == 0); 516b3bad96fSMatthew Dillon KKASSERT(cursor->node); 51719168d41SMatthew Dillon 51819168d41SMatthew Dillon /* 519b3bad96fSMatthew Dillon * Release the cursor's locks and track B-Tree operations on node. 520b3bad96fSMatthew Dillon * While being tracked our cursor can be modified by other threads 5215c8d05e2SMatthew Dillon * and the node may be replaced. 522b3bad96fSMatthew Dillon */ 523b3bad96fSMatthew Dillon if (cursor->parent) { 524b3bad96fSMatthew Dillon hammer_unlock(&cursor->parent->lock); 525b3bad96fSMatthew Dillon hammer_rel_node(cursor->parent); 526b3bad96fSMatthew Dillon cursor->parent = NULL; 527b3bad96fSMatthew Dillon } 528b3bad96fSMatthew Dillon node = cursor->node; 529adf01747SMatthew Dillon cursor->flags |= HAMMER_CURSOR_TRACKED; 530b3bad96fSMatthew Dillon TAILQ_INSERT_TAIL(&node->cursor_list, cursor, deadlk_entry); 531b3bad96fSMatthew Dillon hammer_unlock(&node->lock); 532adf01747SMatthew Dillon } 533adf01747SMatthew Dillon 534adf01747SMatthew Dillon /* 535adf01747SMatthew Dillon * Get the cursor heated up again. The cursor's node may have 536adf01747SMatthew Dillon * changed and we might have to locate the new parent. 537adf01747SMatthew Dillon * 538adf01747SMatthew Dillon * If the exact element we were on got deleted RIPOUT will be 539adf01747SMatthew Dillon * set and we must clear ATEDISK so an iteration does not skip 540adf01747SMatthew Dillon * the element after it. 541adf01747SMatthew Dillon */ 542adf01747SMatthew Dillon int 543982be4bfSMatthew Dillon hammer_lock_cursor(hammer_cursor_t cursor) 544adf01747SMatthew Dillon { 545adf01747SMatthew Dillon hammer_node_t node; 546adf01747SMatthew Dillon int error; 547adf01747SMatthew Dillon 548adf01747SMatthew Dillon KKASSERT(cursor->flags & HAMMER_CURSOR_TRACKED); 549adf01747SMatthew Dillon 550adf01747SMatthew Dillon /* 551adf01747SMatthew Dillon * Relock the node 552adf01747SMatthew Dillon */ 553adf01747SMatthew Dillon for (;;) { 554adf01747SMatthew Dillon node = cursor->node; 555adf01747SMatthew Dillon hammer_ref_node(node); 556adf01747SMatthew Dillon hammer_lock_sh(&node->lock); 557adf01747SMatthew Dillon if (cursor->node == node) { 558adf01747SMatthew Dillon hammer_rel_node(node); 559adf01747SMatthew Dillon break; 560adf01747SMatthew Dillon } 561adf01747SMatthew Dillon hammer_unlock(&node->lock); 562adf01747SMatthew Dillon hammer_rel_node(node); 563adf01747SMatthew Dillon } 564adf01747SMatthew Dillon 565adf01747SMatthew Dillon /* 566adf01747SMatthew Dillon * Untrack the cursor, clean up, and re-establish the parent node. 567adf01747SMatthew Dillon */ 568adf01747SMatthew Dillon TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry); 569adf01747SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_TRACKED; 570adf01747SMatthew Dillon 5715c8d05e2SMatthew Dillon /* 5725c8d05e2SMatthew Dillon * If a ripout has occured iterations must re-test the (new) 5735c8d05e2SMatthew Dillon * current element. Clearing ATEDISK prevents the element from 5745c8d05e2SMatthew Dillon * being skipped and RETEST causes it to be re-tested. 5755c8d05e2SMatthew Dillon */ 576adf01747SMatthew Dillon if (cursor->flags & HAMMER_CURSOR_TRACKED_RIPOUT) { 577adf01747SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_TRACKED_RIPOUT; 578adf01747SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 5795c8d05e2SMatthew Dillon cursor->flags |= HAMMER_CURSOR_RETEST; 580adf01747SMatthew Dillon } 581adf01747SMatthew Dillon error = hammer_load_cursor_parent(cursor, 0); 582adf01747SMatthew Dillon return(error); 583adf01747SMatthew Dillon } 584adf01747SMatthew Dillon 585adf01747SMatthew Dillon /* 586adf01747SMatthew Dillon * Recover from a deadlocked cursor, tracking any node removals or 587adf01747SMatthew Dillon * replacements. If the cursor's current node is removed by another 588adf01747SMatthew Dillon * thread (via btree_remove()) the cursor will be seeked upwards. 58998da6d8cSMatthew Dillon * 59098da6d8cSMatthew Dillon * The caller is working a modifying operation and must be holding the 59198da6d8cSMatthew Dillon * sync lock (shared). We do not release the sync lock because this 59298da6d8cSMatthew Dillon * would break atomicy. 593adf01747SMatthew Dillon */ 594adf01747SMatthew Dillon int 595adf01747SMatthew Dillon hammer_recover_cursor(hammer_cursor_t cursor) 596adf01747SMatthew Dillon { 597adf01747SMatthew Dillon int error; 598adf01747SMatthew Dillon 599982be4bfSMatthew Dillon hammer_unlock_cursor(cursor); 60098da6d8cSMatthew Dillon KKASSERT(cursor->trans->sync_lock_refs > 0); 601b3bad96fSMatthew Dillon 602b3bad96fSMatthew Dillon /* 603b3bad96fSMatthew Dillon * Wait for the deadlock to clear 604b3bad96fSMatthew Dillon */ 605b3bad96fSMatthew Dillon if (cursor->deadlk_node) { 606b3bad96fSMatthew Dillon hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk"); 607b3bad96fSMatthew Dillon hammer_unlock(&cursor->deadlk_node->lock); 608b3bad96fSMatthew Dillon hammer_rel_node(cursor->deadlk_node); 609b3bad96fSMatthew Dillon cursor->deadlk_node = NULL; 610b3bad96fSMatthew Dillon } 611b3bad96fSMatthew Dillon if (cursor->deadlk_rec) { 612b3bad96fSMatthew Dillon hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr"); 613b3bad96fSMatthew Dillon hammer_rel_mem_record(cursor->deadlk_rec); 614b3bad96fSMatthew Dillon cursor->deadlk_rec = NULL; 615b3bad96fSMatthew Dillon } 616982be4bfSMatthew Dillon error = hammer_lock_cursor(cursor); 617b3bad96fSMatthew Dillon return(error); 618b3bad96fSMatthew Dillon } 619b3bad96fSMatthew Dillon 620b3bad96fSMatthew Dillon /* 621adf01747SMatthew Dillon * Dup ocursor to ncursor. ncursor inherits ocursor's locks and ocursor 622adf01747SMatthew Dillon * is effectively unlocked and becomes tracked. If ocursor was not locked 623adf01747SMatthew Dillon * then ncursor also inherits the tracking. 624adf01747SMatthew Dillon * 625adf01747SMatthew Dillon * After the caller finishes working with ncursor it must be cleaned up 626adf01747SMatthew Dillon * with hammer_done_cursor(), and the caller must re-lock ocursor. 627adf01747SMatthew Dillon */ 6283f43fb33SMatthew Dillon hammer_cursor_t 6293f43fb33SMatthew Dillon hammer_push_cursor(hammer_cursor_t ocursor) 630adf01747SMatthew Dillon { 6313f43fb33SMatthew Dillon hammer_cursor_t ncursor; 632adf01747SMatthew Dillon hammer_inode_t ip; 633adf01747SMatthew Dillon hammer_node_t node; 634bac808feSMatthew Dillon hammer_mount_t hmp; 635adf01747SMatthew Dillon 636bac808feSMatthew Dillon hmp = ocursor->trans->hmp; 637bac808feSMatthew Dillon ncursor = kmalloc(sizeof(*ncursor), hmp->m_misc, M_WAITOK | M_ZERO); 638adf01747SMatthew Dillon bcopy(ocursor, ncursor, sizeof(*ocursor)); 639adf01747SMatthew Dillon 640adf01747SMatthew Dillon node = ocursor->node; 641adf01747SMatthew Dillon hammer_ref_node(node); 642adf01747SMatthew Dillon if ((ocursor->flags & HAMMER_CURSOR_TRACKED) == 0) { 643adf01747SMatthew Dillon ocursor->flags |= HAMMER_CURSOR_TRACKED; 644adf01747SMatthew Dillon TAILQ_INSERT_TAIL(&node->cursor_list, ocursor, deadlk_entry); 645adf01747SMatthew Dillon } 646adf01747SMatthew Dillon if (ncursor->parent) 647adf01747SMatthew Dillon ocursor->parent = NULL; 648adf01747SMatthew Dillon ocursor->data_buffer = NULL; 649adf01747SMatthew Dillon ocursor->leaf = NULL; 650adf01747SMatthew Dillon ocursor->data = NULL; 651adf01747SMatthew Dillon if (ncursor->flags & HAMMER_CURSOR_TRACKED) 652adf01747SMatthew Dillon TAILQ_INSERT_TAIL(&node->cursor_list, ncursor, deadlk_entry); 653adf01747SMatthew Dillon if ((ip = ncursor->ip) != NULL) { 654adf01747SMatthew Dillon ++ip->cursor_ip_refs; 655adf01747SMatthew Dillon } 656adf01747SMatthew Dillon if (ncursor->iprec) 657adf01747SMatthew Dillon hammer_ref(&ncursor->iprec->lock); 6583f43fb33SMatthew Dillon return(ncursor); 6593f43fb33SMatthew Dillon } 6603f43fb33SMatthew Dillon 6613f43fb33SMatthew Dillon /* 6623f43fb33SMatthew Dillon * Destroy ncursor and restore ocursor 6633f43fb33SMatthew Dillon * 6643f43fb33SMatthew Dillon * This is a temporary hack for the release. We can't afford to lose 6653f43fb33SMatthew Dillon * the IP lock until the IP object scan code is able to deal with it, 6663f43fb33SMatthew Dillon * so have ocursor inherit it back. 6673f43fb33SMatthew Dillon */ 6683f43fb33SMatthew Dillon void 6693f43fb33SMatthew Dillon hammer_pop_cursor(hammer_cursor_t ocursor, hammer_cursor_t ncursor) 6703f43fb33SMatthew Dillon { 671bac808feSMatthew Dillon hammer_mount_t hmp; 6723f43fb33SMatthew Dillon hammer_inode_t ip; 6733f43fb33SMatthew Dillon 674bac808feSMatthew Dillon hmp = ncursor->trans->hmp; 6753f43fb33SMatthew Dillon ip = ncursor->ip; 6763f43fb33SMatthew Dillon ncursor->ip = NULL; 6773f43fb33SMatthew Dillon if (ip) 6783f43fb33SMatthew Dillon --ip->cursor_ip_refs; 6793f43fb33SMatthew Dillon hammer_done_cursor(ncursor); 680bac808feSMatthew Dillon kfree(ncursor, hmp->m_misc); 6813f43fb33SMatthew Dillon KKASSERT(ocursor->ip == ip); 682982be4bfSMatthew Dillon hammer_lock_cursor(ocursor); 683adf01747SMatthew Dillon } 684adf01747SMatthew Dillon 685adf01747SMatthew Dillon /* 686b3bad96fSMatthew Dillon * onode is being replaced by nnode by the reblocking code. 687b3bad96fSMatthew Dillon */ 688b3bad96fSMatthew Dillon void 689b3bad96fSMatthew Dillon hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode) 690b3bad96fSMatthew Dillon { 691b3bad96fSMatthew Dillon hammer_cursor_t cursor; 692c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 693c9ce54d6SMatthew Dillon hammer_node_ondisk_t nndisk; 694c9ce54d6SMatthew Dillon 695c9ce54d6SMatthew Dillon ondisk = onode->ondisk; 696c9ce54d6SMatthew Dillon nndisk = nnode->ondisk; 697b3bad96fSMatthew Dillon 698b3bad96fSMatthew Dillon while ((cursor = TAILQ_FIRST(&onode->cursor_list)) != NULL) { 699b3bad96fSMatthew Dillon TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry); 700b3bad96fSMatthew Dillon TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry); 701b3bad96fSMatthew Dillon KKASSERT(cursor->node == onode); 702c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 703c9ce54d6SMatthew Dillon cursor->leaf = &nndisk->elms[cursor->index].leaf; 704b3bad96fSMatthew Dillon cursor->node = nnode; 705b3bad96fSMatthew Dillon hammer_ref_node(nnode); 706b3bad96fSMatthew Dillon hammer_rel_node(onode); 707b3bad96fSMatthew Dillon } 708b3bad96fSMatthew Dillon } 709b3bad96fSMatthew Dillon 710b3bad96fSMatthew Dillon /* 711*f3a4893bSMatthew Dillon * We have removed <node> from the parent and collapsed the parent. 712*f3a4893bSMatthew Dillon * 713*f3a4893bSMatthew Dillon * Cursors in deadlock recovery are seeked upward to the parent so the 714*f3a4893bSMatthew Dillon * btree_remove() recursion works properly. 715b3bad96fSMatthew Dillon */ 716b3bad96fSMatthew Dillon void 717b3bad96fSMatthew Dillon hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index) 718b3bad96fSMatthew Dillon { 719b3bad96fSMatthew Dillon hammer_cursor_t cursor; 720c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 721b3bad96fSMatthew Dillon 722b3bad96fSMatthew Dillon KKASSERT(parent != NULL); 723c9ce54d6SMatthew Dillon ondisk = node->ondisk; 724c9ce54d6SMatthew Dillon 725b3bad96fSMatthew Dillon while ((cursor = TAILQ_FIRST(&node->cursor_list)) != NULL) { 726b3bad96fSMatthew Dillon KKASSERT(cursor->node == node); 727b3bad96fSMatthew Dillon KKASSERT(cursor->index == 0); 728b3bad96fSMatthew Dillon TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry); 729b3bad96fSMatthew Dillon TAILQ_INSERT_TAIL(&parent->cursor_list, cursor, deadlk_entry); 730c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 731c9ce54d6SMatthew Dillon cursor->leaf = NULL; 732adf01747SMatthew Dillon cursor->flags |= HAMMER_CURSOR_TRACKED_RIPOUT; 733b3bad96fSMatthew Dillon cursor->node = parent; 734b3bad96fSMatthew Dillon cursor->index = index; 735b3bad96fSMatthew Dillon hammer_ref_node(parent); 736b3bad96fSMatthew Dillon hammer_rel_node(node); 737b3bad96fSMatthew Dillon } 738b3bad96fSMatthew Dillon } 739b3bad96fSMatthew Dillon 740b3bad96fSMatthew Dillon /* 741b3bad96fSMatthew Dillon * node was split at (onode, index) with elements >= index moved to nnode. 742b3bad96fSMatthew Dillon */ 743b3bad96fSMatthew Dillon void 744b3bad96fSMatthew Dillon hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode, int index) 745b3bad96fSMatthew Dillon { 746b3bad96fSMatthew Dillon hammer_cursor_t cursor; 747c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 748c9ce54d6SMatthew Dillon hammer_node_ondisk_t nndisk; 749c9ce54d6SMatthew Dillon 750c9ce54d6SMatthew Dillon ondisk = onode->ondisk; 751c9ce54d6SMatthew Dillon nndisk = nnode->ondisk; 752b3bad96fSMatthew Dillon 753b3bad96fSMatthew Dillon again: 754b3bad96fSMatthew Dillon TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) { 755b3bad96fSMatthew Dillon KKASSERT(cursor->node == onode); 756b3bad96fSMatthew Dillon if (cursor->index < index) 757b3bad96fSMatthew Dillon continue; 758b3bad96fSMatthew Dillon TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry); 759b3bad96fSMatthew Dillon TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry); 760c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 761c9ce54d6SMatthew Dillon cursor->leaf = &nndisk->elms[cursor->index - index].leaf; 762b3bad96fSMatthew Dillon cursor->node = nnode; 763b3bad96fSMatthew Dillon cursor->index -= index; 764b3bad96fSMatthew Dillon hammer_ref_node(nnode); 765b3bad96fSMatthew Dillon hammer_rel_node(onode); 766b3bad96fSMatthew Dillon goto again; 767b3bad96fSMatthew Dillon } 768b3bad96fSMatthew Dillon } 769b3bad96fSMatthew Dillon 770b3bad96fSMatthew Dillon /* 7711775b6a0SMatthew Dillon * An element was moved from one node to another or within a node. The 7721775b6a0SMatthew Dillon * index may also represent the end of the node (index == numelements). 7731775b6a0SMatthew Dillon * 7741775b6a0SMatthew Dillon * This is used by the rebalancing code. This is not an insertion or 7751775b6a0SMatthew Dillon * deletion and any additional elements, including the degenerate case at 7761775b6a0SMatthew Dillon * the end of the node, will be dealt with by additional distinct calls. 7771775b6a0SMatthew Dillon */ 7781775b6a0SMatthew Dillon void 7791775b6a0SMatthew Dillon hammer_cursor_moved_element(hammer_node_t onode, hammer_node_t nnode, 7801775b6a0SMatthew Dillon int oindex, int nindex) 7811775b6a0SMatthew Dillon { 7821775b6a0SMatthew Dillon hammer_cursor_t cursor; 783c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 784c9ce54d6SMatthew Dillon hammer_node_ondisk_t nndisk; 785c9ce54d6SMatthew Dillon 786c9ce54d6SMatthew Dillon ondisk = onode->ondisk; 787c9ce54d6SMatthew Dillon nndisk = nnode->ondisk; 7881775b6a0SMatthew Dillon 7891775b6a0SMatthew Dillon again: 7901775b6a0SMatthew Dillon TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) { 7911775b6a0SMatthew Dillon KKASSERT(cursor->node == onode); 7921775b6a0SMatthew Dillon if (cursor->index != oindex) 7931775b6a0SMatthew Dillon continue; 7941775b6a0SMatthew Dillon TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry); 7951775b6a0SMatthew Dillon TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry); 796c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[oindex].leaf) 797c9ce54d6SMatthew Dillon cursor->leaf = &nndisk->elms[nindex].leaf; 7981775b6a0SMatthew Dillon cursor->node = nnode; 7991775b6a0SMatthew Dillon cursor->index = nindex; 8001775b6a0SMatthew Dillon hammer_ref_node(nnode); 8011775b6a0SMatthew Dillon hammer_rel_node(onode); 8021775b6a0SMatthew Dillon goto again; 8031775b6a0SMatthew Dillon } 8041775b6a0SMatthew Dillon } 8051775b6a0SMatthew Dillon 8061775b6a0SMatthew Dillon /* 8071775b6a0SMatthew Dillon * The B-Tree element pointing to the specified node was moved from (oparent) 8081775b6a0SMatthew Dillon * to (nparent, nindex). We must locate any tracked cursors pointing at 8091775b6a0SMatthew Dillon * node and adjust their parent accordingly. 8101775b6a0SMatthew Dillon * 8111775b6a0SMatthew Dillon * This is used by the rebalancing code when packing elements causes an 8121775b6a0SMatthew Dillon * element to shift from one node to another. 8131775b6a0SMatthew Dillon */ 8141775b6a0SMatthew Dillon void 8151775b6a0SMatthew Dillon hammer_cursor_parent_changed(hammer_node_t node, hammer_node_t oparent, 8161775b6a0SMatthew Dillon hammer_node_t nparent, int nindex) 8171775b6a0SMatthew Dillon { 8181775b6a0SMatthew Dillon hammer_cursor_t cursor; 8191775b6a0SMatthew Dillon 8201775b6a0SMatthew Dillon again: 8211775b6a0SMatthew Dillon TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) { 8221775b6a0SMatthew Dillon KKASSERT(cursor->node == node); 8231775b6a0SMatthew Dillon if (cursor->parent == oparent) { 8241775b6a0SMatthew Dillon cursor->parent = nparent; 8251775b6a0SMatthew Dillon cursor->parent_index = nindex; 8261775b6a0SMatthew Dillon hammer_ref_node(nparent); 8271775b6a0SMatthew Dillon hammer_rel_node(oparent); 8281775b6a0SMatthew Dillon goto again; 8291775b6a0SMatthew Dillon } 8301775b6a0SMatthew Dillon } 8311775b6a0SMatthew Dillon } 8321775b6a0SMatthew Dillon 8331775b6a0SMatthew Dillon /* 834e4a5ff06SMatthew Dillon * Deleted element at (node, index) 835b3bad96fSMatthew Dillon * 836b3bad96fSMatthew Dillon * Shift indexes >= index 837b3bad96fSMatthew Dillon */ 838b3bad96fSMatthew Dillon void 839b3bad96fSMatthew Dillon hammer_cursor_deleted_element(hammer_node_t node, int index) 840b3bad96fSMatthew Dillon { 841b3bad96fSMatthew Dillon hammer_cursor_t cursor; 842c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 843c9ce54d6SMatthew Dillon 844c9ce54d6SMatthew Dillon ondisk = node->ondisk; 845b3bad96fSMatthew Dillon 846b3bad96fSMatthew Dillon TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) { 847b3bad96fSMatthew Dillon KKASSERT(cursor->node == node); 848b3bad96fSMatthew Dillon if (cursor->index == index) { 849adf01747SMatthew Dillon cursor->flags |= HAMMER_CURSOR_TRACKED_RIPOUT; 850c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 851c9ce54d6SMatthew Dillon cursor->leaf = NULL; 852b3bad96fSMatthew Dillon } else if (cursor->index > index) { 853c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 854c9ce54d6SMatthew Dillon cursor->leaf = &ondisk->elms[cursor->index - 1].leaf; 855b3bad96fSMatthew Dillon --cursor->index; 856b3bad96fSMatthew Dillon } 857b3bad96fSMatthew Dillon } 858b3bad96fSMatthew Dillon } 859b3bad96fSMatthew Dillon 860b3bad96fSMatthew Dillon /* 861e4a5ff06SMatthew Dillon * Inserted element at (node, index) 862b3bad96fSMatthew Dillon * 863b3bad96fSMatthew Dillon * Shift indexes >= index 864b3bad96fSMatthew Dillon */ 865b3bad96fSMatthew Dillon void 866b3bad96fSMatthew Dillon hammer_cursor_inserted_element(hammer_node_t node, int index) 867b3bad96fSMatthew Dillon { 868b3bad96fSMatthew Dillon hammer_cursor_t cursor; 869c9ce54d6SMatthew Dillon hammer_node_ondisk_t ondisk; 870c9ce54d6SMatthew Dillon 871c9ce54d6SMatthew Dillon ondisk = node->ondisk; 872b3bad96fSMatthew Dillon 873b3bad96fSMatthew Dillon TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) { 874b3bad96fSMatthew Dillon KKASSERT(cursor->node == node); 875c9ce54d6SMatthew Dillon if (cursor->index >= index) { 876c9ce54d6SMatthew Dillon if (cursor->leaf == &ondisk->elms[cursor->index].leaf) 877c9ce54d6SMatthew Dillon cursor->leaf = &ondisk->elms[cursor->index + 1].leaf; 878b3bad96fSMatthew Dillon ++cursor->index; 879b3bad96fSMatthew Dillon } 880b3bad96fSMatthew Dillon } 881c9ce54d6SMatthew Dillon } 882b3bad96fSMatthew Dillon 883