xref: /dflybsd-src/sys/vfs/hammer/hammer_cursor.c (revision f3a4893b0e844e1ab8c2334d304ebff61aa71ace)
18cd0a023SMatthew Dillon /*
2b84de5afSMatthew Dillon  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
38cd0a023SMatthew Dillon  *
48cd0a023SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
58cd0a023SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
68cd0a023SMatthew Dillon  *
78cd0a023SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
88cd0a023SMatthew Dillon  * modification, are permitted provided that the following conditions
98cd0a023SMatthew Dillon  * are met:
108cd0a023SMatthew Dillon  *
118cd0a023SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
128cd0a023SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
138cd0a023SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
148cd0a023SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
158cd0a023SMatthew Dillon  *    the documentation and/or other materials provided with the
168cd0a023SMatthew Dillon  *    distribution.
178cd0a023SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
188cd0a023SMatthew Dillon  *    contributors may be used to endorse or promote products derived
198cd0a023SMatthew Dillon  *    from this software without specific, prior written permission.
208cd0a023SMatthew Dillon  *
218cd0a023SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
228cd0a023SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
238cd0a023SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
248cd0a023SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
258cd0a023SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
268cd0a023SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
278cd0a023SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
288cd0a023SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
298cd0a023SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
308cd0a023SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
318cd0a023SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
328cd0a023SMatthew Dillon  * SUCH DAMAGE.
338cd0a023SMatthew Dillon  *
345c8d05e2SMatthew Dillon  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.42 2008/08/06 15:38:58 dillon Exp $
358cd0a023SMatthew Dillon  */
368cd0a023SMatthew Dillon 
378cd0a023SMatthew Dillon /*
388cd0a023SMatthew Dillon  * HAMMER B-Tree index - cursor support routines
398cd0a023SMatthew Dillon  */
408cd0a023SMatthew Dillon #include "hammer.h"
418cd0a023SMatthew Dillon 
42f36a9737SMatthew Dillon static int hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive);
438cd0a023SMatthew Dillon 
448cd0a023SMatthew Dillon /*
4561aeeb33SMatthew Dillon  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
4647197d71SMatthew Dillon  * is not available initialize a fresh cursor at the root of the filesystem.
478cd0a023SMatthew Dillon  */
488cd0a023SMatthew Dillon int
4936f82b23SMatthew Dillon hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
50bcac4bbbSMatthew Dillon 		   hammer_node_cache_t cache, hammer_inode_t ip)
518cd0a023SMatthew Dillon {
5247197d71SMatthew Dillon 	hammer_volume_t volume;
5361aeeb33SMatthew Dillon 	hammer_node_t node;
548cd0a023SMatthew Dillon 	int error;
558cd0a023SMatthew Dillon 
568cd0a023SMatthew Dillon 	bzero(cursor, sizeof(*cursor));
5761aeeb33SMatthew Dillon 
5836f82b23SMatthew Dillon 	cursor->trans = trans;
5936f82b23SMatthew Dillon 
6061aeeb33SMatthew Dillon 	/*
614e17f465SMatthew Dillon 	 * If the cursor operation is on behalf of an inode, lock
624e17f465SMatthew Dillon 	 * the inode.
634e17f465SMatthew Dillon 	 */
644e17f465SMatthew Dillon 	if ((cursor->ip = ip) != NULL) {
654e17f465SMatthew Dillon 		++ip->cursor_ip_refs;
664e17f465SMatthew Dillon 		if (trans->type == HAMMER_TRANS_FLS)
674e17f465SMatthew Dillon 			hammer_lock_ex(&ip->lock);
684e17f465SMatthew Dillon 		else
694e17f465SMatthew Dillon 			hammer_lock_sh(&ip->lock);
704e17f465SMatthew Dillon 	}
714e17f465SMatthew Dillon 
724e17f465SMatthew Dillon 	/*
7361aeeb33SMatthew Dillon 	 * Step 1 - acquire a locked node from the cache if possible
7461aeeb33SMatthew Dillon 	 */
75bcac4bbbSMatthew Dillon 	if (cache && cache->node) {
764c286c36SMatthew Dillon 		node = hammer_ref_node_safe(trans, cache, &error);
778cd0a023SMatthew Dillon 		if (error == 0) {
7898da6d8cSMatthew Dillon 			hammer_lock_sh(&node->lock);
7961aeeb33SMatthew Dillon 			if (node->flags & HAMMER_NODE_DELETED) {
8061aeeb33SMatthew Dillon 				hammer_unlock(&node->lock);
8161aeeb33SMatthew Dillon 				hammer_rel_node(node);
8261aeeb33SMatthew Dillon 				node = NULL;
8361aeeb33SMatthew Dillon 			}
8461aeeb33SMatthew Dillon 		}
8539d8fd63SMatthew Dillon 		if (node == NULL)
8639d8fd63SMatthew Dillon 			++hammer_stats_btree_root_iterations;
8761aeeb33SMatthew Dillon 	} else {
8861aeeb33SMatthew Dillon 		node = NULL;
8939d8fd63SMatthew Dillon 		++hammer_stats_btree_root_iterations;
9061aeeb33SMatthew Dillon 	}
9161aeeb33SMatthew Dillon 
9261aeeb33SMatthew Dillon 	/*
9361aeeb33SMatthew Dillon 	 * Step 2 - If we couldn't get a node from the cache, get
9447197d71SMatthew Dillon 	 * the one from the root of the filesystem.
9561aeeb33SMatthew Dillon 	 */
9661aeeb33SMatthew Dillon 	while (node == NULL) {
9736f82b23SMatthew Dillon 		volume = hammer_get_root_volume(trans->hmp, &error);
9861aeeb33SMatthew Dillon 		if (error)
9961aeeb33SMatthew Dillon 			break;
10082010f9fSMatthew Dillon 		node = hammer_get_node(trans, volume->ondisk->vol0_btree_root,
10119619882SMatthew Dillon 				       0, &error);
10247197d71SMatthew Dillon 		hammer_rel_volume(volume, 0);
10361aeeb33SMatthew Dillon 		if (error)
10461aeeb33SMatthew Dillon 			break;
10598da6d8cSMatthew Dillon 		hammer_lock_sh(&node->lock);
10611ad5adeSMatthew Dillon 
10711ad5adeSMatthew Dillon 		/*
10811ad5adeSMatthew Dillon 		 * If someone got in before we could lock the node, retry.
10911ad5adeSMatthew Dillon 		 */
11061aeeb33SMatthew Dillon 		if (node->flags & HAMMER_NODE_DELETED) {
11161aeeb33SMatthew Dillon 			hammer_unlock(&node->lock);
11261aeeb33SMatthew Dillon 			hammer_rel_node(node);
11361aeeb33SMatthew Dillon 			node = NULL;
11411ad5adeSMatthew Dillon 			continue;
11511ad5adeSMatthew Dillon 		}
11611ad5adeSMatthew Dillon 		if (volume->ondisk->vol0_btree_root != node->node_offset) {
11711ad5adeSMatthew Dillon 			hammer_unlock(&node->lock);
11811ad5adeSMatthew Dillon 			hammer_rel_node(node);
11911ad5adeSMatthew Dillon 			node = NULL;
12011ad5adeSMatthew Dillon 			continue;
1218cd0a023SMatthew Dillon 		}
12261aeeb33SMatthew Dillon 	}
12361aeeb33SMatthew Dillon 
12461aeeb33SMatthew Dillon 	/*
12561aeeb33SMatthew Dillon 	 * Step 3 - finish initializing the cursor by acquiring the parent
12661aeeb33SMatthew Dillon 	 */
12761aeeb33SMatthew Dillon 	cursor->node = node;
1288cd0a023SMatthew Dillon 	if (error == 0)
129f36a9737SMatthew Dillon 		error = hammer_load_cursor_parent(cursor, 0);
1300b075555SMatthew Dillon 	KKASSERT(error == 0);
131a84a197dSMatthew Dillon 	/* if (error) hammer_done_cursor(cursor); */
1328cd0a023SMatthew Dillon 	return(error);
1338cd0a023SMatthew Dillon }
1348cd0a023SMatthew Dillon 
1354e17f465SMatthew Dillon /*
1364e17f465SMatthew Dillon  * Normalize a cursor.  Sometimes cursors can be left in a state
1374e17f465SMatthew Dillon  * where node is NULL.  If the cursor is in this state, cursor up.
1384e17f465SMatthew Dillon  */
1394e17f465SMatthew Dillon void
1404e17f465SMatthew Dillon hammer_normalize_cursor(hammer_cursor_t cursor)
1414e17f465SMatthew Dillon {
1424e17f465SMatthew Dillon 	if (cursor->node == NULL) {
1434e17f465SMatthew Dillon 		KKASSERT(cursor->parent != NULL);
1444e17f465SMatthew Dillon 		hammer_cursor_up(cursor);
1454e17f465SMatthew Dillon 	}
1464e17f465SMatthew Dillon }
1474e17f465SMatthew Dillon 
1484e17f465SMatthew Dillon 
1498cd0a023SMatthew Dillon /*
1508cd0a023SMatthew Dillon  * We are finished with a cursor.  We NULL out various fields as sanity
1518cd0a023SMatthew Dillon  * check, in case the structure is inappropriately used afterwords.
1528cd0a023SMatthew Dillon  */
1538cd0a023SMatthew Dillon void
1548cd0a023SMatthew Dillon hammer_done_cursor(hammer_cursor_t cursor)
1558cd0a023SMatthew Dillon {
1564e17f465SMatthew Dillon 	hammer_inode_t ip;
1574e17f465SMatthew Dillon 
158adf01747SMatthew Dillon 	KKASSERT((cursor->flags & HAMMER_CURSOR_TRACKED) == 0);
1598cd0a023SMatthew Dillon 	if (cursor->parent) {
1608cd0a023SMatthew Dillon 		hammer_unlock(&cursor->parent->lock);
1618cd0a023SMatthew Dillon 		hammer_rel_node(cursor->parent);
1628cd0a023SMatthew Dillon 		cursor->parent = NULL;
1638cd0a023SMatthew Dillon 	}
1648cd0a023SMatthew Dillon 	if (cursor->node) {
1658cd0a023SMatthew Dillon 		hammer_unlock(&cursor->node->lock);
1668cd0a023SMatthew Dillon 		hammer_rel_node(cursor->node);
1678cd0a023SMatthew Dillon 		cursor->node = NULL;
1688cd0a023SMatthew Dillon 	}
1698cd0a023SMatthew Dillon         if (cursor->data_buffer) {
1708cd0a023SMatthew Dillon                 hammer_rel_buffer(cursor->data_buffer, 0);
1718cd0a023SMatthew Dillon                 cursor->data_buffer = NULL;
1728cd0a023SMatthew Dillon         }
1734e17f465SMatthew Dillon 	if ((ip = cursor->ip) != NULL) {
1744e17f465SMatthew Dillon                 KKASSERT(ip->cursor_ip_refs > 0);
1754e17f465SMatthew Dillon                 --ip->cursor_ip_refs;
1764e17f465SMatthew Dillon 		hammer_unlock(&ip->lock);
1774e17f465SMatthew Dillon                 cursor->ip = NULL;
1784e17f465SMatthew Dillon         }
1793f43fb33SMatthew Dillon 	if (cursor->iprec) {
1803f43fb33SMatthew Dillon 		hammer_rel_mem_record(cursor->iprec);
1813f43fb33SMatthew Dillon 		cursor->iprec = NULL;
1823f43fb33SMatthew Dillon 	}
1834e17f465SMatthew Dillon 
1846a37e7e4SMatthew Dillon 	/*
1856a37e7e4SMatthew Dillon 	 * If we deadlocked this node will be referenced.  Do a quick
1866a37e7e4SMatthew Dillon 	 * lock/unlock to wait for the deadlock condition to clear.
1876a37e7e4SMatthew Dillon 	 */
1886a37e7e4SMatthew Dillon 	if (cursor->deadlk_node) {
189af209b0fSMatthew Dillon 		hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk");
1906a37e7e4SMatthew Dillon 		hammer_unlock(&cursor->deadlk_node->lock);
1916a37e7e4SMatthew Dillon 		hammer_rel_node(cursor->deadlk_node);
1926a37e7e4SMatthew Dillon 		cursor->deadlk_node = NULL;
1936a37e7e4SMatthew Dillon 	}
194b84de5afSMatthew Dillon 	if (cursor->deadlk_rec) {
195a99b9ea2SMatthew Dillon 		hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr");
196b84de5afSMatthew Dillon 		hammer_rel_mem_record(cursor->deadlk_rec);
197b84de5afSMatthew Dillon 		cursor->deadlk_rec = NULL;
198b84de5afSMatthew Dillon 	}
1996a37e7e4SMatthew Dillon 
20040043e7fSMatthew Dillon 	cursor->data = NULL;
20111ad5adeSMatthew Dillon 	cursor->leaf = NULL;
2028cd0a023SMatthew Dillon 	cursor->left_bound = NULL;
2038cd0a023SMatthew Dillon 	cursor->right_bound = NULL;
20436f82b23SMatthew Dillon 	cursor->trans = NULL;
2058cd0a023SMatthew Dillon }
2068cd0a023SMatthew Dillon 
2078cd0a023SMatthew Dillon /*
2086a37e7e4SMatthew Dillon  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
2096a37e7e4SMatthew Dillon  * function can return EDEADLK.
2106a37e7e4SMatthew Dillon  *
2117aa3b8a6SMatthew Dillon  * The lock must already be either held shared or already held exclusively
2127aa3b8a6SMatthew Dillon  * by us.
2137aa3b8a6SMatthew Dillon  *
2146a37e7e4SMatthew Dillon  * If we fail to upgrade the lock and cursor->deadlk_node is NULL,
2156a37e7e4SMatthew Dillon  * we add another reference to the node that failed and set
2166a37e7e4SMatthew Dillon  * cursor->deadlk_node so hammer_done_cursor() can block on it.
2176a37e7e4SMatthew Dillon  */
2186a37e7e4SMatthew Dillon int
2196a37e7e4SMatthew Dillon hammer_cursor_upgrade(hammer_cursor_t cursor)
2206a37e7e4SMatthew Dillon {
2216a37e7e4SMatthew Dillon 	int error;
2226a37e7e4SMatthew Dillon 
2236a37e7e4SMatthew Dillon 	error = hammer_lock_upgrade(&cursor->node->lock);
2246a37e7e4SMatthew Dillon 	if (error && cursor->deadlk_node == NULL) {
2256a37e7e4SMatthew Dillon 		cursor->deadlk_node = cursor->node;
2266a37e7e4SMatthew Dillon 		hammer_ref_node(cursor->deadlk_node);
2277aa3b8a6SMatthew Dillon 	} else if (error == 0 && cursor->parent) {
2286a37e7e4SMatthew Dillon 		error = hammer_lock_upgrade(&cursor->parent->lock);
2296a37e7e4SMatthew Dillon 		if (error && cursor->deadlk_node == NULL) {
2306a37e7e4SMatthew Dillon 			cursor->deadlk_node = cursor->parent;
2316a37e7e4SMatthew Dillon 			hammer_ref_node(cursor->deadlk_node);
2326a37e7e4SMatthew Dillon 		}
2336a37e7e4SMatthew Dillon 	}
2346a37e7e4SMatthew Dillon 	return(error);
2356a37e7e4SMatthew Dillon }
2366a37e7e4SMatthew Dillon 
2377bc5b8c2SMatthew Dillon int
2387bc5b8c2SMatthew Dillon hammer_cursor_upgrade_node(hammer_cursor_t cursor)
2397bc5b8c2SMatthew Dillon {
2407bc5b8c2SMatthew Dillon 	int error;
2417bc5b8c2SMatthew Dillon 
2427bc5b8c2SMatthew Dillon 	error = hammer_lock_upgrade(&cursor->node->lock);
2437bc5b8c2SMatthew Dillon 	if (error && cursor->deadlk_node == NULL) {
2447bc5b8c2SMatthew Dillon 		cursor->deadlk_node = cursor->node;
2457bc5b8c2SMatthew Dillon 		hammer_ref_node(cursor->deadlk_node);
2467bc5b8c2SMatthew Dillon 	}
2477bc5b8c2SMatthew Dillon 	return(error);
2487bc5b8c2SMatthew Dillon }
2497bc5b8c2SMatthew Dillon 
2506a37e7e4SMatthew Dillon /*
2516a37e7e4SMatthew Dillon  * Downgrade cursor->node and cursor->parent to shared locks.  This
2526a37e7e4SMatthew Dillon  * function can return EDEADLK.
2536a37e7e4SMatthew Dillon  */
2546a37e7e4SMatthew Dillon void
2556a37e7e4SMatthew Dillon hammer_cursor_downgrade(hammer_cursor_t cursor)
2566a37e7e4SMatthew Dillon {
2577aa3b8a6SMatthew Dillon 	if (hammer_lock_excl_owned(&cursor->node->lock, curthread))
2586a37e7e4SMatthew Dillon 		hammer_lock_downgrade(&cursor->node->lock);
2597aa3b8a6SMatthew Dillon 	if (cursor->parent &&
2607aa3b8a6SMatthew Dillon 	    hammer_lock_excl_owned(&cursor->parent->lock, curthread)) {
2616a37e7e4SMatthew Dillon 		hammer_lock_downgrade(&cursor->parent->lock);
2626a37e7e4SMatthew Dillon 	}
2637aa3b8a6SMatthew Dillon }
2646a37e7e4SMatthew Dillon 
26532c90105SMatthew Dillon /*
26632c90105SMatthew Dillon  * Seek the cursor to the specified node and index.
267cb51be26SMatthew Dillon  *
268cb51be26SMatthew Dillon  * The caller must ref the node prior to calling this routine and release
269cb51be26SMatthew Dillon  * it after it returns.  If the seek succeeds the cursor will gain its own
270cb51be26SMatthew Dillon  * ref on the node.
27132c90105SMatthew Dillon  */
27232c90105SMatthew Dillon int
27332c90105SMatthew Dillon hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
27432c90105SMatthew Dillon {
27532c90105SMatthew Dillon 	int error;
27632c90105SMatthew Dillon 
27732c90105SMatthew Dillon 	hammer_cursor_downgrade(cursor);
27832c90105SMatthew Dillon 	error = 0;
27932c90105SMatthew Dillon 
28032c90105SMatthew Dillon 	if (cursor->node != node) {
28132c90105SMatthew Dillon 		hammer_unlock(&cursor->node->lock);
28232c90105SMatthew Dillon 		hammer_rel_node(cursor->node);
28332c90105SMatthew Dillon 		cursor->node = node;
28432c90105SMatthew Dillon 		hammer_ref_node(node);
28532c90105SMatthew Dillon 		hammer_lock_sh(&node->lock);
286f36a9737SMatthew Dillon 		KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0);
28732c90105SMatthew Dillon 
28832c90105SMatthew Dillon 		if (cursor->parent) {
28932c90105SMatthew Dillon 			hammer_unlock(&cursor->parent->lock);
29032c90105SMatthew Dillon 			hammer_rel_node(cursor->parent);
29132c90105SMatthew Dillon 			cursor->parent = NULL;
29232c90105SMatthew Dillon 			cursor->parent_index = 0;
29332c90105SMatthew Dillon 		}
294f36a9737SMatthew Dillon 		error = hammer_load_cursor_parent(cursor, 0);
29532c90105SMatthew Dillon 	}
296a84a197dSMatthew Dillon 	cursor->index = index;
29732c90105SMatthew Dillon 	return (error);
29832c90105SMatthew Dillon }
2996a37e7e4SMatthew Dillon 
3006a37e7e4SMatthew Dillon /*
30147197d71SMatthew Dillon  * Load the parent of cursor->node into cursor->parent.
3028cd0a023SMatthew Dillon  */
3038cd0a023SMatthew Dillon static
3048cd0a023SMatthew Dillon int
305f36a9737SMatthew Dillon hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive)
3068cd0a023SMatthew Dillon {
30747197d71SMatthew Dillon 	hammer_mount_t hmp;
3088cd0a023SMatthew Dillon 	hammer_node_t parent;
30947197d71SMatthew Dillon 	hammer_node_t node;
3108cd0a023SMatthew Dillon 	hammer_btree_elm_t elm;
3118cd0a023SMatthew Dillon 	int error;
312c82af904SMatthew Dillon 	int parent_index;
3138cd0a023SMatthew Dillon 
31436f82b23SMatthew Dillon 	hmp = cursor->trans->hmp;
31547197d71SMatthew Dillon 
31647197d71SMatthew Dillon 	if (cursor->node->ondisk->parent) {
3178cd0a023SMatthew Dillon 		node = cursor->node;
31882010f9fSMatthew Dillon 		parent = hammer_btree_get_parent(cursor->trans, node,
31982010f9fSMatthew Dillon 						 &parent_index,
320c82af904SMatthew Dillon 						 &error, try_exclusive);
321c82af904SMatthew Dillon 		if (error == 0) {
322c82af904SMatthew Dillon 			elm = &parent->ondisk->elms[parent_index];
3238cd0a023SMatthew Dillon 			cursor->parent = parent;
324c82af904SMatthew Dillon 			cursor->parent_index = parent_index;
3258cd0a023SMatthew Dillon 			cursor->left_bound = &elm[0].internal.base;
3268cd0a023SMatthew Dillon 			cursor->right_bound = &elm[1].internal.base;
327c82af904SMatthew Dillon 		}
32847197d71SMatthew Dillon 	} else {
32947197d71SMatthew Dillon 		cursor->parent = NULL;
33047197d71SMatthew Dillon 		cursor->parent_index = 0;
33147197d71SMatthew Dillon 		cursor->left_bound = &hmp->root_btree_beg;
33247197d71SMatthew Dillon 		cursor->right_bound = &hmp->root_btree_end;
33347197d71SMatthew Dillon 		error = 0;
3348cd0a023SMatthew Dillon 	}
3358cd0a023SMatthew Dillon 	return(error);
3368cd0a023SMatthew Dillon }
3378cd0a023SMatthew Dillon 
3388cd0a023SMatthew Dillon /*
3398cd0a023SMatthew Dillon  * Cursor up to our parent node.  Return ENOENT if we are at the root of
34047197d71SMatthew Dillon  * the filesystem.
3418cd0a023SMatthew Dillon  */
3428cd0a023SMatthew Dillon int
3436a37e7e4SMatthew Dillon hammer_cursor_up(hammer_cursor_t cursor)
3448cd0a023SMatthew Dillon {
3458cd0a023SMatthew Dillon 	int error;
3468cd0a023SMatthew Dillon 
3476a37e7e4SMatthew Dillon 	hammer_cursor_downgrade(cursor);
348195c19a1SMatthew Dillon 
349195c19a1SMatthew Dillon 	/*
3504e17f465SMatthew Dillon 	 * If the parent is NULL we are at the root of the B-Tree and
3514e17f465SMatthew Dillon 	 * return ENOENT.
3524e17f465SMatthew Dillon 	 */
3534e17f465SMatthew Dillon 	if (cursor->parent == NULL)
3544e17f465SMatthew Dillon 		return (ENOENT);
3554e17f465SMatthew Dillon 
3564e17f465SMatthew Dillon 	/*
3574e17f465SMatthew Dillon 	 * Set the node to its parent.
3588cd0a023SMatthew Dillon 	 */
3598cd0a023SMatthew Dillon 	hammer_unlock(&cursor->node->lock);
3608cd0a023SMatthew Dillon 	hammer_rel_node(cursor->node);
3618cd0a023SMatthew Dillon 	cursor->node = cursor->parent;
3628cd0a023SMatthew Dillon 	cursor->index = cursor->parent_index;
3638cd0a023SMatthew Dillon 	cursor->parent = NULL;
3648cd0a023SMatthew Dillon 	cursor->parent_index = 0;
3658cd0a023SMatthew Dillon 
366f36a9737SMatthew Dillon 	error = hammer_load_cursor_parent(cursor, 0);
3678cd0a023SMatthew Dillon 	return(error);
3688cd0a023SMatthew Dillon }
3698cd0a023SMatthew Dillon 
3708cd0a023SMatthew Dillon /*
371f36a9737SMatthew Dillon  * Special cursor up given a locked cursor.  The orignal node is not
372b3bad96fSMatthew Dillon  * unlocked or released and the cursor is not downgraded.
373b3bad96fSMatthew Dillon  *
374*f3a4893bSMatthew Dillon  * This function can fail with EDEADLK.
375*f3a4893bSMatthew Dillon  *
376*f3a4893bSMatthew Dillon  * This function is only run when recursively deleting parent nodes
377*f3a4893bSMatthew Dillon  * to get rid of an empty leaf.
378f36a9737SMatthew Dillon  */
379f36a9737SMatthew Dillon int
380f36a9737SMatthew Dillon hammer_cursor_up_locked(hammer_cursor_t cursor)
381f36a9737SMatthew Dillon {
382f36a9737SMatthew Dillon 	hammer_node_t save;
383f36a9737SMatthew Dillon 	int error;
384*f3a4893bSMatthew Dillon 	int save_index;
385f36a9737SMatthew Dillon 
386f36a9737SMatthew Dillon 	/*
387f36a9737SMatthew Dillon 	 * If the parent is NULL we are at the root of the B-Tree and
388f36a9737SMatthew Dillon 	 * return ENOENT.
389f36a9737SMatthew Dillon 	 */
390f36a9737SMatthew Dillon 	if (cursor->parent == NULL)
391f36a9737SMatthew Dillon 		return (ENOENT);
392f36a9737SMatthew Dillon 
393f36a9737SMatthew Dillon 	save = cursor->node;
394*f3a4893bSMatthew Dillon 	save_index = cursor->index;
395f36a9737SMatthew Dillon 
396f36a9737SMatthew Dillon 	/*
397f36a9737SMatthew Dillon 	 * Set the node to its parent.
398f36a9737SMatthew Dillon 	 */
399f36a9737SMatthew Dillon 	cursor->node = cursor->parent;
400f36a9737SMatthew Dillon 	cursor->index = cursor->parent_index;
401f36a9737SMatthew Dillon 	cursor->parent = NULL;
402f36a9737SMatthew Dillon 	cursor->parent_index = 0;
403f36a9737SMatthew Dillon 
404f36a9737SMatthew Dillon 	/*
405f36a9737SMatthew Dillon 	 * load the new parent, attempt to exclusively lock it.  Note that
406f36a9737SMatthew Dillon 	 * we are still holding the old parent (now cursor->node) exclusively
407*f3a4893bSMatthew Dillon 	 * locked.
408*f3a4893bSMatthew Dillon 	 *
409*f3a4893bSMatthew Dillon 	 * This can return EDEADLK.  Undo the operation on any error.  These
410*f3a4893bSMatthew Dillon 	 * up sequences can occur during iterations so be sure to restore
411*f3a4893bSMatthew Dillon 	 * the index.
412f36a9737SMatthew Dillon 	 */
413f36a9737SMatthew Dillon 	error = hammer_load_cursor_parent(cursor, 1);
414f36a9737SMatthew Dillon 	if (error) {
415f36a9737SMatthew Dillon 		cursor->parent = cursor->node;
416f36a9737SMatthew Dillon 		cursor->parent_index = cursor->index;
417f36a9737SMatthew Dillon 		cursor->node = save;
418*f3a4893bSMatthew Dillon 		cursor->index = save_index;
419f36a9737SMatthew Dillon 	}
420f36a9737SMatthew Dillon 	return(error);
421f36a9737SMatthew Dillon }
422f36a9737SMatthew Dillon 
423f36a9737SMatthew Dillon 
424f36a9737SMatthew Dillon /*
4258cd0a023SMatthew Dillon  * Cursor down through the current node, which must be an internal node.
4268cd0a023SMatthew Dillon  *
4278cd0a023SMatthew Dillon  * This routine adjusts the cursor and sets index to 0.
4288cd0a023SMatthew Dillon  */
4298cd0a023SMatthew Dillon int
4308cd0a023SMatthew Dillon hammer_cursor_down(hammer_cursor_t cursor)
4318cd0a023SMatthew Dillon {
4328cd0a023SMatthew Dillon 	hammer_node_t node;
4338cd0a023SMatthew Dillon 	hammer_btree_elm_t elm;
4348cd0a023SMatthew Dillon 	int error;
4358cd0a023SMatthew Dillon 
4368cd0a023SMatthew Dillon 	/*
4378cd0a023SMatthew Dillon 	 * The current node becomes the current parent
4388cd0a023SMatthew Dillon 	 */
4396a37e7e4SMatthew Dillon 	hammer_cursor_downgrade(cursor);
4408cd0a023SMatthew Dillon 	node = cursor->node;
4418cd0a023SMatthew Dillon 	KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
4428cd0a023SMatthew Dillon 	if (cursor->parent) {
4438cd0a023SMatthew Dillon 		hammer_unlock(&cursor->parent->lock);
4448cd0a023SMatthew Dillon 		hammer_rel_node(cursor->parent);
4458cd0a023SMatthew Dillon 	}
4468cd0a023SMatthew Dillon 	cursor->parent = node;
4478cd0a023SMatthew Dillon 	cursor->parent_index = cursor->index;
4488cd0a023SMatthew Dillon 	cursor->node = NULL;
4498cd0a023SMatthew Dillon 	cursor->index = 0;
4508cd0a023SMatthew Dillon 
4518cd0a023SMatthew Dillon 	/*
45276376933SMatthew Dillon 	 * Extract element to push into at (node,index), set bounds.
4538cd0a023SMatthew Dillon 	 */
4548cd0a023SMatthew Dillon 	elm = &node->ondisk->elms[cursor->parent_index];
4558cd0a023SMatthew Dillon 
4568cd0a023SMatthew Dillon 	/*
457fe7678eeSMatthew Dillon 	 * Ok, push down into elm.  If elm specifies an internal or leaf
458fe7678eeSMatthew Dillon 	 * node the current node must be an internal node.  If elm specifies
459fe7678eeSMatthew Dillon 	 * a spike then the current node must be a leaf node.
4608cd0a023SMatthew Dillon 	 */
461fe7678eeSMatthew Dillon 	switch(elm->base.btype) {
462fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_INTERNAL:
463fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_LEAF:
464fe7678eeSMatthew Dillon 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
465fe7678eeSMatthew Dillon 		KKASSERT(elm->internal.subtree_offset != 0);
466fe7678eeSMatthew Dillon 		cursor->left_bound = &elm[0].internal.base;
467fe7678eeSMatthew Dillon 		cursor->right_bound = &elm[1].internal.base;
46882010f9fSMatthew Dillon 		node = hammer_get_node(cursor->trans,
46919619882SMatthew Dillon 				       elm->internal.subtree_offset, 0, &error);
470fe7678eeSMatthew Dillon 		if (error == 0) {
47140043e7fSMatthew Dillon 			KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node));
472fe7678eeSMatthew Dillon 			if (node->ondisk->parent != cursor->parent->node_offset)
473973c11b9SMatthew Dillon 				panic("node %p %016llx vs %016llx\n", node, (long long)node->ondisk->parent, (long long)cursor->parent->node_offset);
474fe7678eeSMatthew Dillon 			KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
475fe7678eeSMatthew Dillon 		}
476fe7678eeSMatthew Dillon 		break;
477fe7678eeSMatthew Dillon 	default:
478fe7678eeSMatthew Dillon 		panic("hammer_cursor_down: illegal btype %02x (%c)\n",
479fe7678eeSMatthew Dillon 		      elm->base.btype,
480fe7678eeSMatthew Dillon 		      (elm->base.btype ? elm->base.btype : '?'));
481fe7678eeSMatthew Dillon 		break;
4828cd0a023SMatthew Dillon 	}
4838cd0a023SMatthew Dillon 	if (error == 0) {
4846a37e7e4SMatthew Dillon 		hammer_lock_sh(&node->lock);
485f36a9737SMatthew Dillon 		KKASSERT ((node->flags & HAMMER_NODE_DELETED) == 0);
4868cd0a023SMatthew Dillon 		cursor->node = node;
4878cd0a023SMatthew Dillon 		cursor->index = 0;
4888cd0a023SMatthew Dillon 	}
4898cd0a023SMatthew Dillon 	return(error);
4908cd0a023SMatthew Dillon }
4918cd0a023SMatthew Dillon 
492b3bad96fSMatthew Dillon /************************************************************************
493b3bad96fSMatthew Dillon  *				DEADLOCK RECOVERY			*
494b3bad96fSMatthew Dillon  ************************************************************************
495b3bad96fSMatthew Dillon  *
496b3bad96fSMatthew Dillon  * These are the new deadlock recovery functions.  Currently they are only
497b3bad96fSMatthew Dillon  * used for the mirror propagation and physical node removal cases but
498b3bad96fSMatthew Dillon  * ultimately the intention is to use them for all deadlock recovery
499b3bad96fSMatthew Dillon  * operations.
500c9ce54d6SMatthew Dillon  *
501c9ce54d6SMatthew Dillon  * WARNING!  The contents of the cursor may be modified while unlocked.
502c9ce54d6SMatthew Dillon  *	     passive modifications including adjusting the node, parent,
503c9ce54d6SMatthew Dillon  *	     indexes, and leaf pointer.
504c9ce54d6SMatthew Dillon  *
505c9ce54d6SMatthew Dillon  *	     An outright removal of the element the cursor was pointing at
506c9ce54d6SMatthew Dillon  *	     will cause the HAMMER_CURSOR_TRACKED_RIPOUT flag to be set,
507c9ce54d6SMatthew Dillon  *	     which chains to causing the HAMMER_CURSOR_RETEST to be set
508c9ce54d6SMatthew Dillon  *	     when the cursor is locked again.
509b3bad96fSMatthew Dillon  */
510adf01747SMatthew Dillon void
511982be4bfSMatthew Dillon hammer_unlock_cursor(hammer_cursor_t cursor)
512b3bad96fSMatthew Dillon {
513b3bad96fSMatthew Dillon 	hammer_node_t node;
514b3bad96fSMatthew Dillon 
515adf01747SMatthew Dillon 	KKASSERT((cursor->flags & HAMMER_CURSOR_TRACKED) == 0);
516b3bad96fSMatthew Dillon 	KKASSERT(cursor->node);
51719168d41SMatthew Dillon 
51819168d41SMatthew Dillon 	/*
519b3bad96fSMatthew Dillon 	 * Release the cursor's locks and track B-Tree operations on node.
520b3bad96fSMatthew Dillon 	 * While being tracked our cursor can be modified by other threads
5215c8d05e2SMatthew Dillon 	 * and the node may be replaced.
522b3bad96fSMatthew Dillon 	 */
523b3bad96fSMatthew Dillon 	if (cursor->parent) {
524b3bad96fSMatthew Dillon 		hammer_unlock(&cursor->parent->lock);
525b3bad96fSMatthew Dillon 		hammer_rel_node(cursor->parent);
526b3bad96fSMatthew Dillon 		cursor->parent = NULL;
527b3bad96fSMatthew Dillon 	}
528b3bad96fSMatthew Dillon 	node = cursor->node;
529adf01747SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_TRACKED;
530b3bad96fSMatthew Dillon 	TAILQ_INSERT_TAIL(&node->cursor_list, cursor, deadlk_entry);
531b3bad96fSMatthew Dillon 	hammer_unlock(&node->lock);
532adf01747SMatthew Dillon }
533adf01747SMatthew Dillon 
534adf01747SMatthew Dillon /*
535adf01747SMatthew Dillon  * Get the cursor heated up again.  The cursor's node may have
536adf01747SMatthew Dillon  * changed and we might have to locate the new parent.
537adf01747SMatthew Dillon  *
538adf01747SMatthew Dillon  * If the exact element we were on got deleted RIPOUT will be
539adf01747SMatthew Dillon  * set and we must clear ATEDISK so an iteration does not skip
540adf01747SMatthew Dillon  * the element after it.
541adf01747SMatthew Dillon  */
542adf01747SMatthew Dillon int
543982be4bfSMatthew Dillon hammer_lock_cursor(hammer_cursor_t cursor)
544adf01747SMatthew Dillon {
545adf01747SMatthew Dillon 	hammer_node_t node;
546adf01747SMatthew Dillon 	int error;
547adf01747SMatthew Dillon 
548adf01747SMatthew Dillon 	KKASSERT(cursor->flags & HAMMER_CURSOR_TRACKED);
549adf01747SMatthew Dillon 
550adf01747SMatthew Dillon 	/*
551adf01747SMatthew Dillon 	 * Relock the node
552adf01747SMatthew Dillon 	 */
553adf01747SMatthew Dillon 	for (;;) {
554adf01747SMatthew Dillon 		node = cursor->node;
555adf01747SMatthew Dillon 		hammer_ref_node(node);
556adf01747SMatthew Dillon 		hammer_lock_sh(&node->lock);
557adf01747SMatthew Dillon 		if (cursor->node == node) {
558adf01747SMatthew Dillon 			hammer_rel_node(node);
559adf01747SMatthew Dillon 			break;
560adf01747SMatthew Dillon 		}
561adf01747SMatthew Dillon 		hammer_unlock(&node->lock);
562adf01747SMatthew Dillon 		hammer_rel_node(node);
563adf01747SMatthew Dillon 	}
564adf01747SMatthew Dillon 
565adf01747SMatthew Dillon 	/*
566adf01747SMatthew Dillon 	 * Untrack the cursor, clean up, and re-establish the parent node.
567adf01747SMatthew Dillon 	 */
568adf01747SMatthew Dillon 	TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry);
569adf01747SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_TRACKED;
570adf01747SMatthew Dillon 
5715c8d05e2SMatthew Dillon 	/*
5725c8d05e2SMatthew Dillon 	 * If a ripout has occured iterations must re-test the (new)
5735c8d05e2SMatthew Dillon 	 * current element.  Clearing ATEDISK prevents the element from
5745c8d05e2SMatthew Dillon 	 * being skipped and RETEST causes it to be re-tested.
5755c8d05e2SMatthew Dillon 	 */
576adf01747SMatthew Dillon 	if (cursor->flags & HAMMER_CURSOR_TRACKED_RIPOUT) {
577adf01747SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_TRACKED_RIPOUT;
578adf01747SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
5795c8d05e2SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_RETEST;
580adf01747SMatthew Dillon 	}
581adf01747SMatthew Dillon 	error = hammer_load_cursor_parent(cursor, 0);
582adf01747SMatthew Dillon 	return(error);
583adf01747SMatthew Dillon }
584adf01747SMatthew Dillon 
585adf01747SMatthew Dillon /*
586adf01747SMatthew Dillon  * Recover from a deadlocked cursor, tracking any node removals or
587adf01747SMatthew Dillon  * replacements.  If the cursor's current node is removed by another
588adf01747SMatthew Dillon  * thread (via btree_remove()) the cursor will be seeked upwards.
58998da6d8cSMatthew Dillon  *
59098da6d8cSMatthew Dillon  * The caller is working a modifying operation and must be holding the
59198da6d8cSMatthew Dillon  * sync lock (shared).  We do not release the sync lock because this
59298da6d8cSMatthew Dillon  * would break atomicy.
593adf01747SMatthew Dillon  */
594adf01747SMatthew Dillon int
595adf01747SMatthew Dillon hammer_recover_cursor(hammer_cursor_t cursor)
596adf01747SMatthew Dillon {
597adf01747SMatthew Dillon 	int error;
598adf01747SMatthew Dillon 
599982be4bfSMatthew Dillon 	hammer_unlock_cursor(cursor);
60098da6d8cSMatthew Dillon 	KKASSERT(cursor->trans->sync_lock_refs > 0);
601b3bad96fSMatthew Dillon 
602b3bad96fSMatthew Dillon 	/*
603b3bad96fSMatthew Dillon 	 * Wait for the deadlock to clear
604b3bad96fSMatthew Dillon 	 */
605b3bad96fSMatthew Dillon 	if (cursor->deadlk_node) {
606b3bad96fSMatthew Dillon 		hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk");
607b3bad96fSMatthew Dillon 		hammer_unlock(&cursor->deadlk_node->lock);
608b3bad96fSMatthew Dillon 		hammer_rel_node(cursor->deadlk_node);
609b3bad96fSMatthew Dillon 		cursor->deadlk_node = NULL;
610b3bad96fSMatthew Dillon 	}
611b3bad96fSMatthew Dillon 	if (cursor->deadlk_rec) {
612b3bad96fSMatthew Dillon 		hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlr");
613b3bad96fSMatthew Dillon 		hammer_rel_mem_record(cursor->deadlk_rec);
614b3bad96fSMatthew Dillon 		cursor->deadlk_rec = NULL;
615b3bad96fSMatthew Dillon 	}
616982be4bfSMatthew Dillon 	error = hammer_lock_cursor(cursor);
617b3bad96fSMatthew Dillon 	return(error);
618b3bad96fSMatthew Dillon }
619b3bad96fSMatthew Dillon 
620b3bad96fSMatthew Dillon /*
621adf01747SMatthew Dillon  * Dup ocursor to ncursor.  ncursor inherits ocursor's locks and ocursor
622adf01747SMatthew Dillon  * is effectively unlocked and becomes tracked.  If ocursor was not locked
623adf01747SMatthew Dillon  * then ncursor also inherits the tracking.
624adf01747SMatthew Dillon  *
625adf01747SMatthew Dillon  * After the caller finishes working with ncursor it must be cleaned up
626adf01747SMatthew Dillon  * with hammer_done_cursor(), and the caller must re-lock ocursor.
627adf01747SMatthew Dillon  */
6283f43fb33SMatthew Dillon hammer_cursor_t
6293f43fb33SMatthew Dillon hammer_push_cursor(hammer_cursor_t ocursor)
630adf01747SMatthew Dillon {
6313f43fb33SMatthew Dillon 	hammer_cursor_t ncursor;
632adf01747SMatthew Dillon 	hammer_inode_t ip;
633adf01747SMatthew Dillon 	hammer_node_t node;
634bac808feSMatthew Dillon 	hammer_mount_t hmp;
635adf01747SMatthew Dillon 
636bac808feSMatthew Dillon 	hmp = ocursor->trans->hmp;
637bac808feSMatthew Dillon 	ncursor = kmalloc(sizeof(*ncursor), hmp->m_misc, M_WAITOK | M_ZERO);
638adf01747SMatthew Dillon 	bcopy(ocursor, ncursor, sizeof(*ocursor));
639adf01747SMatthew Dillon 
640adf01747SMatthew Dillon 	node = ocursor->node;
641adf01747SMatthew Dillon 	hammer_ref_node(node);
642adf01747SMatthew Dillon 	if ((ocursor->flags & HAMMER_CURSOR_TRACKED) == 0) {
643adf01747SMatthew Dillon 		ocursor->flags |= HAMMER_CURSOR_TRACKED;
644adf01747SMatthew Dillon 		TAILQ_INSERT_TAIL(&node->cursor_list, ocursor, deadlk_entry);
645adf01747SMatthew Dillon 	}
646adf01747SMatthew Dillon 	if (ncursor->parent)
647adf01747SMatthew Dillon 		ocursor->parent = NULL;
648adf01747SMatthew Dillon 	ocursor->data_buffer = NULL;
649adf01747SMatthew Dillon 	ocursor->leaf = NULL;
650adf01747SMatthew Dillon 	ocursor->data = NULL;
651adf01747SMatthew Dillon 	if (ncursor->flags & HAMMER_CURSOR_TRACKED)
652adf01747SMatthew Dillon 		TAILQ_INSERT_TAIL(&node->cursor_list, ncursor, deadlk_entry);
653adf01747SMatthew Dillon 	if ((ip = ncursor->ip) != NULL) {
654adf01747SMatthew Dillon                 ++ip->cursor_ip_refs;
655adf01747SMatthew Dillon 	}
656adf01747SMatthew Dillon 	if (ncursor->iprec)
657adf01747SMatthew Dillon 		hammer_ref(&ncursor->iprec->lock);
6583f43fb33SMatthew Dillon 	return(ncursor);
6593f43fb33SMatthew Dillon }
6603f43fb33SMatthew Dillon 
6613f43fb33SMatthew Dillon /*
6623f43fb33SMatthew Dillon  * Destroy ncursor and restore ocursor
6633f43fb33SMatthew Dillon  *
6643f43fb33SMatthew Dillon  * This is a temporary hack for the release.  We can't afford to lose
6653f43fb33SMatthew Dillon  * the IP lock until the IP object scan code is able to deal with it,
6663f43fb33SMatthew Dillon  * so have ocursor inherit it back.
6673f43fb33SMatthew Dillon  */
6683f43fb33SMatthew Dillon void
6693f43fb33SMatthew Dillon hammer_pop_cursor(hammer_cursor_t ocursor, hammer_cursor_t ncursor)
6703f43fb33SMatthew Dillon {
671bac808feSMatthew Dillon 	hammer_mount_t hmp;
6723f43fb33SMatthew Dillon 	hammer_inode_t ip;
6733f43fb33SMatthew Dillon 
674bac808feSMatthew Dillon 	hmp = ncursor->trans->hmp;
6753f43fb33SMatthew Dillon 	ip = ncursor->ip;
6763f43fb33SMatthew Dillon 	ncursor->ip = NULL;
6773f43fb33SMatthew Dillon 	if (ip)
6783f43fb33SMatthew Dillon                 --ip->cursor_ip_refs;
6793f43fb33SMatthew Dillon 	hammer_done_cursor(ncursor);
680bac808feSMatthew Dillon 	kfree(ncursor, hmp->m_misc);
6813f43fb33SMatthew Dillon 	KKASSERT(ocursor->ip == ip);
682982be4bfSMatthew Dillon 	hammer_lock_cursor(ocursor);
683adf01747SMatthew Dillon }
684adf01747SMatthew Dillon 
685adf01747SMatthew Dillon /*
686b3bad96fSMatthew Dillon  * onode is being replaced by nnode by the reblocking code.
687b3bad96fSMatthew Dillon  */
688b3bad96fSMatthew Dillon void
689b3bad96fSMatthew Dillon hammer_cursor_replaced_node(hammer_node_t onode, hammer_node_t nnode)
690b3bad96fSMatthew Dillon {
691b3bad96fSMatthew Dillon 	hammer_cursor_t cursor;
692c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
693c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t nndisk;
694c9ce54d6SMatthew Dillon 
695c9ce54d6SMatthew Dillon 	ondisk = onode->ondisk;
696c9ce54d6SMatthew Dillon 	nndisk = nnode->ondisk;
697b3bad96fSMatthew Dillon 
698b3bad96fSMatthew Dillon 	while ((cursor = TAILQ_FIRST(&onode->cursor_list)) != NULL) {
699b3bad96fSMatthew Dillon 		TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry);
700b3bad96fSMatthew Dillon 		TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry);
701b3bad96fSMatthew Dillon 		KKASSERT(cursor->node == onode);
702c9ce54d6SMatthew Dillon 		if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
703c9ce54d6SMatthew Dillon 			cursor->leaf = &nndisk->elms[cursor->index].leaf;
704b3bad96fSMatthew Dillon 		cursor->node = nnode;
705b3bad96fSMatthew Dillon 		hammer_ref_node(nnode);
706b3bad96fSMatthew Dillon 		hammer_rel_node(onode);
707b3bad96fSMatthew Dillon 	}
708b3bad96fSMatthew Dillon }
709b3bad96fSMatthew Dillon 
710b3bad96fSMatthew Dillon /*
711*f3a4893bSMatthew Dillon  * We have removed <node> from the parent and collapsed the parent.
712*f3a4893bSMatthew Dillon  *
713*f3a4893bSMatthew Dillon  * Cursors in deadlock recovery are seeked upward to the parent so the
714*f3a4893bSMatthew Dillon  * btree_remove() recursion works properly.
715b3bad96fSMatthew Dillon  */
716b3bad96fSMatthew Dillon void
717b3bad96fSMatthew Dillon hammer_cursor_removed_node(hammer_node_t node, hammer_node_t parent, int index)
718b3bad96fSMatthew Dillon {
719b3bad96fSMatthew Dillon 	hammer_cursor_t cursor;
720c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
721b3bad96fSMatthew Dillon 
722b3bad96fSMatthew Dillon 	KKASSERT(parent != NULL);
723c9ce54d6SMatthew Dillon 	ondisk = node->ondisk;
724c9ce54d6SMatthew Dillon 
725b3bad96fSMatthew Dillon 	while ((cursor = TAILQ_FIRST(&node->cursor_list)) != NULL) {
726b3bad96fSMatthew Dillon 		KKASSERT(cursor->node == node);
727b3bad96fSMatthew Dillon 		KKASSERT(cursor->index == 0);
728b3bad96fSMatthew Dillon 		TAILQ_REMOVE(&node->cursor_list, cursor, deadlk_entry);
729b3bad96fSMatthew Dillon 		TAILQ_INSERT_TAIL(&parent->cursor_list, cursor, deadlk_entry);
730c9ce54d6SMatthew Dillon 		if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
731c9ce54d6SMatthew Dillon 			cursor->leaf = NULL;
732adf01747SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_TRACKED_RIPOUT;
733b3bad96fSMatthew Dillon 		cursor->node = parent;
734b3bad96fSMatthew Dillon 		cursor->index = index;
735b3bad96fSMatthew Dillon 		hammer_ref_node(parent);
736b3bad96fSMatthew Dillon 		hammer_rel_node(node);
737b3bad96fSMatthew Dillon 	}
738b3bad96fSMatthew Dillon }
739b3bad96fSMatthew Dillon 
740b3bad96fSMatthew Dillon /*
741b3bad96fSMatthew Dillon  * node was split at (onode, index) with elements >= index moved to nnode.
742b3bad96fSMatthew Dillon  */
743b3bad96fSMatthew Dillon void
744b3bad96fSMatthew Dillon hammer_cursor_split_node(hammer_node_t onode, hammer_node_t nnode, int index)
745b3bad96fSMatthew Dillon {
746b3bad96fSMatthew Dillon 	hammer_cursor_t cursor;
747c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
748c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t nndisk;
749c9ce54d6SMatthew Dillon 
750c9ce54d6SMatthew Dillon 	ondisk = onode->ondisk;
751c9ce54d6SMatthew Dillon 	nndisk = nnode->ondisk;
752b3bad96fSMatthew Dillon 
753b3bad96fSMatthew Dillon again:
754b3bad96fSMatthew Dillon 	TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) {
755b3bad96fSMatthew Dillon 		KKASSERT(cursor->node == onode);
756b3bad96fSMatthew Dillon 		if (cursor->index < index)
757b3bad96fSMatthew Dillon 			continue;
758b3bad96fSMatthew Dillon 		TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry);
759b3bad96fSMatthew Dillon 		TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry);
760c9ce54d6SMatthew Dillon 		if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
761c9ce54d6SMatthew Dillon 			cursor->leaf = &nndisk->elms[cursor->index - index].leaf;
762b3bad96fSMatthew Dillon 		cursor->node = nnode;
763b3bad96fSMatthew Dillon 		cursor->index -= index;
764b3bad96fSMatthew Dillon 		hammer_ref_node(nnode);
765b3bad96fSMatthew Dillon 		hammer_rel_node(onode);
766b3bad96fSMatthew Dillon 		goto again;
767b3bad96fSMatthew Dillon 	}
768b3bad96fSMatthew Dillon }
769b3bad96fSMatthew Dillon 
770b3bad96fSMatthew Dillon /*
7711775b6a0SMatthew Dillon  * An element was moved from one node to another or within a node.  The
7721775b6a0SMatthew Dillon  * index may also represent the end of the node (index == numelements).
7731775b6a0SMatthew Dillon  *
7741775b6a0SMatthew Dillon  * This is used by the rebalancing code.  This is not an insertion or
7751775b6a0SMatthew Dillon  * deletion and any additional elements, including the degenerate case at
7761775b6a0SMatthew Dillon  * the end of the node, will be dealt with by additional distinct calls.
7771775b6a0SMatthew Dillon  */
7781775b6a0SMatthew Dillon void
7791775b6a0SMatthew Dillon hammer_cursor_moved_element(hammer_node_t onode, hammer_node_t nnode,
7801775b6a0SMatthew Dillon 			    int oindex, int nindex)
7811775b6a0SMatthew Dillon {
7821775b6a0SMatthew Dillon 	hammer_cursor_t cursor;
783c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
784c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t nndisk;
785c9ce54d6SMatthew Dillon 
786c9ce54d6SMatthew Dillon 	ondisk = onode->ondisk;
787c9ce54d6SMatthew Dillon 	nndisk = nnode->ondisk;
7881775b6a0SMatthew Dillon 
7891775b6a0SMatthew Dillon again:
7901775b6a0SMatthew Dillon 	TAILQ_FOREACH(cursor, &onode->cursor_list, deadlk_entry) {
7911775b6a0SMatthew Dillon 		KKASSERT(cursor->node == onode);
7921775b6a0SMatthew Dillon 		if (cursor->index != oindex)
7931775b6a0SMatthew Dillon 			continue;
7941775b6a0SMatthew Dillon 		TAILQ_REMOVE(&onode->cursor_list, cursor, deadlk_entry);
7951775b6a0SMatthew Dillon 		TAILQ_INSERT_TAIL(&nnode->cursor_list, cursor, deadlk_entry);
796c9ce54d6SMatthew Dillon 		if (cursor->leaf == &ondisk->elms[oindex].leaf)
797c9ce54d6SMatthew Dillon 			cursor->leaf = &nndisk->elms[nindex].leaf;
7981775b6a0SMatthew Dillon 		cursor->node = nnode;
7991775b6a0SMatthew Dillon 		cursor->index = nindex;
8001775b6a0SMatthew Dillon 		hammer_ref_node(nnode);
8011775b6a0SMatthew Dillon 		hammer_rel_node(onode);
8021775b6a0SMatthew Dillon 		goto again;
8031775b6a0SMatthew Dillon 	}
8041775b6a0SMatthew Dillon }
8051775b6a0SMatthew Dillon 
8061775b6a0SMatthew Dillon /*
8071775b6a0SMatthew Dillon  * The B-Tree element pointing to the specified node was moved from (oparent)
8081775b6a0SMatthew Dillon  * to (nparent, nindex).  We must locate any tracked cursors pointing at
8091775b6a0SMatthew Dillon  * node and adjust their parent accordingly.
8101775b6a0SMatthew Dillon  *
8111775b6a0SMatthew Dillon  * This is used by the rebalancing code when packing elements causes an
8121775b6a0SMatthew Dillon  * element to shift from one node to another.
8131775b6a0SMatthew Dillon  */
8141775b6a0SMatthew Dillon void
8151775b6a0SMatthew Dillon hammer_cursor_parent_changed(hammer_node_t node, hammer_node_t oparent,
8161775b6a0SMatthew Dillon 			     hammer_node_t nparent, int nindex)
8171775b6a0SMatthew Dillon {
8181775b6a0SMatthew Dillon 	hammer_cursor_t cursor;
8191775b6a0SMatthew Dillon 
8201775b6a0SMatthew Dillon again:
8211775b6a0SMatthew Dillon 	TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) {
8221775b6a0SMatthew Dillon 		KKASSERT(cursor->node == node);
8231775b6a0SMatthew Dillon 		if (cursor->parent == oparent) {
8241775b6a0SMatthew Dillon 			cursor->parent = nparent;
8251775b6a0SMatthew Dillon 			cursor->parent_index = nindex;
8261775b6a0SMatthew Dillon 			hammer_ref_node(nparent);
8271775b6a0SMatthew Dillon 			hammer_rel_node(oparent);
8281775b6a0SMatthew Dillon 			goto again;
8291775b6a0SMatthew Dillon 		}
8301775b6a0SMatthew Dillon 	}
8311775b6a0SMatthew Dillon }
8321775b6a0SMatthew Dillon 
8331775b6a0SMatthew Dillon /*
834e4a5ff06SMatthew Dillon  * Deleted element at (node, index)
835b3bad96fSMatthew Dillon  *
836b3bad96fSMatthew Dillon  * Shift indexes >= index
837b3bad96fSMatthew Dillon  */
838b3bad96fSMatthew Dillon void
839b3bad96fSMatthew Dillon hammer_cursor_deleted_element(hammer_node_t node, int index)
840b3bad96fSMatthew Dillon {
841b3bad96fSMatthew Dillon 	hammer_cursor_t cursor;
842c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
843c9ce54d6SMatthew Dillon 
844c9ce54d6SMatthew Dillon 	ondisk = node->ondisk;
845b3bad96fSMatthew Dillon 
846b3bad96fSMatthew Dillon 	TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) {
847b3bad96fSMatthew Dillon 		KKASSERT(cursor->node == node);
848b3bad96fSMatthew Dillon 		if (cursor->index == index) {
849adf01747SMatthew Dillon 			cursor->flags |= HAMMER_CURSOR_TRACKED_RIPOUT;
850c9ce54d6SMatthew Dillon 			if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
851c9ce54d6SMatthew Dillon 				cursor->leaf = NULL;
852b3bad96fSMatthew Dillon 		} else if (cursor->index > index) {
853c9ce54d6SMatthew Dillon 			if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
854c9ce54d6SMatthew Dillon 				cursor->leaf = &ondisk->elms[cursor->index - 1].leaf;
855b3bad96fSMatthew Dillon 			--cursor->index;
856b3bad96fSMatthew Dillon 		}
857b3bad96fSMatthew Dillon 	}
858b3bad96fSMatthew Dillon }
859b3bad96fSMatthew Dillon 
860b3bad96fSMatthew Dillon /*
861e4a5ff06SMatthew Dillon  * Inserted element at (node, index)
862b3bad96fSMatthew Dillon  *
863b3bad96fSMatthew Dillon  * Shift indexes >= index
864b3bad96fSMatthew Dillon  */
865b3bad96fSMatthew Dillon void
866b3bad96fSMatthew Dillon hammer_cursor_inserted_element(hammer_node_t node, int index)
867b3bad96fSMatthew Dillon {
868b3bad96fSMatthew Dillon 	hammer_cursor_t cursor;
869c9ce54d6SMatthew Dillon 	hammer_node_ondisk_t ondisk;
870c9ce54d6SMatthew Dillon 
871c9ce54d6SMatthew Dillon 	ondisk = node->ondisk;
872b3bad96fSMatthew Dillon 
873b3bad96fSMatthew Dillon 	TAILQ_FOREACH(cursor, &node->cursor_list, deadlk_entry) {
874b3bad96fSMatthew Dillon 		KKASSERT(cursor->node == node);
875c9ce54d6SMatthew Dillon 		if (cursor->index >= index) {
876c9ce54d6SMatthew Dillon 			if (cursor->leaf == &ondisk->elms[cursor->index].leaf)
877c9ce54d6SMatthew Dillon 				cursor->leaf = &ondisk->elms[cursor->index + 1].leaf;
878b3bad96fSMatthew Dillon 			++cursor->index;
879b3bad96fSMatthew Dillon 		}
880b3bad96fSMatthew Dillon 	}
881c9ce54d6SMatthew Dillon }
882b3bad96fSMatthew Dillon 
883