xref: /dflybsd-src/sys/vfs/hammer/hammer_cursor.c (revision 32c90105bec38cf3ca15d5f66aa481edd792a49c)
18cd0a023SMatthew Dillon /*
28cd0a023SMatthew Dillon  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
38cd0a023SMatthew Dillon  *
48cd0a023SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
58cd0a023SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
68cd0a023SMatthew Dillon  *
78cd0a023SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
88cd0a023SMatthew Dillon  * modification, are permitted provided that the following conditions
98cd0a023SMatthew Dillon  * are met:
108cd0a023SMatthew Dillon  *
118cd0a023SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
128cd0a023SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
138cd0a023SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
148cd0a023SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
158cd0a023SMatthew Dillon  *    the documentation and/or other materials provided with the
168cd0a023SMatthew Dillon  *    distribution.
178cd0a023SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
188cd0a023SMatthew Dillon  *    contributors may be used to endorse or promote products derived
198cd0a023SMatthew Dillon  *    from this software without specific, prior written permission.
208cd0a023SMatthew Dillon  *
218cd0a023SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
228cd0a023SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
238cd0a023SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
248cd0a023SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
258cd0a023SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
268cd0a023SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
278cd0a023SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
288cd0a023SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
298cd0a023SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
308cd0a023SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
318cd0a023SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
328cd0a023SMatthew Dillon  * SUCH DAMAGE.
338cd0a023SMatthew Dillon  *
34*32c90105SMatthew Dillon  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.16 2008/02/05 07:58:43 dillon Exp $
358cd0a023SMatthew Dillon  */
368cd0a023SMatthew Dillon 
378cd0a023SMatthew Dillon /*
388cd0a023SMatthew Dillon  * HAMMER B-Tree index - cursor support routines
398cd0a023SMatthew Dillon  */
408cd0a023SMatthew Dillon #include "hammer.h"
418cd0a023SMatthew Dillon 
428cd0a023SMatthew Dillon static int hammer_load_cursor_parent(hammer_cursor_t cursor);
438cd0a023SMatthew Dillon static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
448cd0a023SMatthew Dillon static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
458cd0a023SMatthew Dillon 
468cd0a023SMatthew Dillon /*
4761aeeb33SMatthew Dillon  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
4861aeeb33SMatthew Dillon  * is not available initialize a fresh cursor at the root of the root
4961aeeb33SMatthew Dillon  * cluster.
508cd0a023SMatthew Dillon  */
518cd0a023SMatthew Dillon int
5261aeeb33SMatthew Dillon hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
5361aeeb33SMatthew Dillon 		       hammer_mount_t hmp)
548cd0a023SMatthew Dillon {
558cd0a023SMatthew Dillon 	hammer_cluster_t cluster;
5661aeeb33SMatthew Dillon 	hammer_node_t node;
578cd0a023SMatthew Dillon 	int error;
588cd0a023SMatthew Dillon 
598cd0a023SMatthew Dillon 	bzero(cursor, sizeof(*cursor));
6061aeeb33SMatthew Dillon 
6161aeeb33SMatthew Dillon 	/*
6261aeeb33SMatthew Dillon 	 * Step 1 - acquire a locked node from the cache if possible
6361aeeb33SMatthew Dillon 	 */
6461aeeb33SMatthew Dillon 	if (cache && *cache) {
65055f5ff8SMatthew Dillon 		node = hammer_ref_node_safe(hmp, cache, &error);
668cd0a023SMatthew Dillon 		if (error == 0) {
676a37e7e4SMatthew Dillon 			hammer_lock_sh(&node->lock);
6861aeeb33SMatthew Dillon 			if (node->flags & HAMMER_NODE_DELETED) {
6961aeeb33SMatthew Dillon 				hammer_unlock(&node->lock);
7061aeeb33SMatthew Dillon 				hammer_rel_node(node);
7161aeeb33SMatthew Dillon 				node = NULL;
7261aeeb33SMatthew Dillon 			}
7361aeeb33SMatthew Dillon 		}
7461aeeb33SMatthew Dillon 	} else {
7561aeeb33SMatthew Dillon 		node = NULL;
7661aeeb33SMatthew Dillon 	}
7761aeeb33SMatthew Dillon 
7861aeeb33SMatthew Dillon 	/*
7961aeeb33SMatthew Dillon 	 * Step 2 - If we couldn't get a node from the cache, get
8061aeeb33SMatthew Dillon 	 * the one from the root of the root cluster.
8161aeeb33SMatthew Dillon 	 */
8261aeeb33SMatthew Dillon 	while (node == NULL) {
8361aeeb33SMatthew Dillon 		cluster = hammer_get_root_cluster(hmp, &error);
8461aeeb33SMatthew Dillon 		if (error)
8561aeeb33SMatthew Dillon 			break;
8661aeeb33SMatthew Dillon 		node = hammer_get_node(cluster,
878cd0a023SMatthew Dillon 				       cluster->ondisk->clu_btree_root,
888cd0a023SMatthew Dillon 				       &error);
898cd0a023SMatthew Dillon 		hammer_rel_cluster(cluster, 0);
9061aeeb33SMatthew Dillon 		if (error)
9161aeeb33SMatthew Dillon 			break;
926a37e7e4SMatthew Dillon 		hammer_lock_sh(&node->lock);
9361aeeb33SMatthew Dillon 		if (node->flags & HAMMER_NODE_DELETED) {
9461aeeb33SMatthew Dillon 			hammer_unlock(&node->lock);
9561aeeb33SMatthew Dillon 			hammer_rel_node(node);
9661aeeb33SMatthew Dillon 			node = NULL;
978cd0a023SMatthew Dillon 		}
9861aeeb33SMatthew Dillon 	}
9961aeeb33SMatthew Dillon 
10061aeeb33SMatthew Dillon 	/*
10161aeeb33SMatthew Dillon 	 * Step 3 - finish initializing the cursor by acquiring the parent
10261aeeb33SMatthew Dillon 	 */
10361aeeb33SMatthew Dillon 	cursor->node = node;
1048cd0a023SMatthew Dillon 	if (error == 0)
1058cd0a023SMatthew Dillon 		error = hammer_load_cursor_parent(cursor);
1060b075555SMatthew Dillon 	KKASSERT(error == 0);
1078cd0a023SMatthew Dillon 	return(error);
1088cd0a023SMatthew Dillon }
1098cd0a023SMatthew Dillon 
1108cd0a023SMatthew Dillon /*
111d26d0ae9SMatthew Dillon  * Initialize a fresh cursor at the root of the specified cluster and
112d26d0ae9SMatthew Dillon  * limit operations to within the cluster.
113b33e2cc0SMatthew Dillon  *
114b33e2cc0SMatthew Dillon  * NOTE: cursor->parent will be set to NULL to avoid referencing B-Tree
115b33e2cc0SMatthew Dillon  * nodes in other clusters.
116d26d0ae9SMatthew Dillon  */
117d26d0ae9SMatthew Dillon int
118d26d0ae9SMatthew Dillon hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
119d26d0ae9SMatthew Dillon {
120d26d0ae9SMatthew Dillon 	int error;
121d26d0ae9SMatthew Dillon 
122d26d0ae9SMatthew Dillon 	bzero(cursor, sizeof(*cursor));
123d26d0ae9SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_INCLUSTER;
124d26d0ae9SMatthew Dillon 	cursor->node = hammer_get_node(cluster,
125d26d0ae9SMatthew Dillon 				       cluster->ondisk->clu_btree_root,
126d26d0ae9SMatthew Dillon 				       &error);
127b33e2cc0SMatthew Dillon 	if (error == 0)
1286a37e7e4SMatthew Dillon 		hammer_lock_sh(&cursor->node->lock);
129b33e2cc0SMatthew Dillon 	cursor->left_bound = &cluster->ondisk->clu_btree_beg;
130b33e2cc0SMatthew Dillon 	cursor->right_bound = &cluster->ondisk->clu_btree_end;
1310b075555SMatthew Dillon 	KKASSERT(error == 0);
132d26d0ae9SMatthew Dillon 	return(error);
133d26d0ae9SMatthew Dillon }
134d26d0ae9SMatthew Dillon 
135d26d0ae9SMatthew Dillon /*
1368cd0a023SMatthew Dillon  * We are finished with a cursor.  We NULL out various fields as sanity
1378cd0a023SMatthew Dillon  * check, in case the structure is inappropriately used afterwords.
1388cd0a023SMatthew Dillon  */
1398cd0a023SMatthew Dillon void
1408cd0a023SMatthew Dillon hammer_done_cursor(hammer_cursor_t cursor)
1418cd0a023SMatthew Dillon {
1428cd0a023SMatthew Dillon 	if (cursor->parent) {
1438cd0a023SMatthew Dillon 		hammer_unlock(&cursor->parent->lock);
1448cd0a023SMatthew Dillon 		hammer_rel_node(cursor->parent);
1458cd0a023SMatthew Dillon 		cursor->parent = NULL;
1468cd0a023SMatthew Dillon 	}
1478cd0a023SMatthew Dillon 	if (cursor->node) {
1488cd0a023SMatthew Dillon 		hammer_unlock(&cursor->node->lock);
1498cd0a023SMatthew Dillon 		hammer_rel_node(cursor->node);
1508cd0a023SMatthew Dillon 		cursor->node = NULL;
1518cd0a023SMatthew Dillon 	}
1528cd0a023SMatthew Dillon         if (cursor->data_buffer) {
1538cd0a023SMatthew Dillon                 hammer_rel_buffer(cursor->data_buffer, 0);
1548cd0a023SMatthew Dillon                 cursor->data_buffer = NULL;
1558cd0a023SMatthew Dillon         }
1568cd0a023SMatthew Dillon         if (cursor->record_buffer) {
1578cd0a023SMatthew Dillon                 hammer_rel_buffer(cursor->record_buffer, 0);
1588cd0a023SMatthew Dillon                 cursor->record_buffer = NULL;
1598cd0a023SMatthew Dillon         }
1606b4f890bSMatthew Dillon 	if (cursor->ip)
1616b4f890bSMatthew Dillon 		hammer_mem_done(cursor);
162a89aec1bSMatthew Dillon 
1636a37e7e4SMatthew Dillon 	/*
1646a37e7e4SMatthew Dillon 	 * If we deadlocked this node will be referenced.  Do a quick
1656a37e7e4SMatthew Dillon 	 * lock/unlock to wait for the deadlock condition to clear.
1666a37e7e4SMatthew Dillon 	 */
1676a37e7e4SMatthew Dillon 	if (cursor->deadlk_node) {
1686a37e7e4SMatthew Dillon 		hammer_lock_ex(&cursor->deadlk_node->lock);
1696a37e7e4SMatthew Dillon 		hammer_unlock(&cursor->deadlk_node->lock);
1706a37e7e4SMatthew Dillon 		hammer_rel_node(cursor->deadlk_node);
1716a37e7e4SMatthew Dillon 		cursor->deadlk_node = NULL;
1726a37e7e4SMatthew Dillon 	}
1736a37e7e4SMatthew Dillon 
1748cd0a023SMatthew Dillon 	cursor->data = NULL;
1758cd0a023SMatthew Dillon 	cursor->record = NULL;
1768cd0a023SMatthew Dillon 	cursor->left_bound = NULL;
1778cd0a023SMatthew Dillon 	cursor->right_bound = NULL;
1788cd0a023SMatthew Dillon }
1798cd0a023SMatthew Dillon 
1808cd0a023SMatthew Dillon /*
1816a37e7e4SMatthew Dillon  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
1826a37e7e4SMatthew Dillon  * function can return EDEADLK.
1836a37e7e4SMatthew Dillon  *
1846a37e7e4SMatthew Dillon  * If we fail to upgrade the lock and cursor->deadlk_node is NULL,
1856a37e7e4SMatthew Dillon  * we add another reference to the node that failed and set
1866a37e7e4SMatthew Dillon  * cursor->deadlk_node so hammer_done_cursor() can block on it.
1876a37e7e4SMatthew Dillon  */
1886a37e7e4SMatthew Dillon int
1896a37e7e4SMatthew Dillon hammer_cursor_upgrade(hammer_cursor_t cursor)
1906a37e7e4SMatthew Dillon {
1916a37e7e4SMatthew Dillon 	int error;
1926a37e7e4SMatthew Dillon 
1936a37e7e4SMatthew Dillon 	if (hammer_lock_held(&cursor->node->lock) < 0) {
1946a37e7e4SMatthew Dillon 		error = hammer_lock_upgrade(&cursor->node->lock);
1956a37e7e4SMatthew Dillon 		if (error && cursor->deadlk_node == NULL) {
1966a37e7e4SMatthew Dillon 			cursor->deadlk_node = cursor->node;
1976a37e7e4SMatthew Dillon 			hammer_ref_node(cursor->deadlk_node);
1986a37e7e4SMatthew Dillon 		}
1996a37e7e4SMatthew Dillon 	} else {
2006a37e7e4SMatthew Dillon 		error = 0;
2016a37e7e4SMatthew Dillon 	}
2026a37e7e4SMatthew Dillon 	if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
2036a37e7e4SMatthew Dillon 		error = hammer_lock_upgrade(&cursor->parent->lock);
2046a37e7e4SMatthew Dillon 		if (error && cursor->deadlk_node == NULL) {
2056a37e7e4SMatthew Dillon 			cursor->deadlk_node = cursor->parent;
2066a37e7e4SMatthew Dillon 			hammer_ref_node(cursor->deadlk_node);
2076a37e7e4SMatthew Dillon 		}
2086a37e7e4SMatthew Dillon 	} else {
2096a37e7e4SMatthew Dillon 		error = 0;
2106a37e7e4SMatthew Dillon 	}
2116a37e7e4SMatthew Dillon 	return(error);
2126a37e7e4SMatthew Dillon }
2136a37e7e4SMatthew Dillon 
2146a37e7e4SMatthew Dillon /*
2156a37e7e4SMatthew Dillon  * Downgrade cursor->node and cursor->parent to shared locks.  This
2166a37e7e4SMatthew Dillon  * function can return EDEADLK.
2176a37e7e4SMatthew Dillon  */
2186a37e7e4SMatthew Dillon void
2196a37e7e4SMatthew Dillon hammer_cursor_downgrade(hammer_cursor_t cursor)
2206a37e7e4SMatthew Dillon {
2216a37e7e4SMatthew Dillon 	if (hammer_lock_held(&cursor->node->lock) > 0)
2226a37e7e4SMatthew Dillon 		hammer_lock_downgrade(&cursor->node->lock);
2236a37e7e4SMatthew Dillon 	if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
2246a37e7e4SMatthew Dillon 		hammer_lock_downgrade(&cursor->parent->lock);
2256a37e7e4SMatthew Dillon }
2266a37e7e4SMatthew Dillon 
227*32c90105SMatthew Dillon /*
228*32c90105SMatthew Dillon  * Seek the cursor to the specified node and index.
229*32c90105SMatthew Dillon  */
230*32c90105SMatthew Dillon int
231*32c90105SMatthew Dillon hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
232*32c90105SMatthew Dillon {
233*32c90105SMatthew Dillon 	int error;
234*32c90105SMatthew Dillon 
235*32c90105SMatthew Dillon 	hammer_cursor_downgrade(cursor);
236*32c90105SMatthew Dillon 	error = 0;
237*32c90105SMatthew Dillon 
238*32c90105SMatthew Dillon 	if (cursor->node != node) {
239*32c90105SMatthew Dillon 		hammer_unlock(&cursor->node->lock);
240*32c90105SMatthew Dillon 		hammer_rel_node(cursor->node);
241*32c90105SMatthew Dillon 		cursor->node = node;
242*32c90105SMatthew Dillon 		cursor->index = index;
243*32c90105SMatthew Dillon 		hammer_ref_node(node);
244*32c90105SMatthew Dillon 		hammer_lock_sh(&node->lock);
245*32c90105SMatthew Dillon 
246*32c90105SMatthew Dillon 		if (cursor->parent) {
247*32c90105SMatthew Dillon 			hammer_unlock(&cursor->parent->lock);
248*32c90105SMatthew Dillon 			hammer_rel_node(cursor->parent);
249*32c90105SMatthew Dillon 			cursor->parent = NULL;
250*32c90105SMatthew Dillon 			cursor->parent_index = 0;
251*32c90105SMatthew Dillon 		}
252*32c90105SMatthew Dillon 		error = hammer_load_cursor_parent(cursor);
253*32c90105SMatthew Dillon 	}
254*32c90105SMatthew Dillon 	return (error);
255*32c90105SMatthew Dillon }
2566a37e7e4SMatthew Dillon 
2576a37e7e4SMatthew Dillon #if 0
2586a37e7e4SMatthew Dillon 
2596a37e7e4SMatthew Dillon /*
260195c19a1SMatthew Dillon  * Acquire the parent B-Tree node of the specified node, returning a
261195c19a1SMatthew Dillon  * referenced but unlocked node.  NULL can be returned with *errorp == 0
262195c19a1SMatthew Dillon  * if node is the root node of the root cluster.
263195c19a1SMatthew Dillon  */
264195c19a1SMatthew Dillon static
265195c19a1SMatthew Dillon hammer_node_t
266195c19a1SMatthew Dillon hammer_get_parent_node(hammer_node_t node, int *errorp)
267195c19a1SMatthew Dillon {
268195c19a1SMatthew Dillon 	hammer_cluster_t cluster;
269195c19a1SMatthew Dillon 	hammer_node_t parent;
270195c19a1SMatthew Dillon 
271195c19a1SMatthew Dillon 	cluster = node->cluster;
272195c19a1SMatthew Dillon 	if (node->ondisk->parent) {
273195c19a1SMatthew Dillon 		/*
274195c19a1SMatthew Dillon 		 * Local parent
275195c19a1SMatthew Dillon 		 */
276195c19a1SMatthew Dillon 		parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
277195c19a1SMatthew Dillon 	} else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
278195c19a1SMatthew Dillon 		/*
279195c19a1SMatthew Dillon 		 * At cluster root, locate node in parent cluster
280195c19a1SMatthew Dillon 		 */
281195c19a1SMatthew Dillon 		hammer_cluster_ondisk_t ondisk;
282195c19a1SMatthew Dillon 		hammer_cluster_t pcluster;
283195c19a1SMatthew Dillon 		hammer_volume_t pvolume;
284195c19a1SMatthew Dillon 		int32_t clu_no;
285195c19a1SMatthew Dillon 		int32_t vol_no;
286195c19a1SMatthew Dillon 
287195c19a1SMatthew Dillon 		ondisk = cluster->ondisk;
288195c19a1SMatthew Dillon 		vol_no = ondisk->clu_btree_parent_vol_no;
289195c19a1SMatthew Dillon 		clu_no = ondisk->clu_btree_parent_clu_no;
290195c19a1SMatthew Dillon 
291195c19a1SMatthew Dillon 		/*
292195c19a1SMatthew Dillon 		 * Acquire the node from (volume, cluster, offset)
293195c19a1SMatthew Dillon 		 */
294195c19a1SMatthew Dillon 		pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
295195c19a1SMatthew Dillon 					    errorp);
296195c19a1SMatthew Dillon 		if (*errorp)
297195c19a1SMatthew Dillon 			return (NULL);
298195c19a1SMatthew Dillon 		pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
299195c19a1SMatthew Dillon 		hammer_rel_volume(pvolume, 0);
300195c19a1SMatthew Dillon 		if (*errorp)
301195c19a1SMatthew Dillon 			return (NULL);
302195c19a1SMatthew Dillon 		parent = hammer_get_node(pcluster,
303195c19a1SMatthew Dillon 					 ondisk->clu_btree_parent_offset,
304195c19a1SMatthew Dillon 					 errorp);
305195c19a1SMatthew Dillon 		hammer_rel_cluster(pcluster, 0);
306195c19a1SMatthew Dillon 	} else {
307195c19a1SMatthew Dillon 		/*
308195c19a1SMatthew Dillon 		 * At root of root cluster, there is no parent.
309195c19a1SMatthew Dillon 		 */
310d26d0ae9SMatthew Dillon 		KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
311195c19a1SMatthew Dillon 		parent = NULL;
312195c19a1SMatthew Dillon 		*errorp = 0;
313195c19a1SMatthew Dillon 	}
314195c19a1SMatthew Dillon 	return(parent);
315195c19a1SMatthew Dillon }
316195c19a1SMatthew Dillon 
3176a37e7e4SMatthew Dillon #endif
3186a37e7e4SMatthew Dillon 
319195c19a1SMatthew Dillon /*
3208cd0a023SMatthew Dillon  * Load the parent of cursor->node into cursor->parent.  There are several
3218cd0a023SMatthew Dillon  * cases.  (1) The parent is in the current cluster.  (2) We are at the
3228cd0a023SMatthew Dillon  * root of the cluster and the parent is in another cluster.  (3) We are at
3238cd0a023SMatthew Dillon  * the root of the root cluster.
324d26d0ae9SMatthew Dillon  *
325d26d0ae9SMatthew Dillon  * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
326d26d0ae9SMatthew Dillon  * we do not access the parent cluster at all and make the cursor look like
327d26d0ae9SMatthew Dillon  * its at the root.
3288cd0a023SMatthew Dillon  */
3298cd0a023SMatthew Dillon static
3308cd0a023SMatthew Dillon int
3318cd0a023SMatthew Dillon hammer_load_cursor_parent(hammer_cursor_t cursor)
3328cd0a023SMatthew Dillon {
3338cd0a023SMatthew Dillon 	hammer_cluster_t cluster;
3348cd0a023SMatthew Dillon 	int error;
3358cd0a023SMatthew Dillon 
3368cd0a023SMatthew Dillon 	cluster = cursor->node->cluster;
3378cd0a023SMatthew Dillon 
3388cd0a023SMatthew Dillon 	if (cursor->node->ondisk->parent) {
3398cd0a023SMatthew Dillon 		error = hammer_load_cursor_parent_local(cursor);
340d26d0ae9SMatthew Dillon 	} else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
341d26d0ae9SMatthew Dillon 		   ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
342d26d0ae9SMatthew Dillon 	) {
3438cd0a023SMatthew Dillon 		error = hammer_load_cursor_parent_cluster(cursor);
3448cd0a023SMatthew Dillon 	} else {
3458cd0a023SMatthew Dillon 		cursor->parent = NULL;
3468cd0a023SMatthew Dillon 		cursor->parent_index = 0;
3478cd0a023SMatthew Dillon 		cursor->left_bound = &cluster->ondisk->clu_btree_beg;
3488cd0a023SMatthew Dillon 		cursor->right_bound = &cluster->ondisk->clu_btree_end;
3498cd0a023SMatthew Dillon 		error = 0;
3508cd0a023SMatthew Dillon 	}
3518cd0a023SMatthew Dillon 	return(error);
3528cd0a023SMatthew Dillon }
3538cd0a023SMatthew Dillon 
3548cd0a023SMatthew Dillon static
3558cd0a023SMatthew Dillon int
3568cd0a023SMatthew Dillon hammer_load_cursor_parent_local(hammer_cursor_t cursor)
3578cd0a023SMatthew Dillon {
3588cd0a023SMatthew Dillon 	hammer_node_t node;
3598cd0a023SMatthew Dillon 	hammer_node_t parent;
3608cd0a023SMatthew Dillon 	hammer_btree_elm_t elm;
3618cd0a023SMatthew Dillon 	int error;
3628cd0a023SMatthew Dillon 	int i;
3638cd0a023SMatthew Dillon 
3648cd0a023SMatthew Dillon 	node = cursor->node;
3658cd0a023SMatthew Dillon 	parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
3668cd0a023SMatthew Dillon 	if (error)
3678cd0a023SMatthew Dillon 		return(error);
3688cd0a023SMatthew Dillon 	elm = NULL;
3698cd0a023SMatthew Dillon 	for (i = 0; i < parent->ondisk->count; ++i) {
3708cd0a023SMatthew Dillon 		elm = &parent->ondisk->elms[i];
3718cd0a023SMatthew Dillon 		if (parent->ondisk->elms[i].internal.subtree_offset ==
3728cd0a023SMatthew Dillon 		    node->node_offset) {
3738cd0a023SMatthew Dillon 			break;
3748cd0a023SMatthew Dillon 		}
3758cd0a023SMatthew Dillon 	}
3767a04d74fSMatthew Dillon 	if (i == parent->ondisk->count)
3777a04d74fSMatthew Dillon 		panic("Bad B-Tree link: parent %p node %p\n", parent, node);
3788cd0a023SMatthew Dillon 	KKASSERT(i != parent->ondisk->count);
3798cd0a023SMatthew Dillon 	cursor->parent = parent;
3808cd0a023SMatthew Dillon 	cursor->parent_index = i;
3818cd0a023SMatthew Dillon 	cursor->left_bound = &elm[0].internal.base;
3828cd0a023SMatthew Dillon 	cursor->right_bound = &elm[1].internal.base;
3838cd0a023SMatthew Dillon 
3846a37e7e4SMatthew Dillon 	hammer_lock_sh(&parent->lock);
3858cd0a023SMatthew Dillon 	return(error);
3868cd0a023SMatthew Dillon }
3878cd0a023SMatthew Dillon 
3888cd0a023SMatthew Dillon static
3898cd0a023SMatthew Dillon int
3908cd0a023SMatthew Dillon hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
3918cd0a023SMatthew Dillon {
3928cd0a023SMatthew Dillon 	hammer_cluster_ondisk_t ondisk;
393d26d0ae9SMatthew Dillon 	hammer_cluster_t pcluster;
394d26d0ae9SMatthew Dillon 	hammer_cluster_t ccluster;
3958cd0a023SMatthew Dillon 	hammer_volume_t volume;
3968cd0a023SMatthew Dillon 	hammer_node_t node;
3978cd0a023SMatthew Dillon 	hammer_node_t parent;
3988cd0a023SMatthew Dillon 	hammer_btree_elm_t elm;
3998cd0a023SMatthew Dillon 	int32_t clu_no;
4008cd0a023SMatthew Dillon 	int32_t vol_no;
4018cd0a023SMatthew Dillon 	int error;
4028cd0a023SMatthew Dillon 	int i;
4038cd0a023SMatthew Dillon 
4048cd0a023SMatthew Dillon 	node = cursor->node;
405d26d0ae9SMatthew Dillon 	ccluster = node->cluster;
406d26d0ae9SMatthew Dillon 	ondisk = ccluster->ondisk;
4078cd0a023SMatthew Dillon 	vol_no = ondisk->clu_btree_parent_vol_no;
4088cd0a023SMatthew Dillon 	clu_no = ondisk->clu_btree_parent_clu_no;
4098cd0a023SMatthew Dillon 
4108cd0a023SMatthew Dillon 	/*
411fe7678eeSMatthew Dillon 	 * Acquire the node from (volume, cluster, offset).  This should
412fe7678eeSMatthew Dillon 	 * be a leaf node containing the HAMMER_BTREE_TYPE_SPIKE_END element.
4138cd0a023SMatthew Dillon 	 */
414d26d0ae9SMatthew Dillon 	volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
4158cd0a023SMatthew Dillon 	if (error)
4168cd0a023SMatthew Dillon 		return (error);
417d26d0ae9SMatthew Dillon 	pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
4188cd0a023SMatthew Dillon 	hammer_rel_volume(volume, 0);
4198cd0a023SMatthew Dillon 	if (error)
4208cd0a023SMatthew Dillon 		return (error);
421d26d0ae9SMatthew Dillon 	parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
4228cd0a023SMatthew Dillon 				 &error);
423d26d0ae9SMatthew Dillon 	hammer_rel_cluster(pcluster, 0);
4248cd0a023SMatthew Dillon 	if (error)
4258cd0a023SMatthew Dillon 		return (error);
426fe7678eeSMatthew Dillon 	KKASSERT(parent->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
4278cd0a023SMatthew Dillon 
4288cd0a023SMatthew Dillon 	/*
4298cd0a023SMatthew Dillon 	 * Ok, we have the node.  Locate the inter-cluster element
4308cd0a023SMatthew Dillon 	 */
4318cd0a023SMatthew Dillon 	elm = NULL;
4328cd0a023SMatthew Dillon 	for (i = 0; i < parent->ondisk->count; ++i) {
4338cd0a023SMatthew Dillon 		elm = &parent->ondisk->elms[i];
434fe7678eeSMatthew Dillon 
435fe7678eeSMatthew Dillon 		if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END &&
436fe7678eeSMatthew Dillon 		    elm->leaf.spike_clu_no == cursor->node->cluster->clu_no) {
4378cd0a023SMatthew Dillon 			break;
4388cd0a023SMatthew Dillon 		}
4398cd0a023SMatthew Dillon 	}
4408cd0a023SMatthew Dillon 	KKASSERT(i != parent->ondisk->count);
441fe7678eeSMatthew Dillon 	KKASSERT(i && elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
4428cd0a023SMatthew Dillon 	cursor->parent = parent;
4438cd0a023SMatthew Dillon 	cursor->parent_index = i;
444fe7678eeSMatthew Dillon 	cursor->left_bound = &ccluster->ondisk->clu_btree_beg;
445fe7678eeSMatthew Dillon 	cursor->right_bound = &ccluster->ondisk->clu_btree_end;
4468cd0a023SMatthew Dillon 
447fe7678eeSMatthew Dillon 	KKASSERT(hammer_btree_cmp(&elm[-1].leaf.base,
448fe7678eeSMatthew Dillon 		 &ccluster->ondisk->clu_btree_beg) == 0);
449fe7678eeSMatthew Dillon 	    /*
450fe7678eeSMatthew Dillon 	     * spike_end is an inclusive boundary and will != clu_btree_end
4518cd0a023SMatthew Dillon 	KKASSERT(hammer_btree_cmp(cursor->right_bound,
452b3deaf57SMatthew Dillon 		 &ccluster->ondisk->clu_btree_end) >= 0);
453fe7678eeSMatthew Dillon 	    */
4548cd0a023SMatthew Dillon 
4556a37e7e4SMatthew Dillon 	hammer_lock_sh(&parent->lock);
4568cd0a023SMatthew Dillon 	return(0);
4578cd0a023SMatthew Dillon }
4588cd0a023SMatthew Dillon 
4598cd0a023SMatthew Dillon /*
4608cd0a023SMatthew Dillon  * Cursor up to our parent node.  Return ENOENT if we are at the root of
4618cd0a023SMatthew Dillon  * the root cluster.
462195c19a1SMatthew Dillon  *
463195c19a1SMatthew Dillon  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
464195c19a1SMatthew Dillon  * the cursor remains unchanged.
4658cd0a023SMatthew Dillon  */
4668cd0a023SMatthew Dillon int
4676a37e7e4SMatthew Dillon hammer_cursor_up(hammer_cursor_t cursor)
4688cd0a023SMatthew Dillon {
4698cd0a023SMatthew Dillon 	int error;
4708cd0a023SMatthew Dillon 
4716a37e7e4SMatthew Dillon 	hammer_cursor_downgrade(cursor);
472195c19a1SMatthew Dillon 
473195c19a1SMatthew Dillon 	/*
4748cd0a023SMatthew Dillon 	 * Set the node to its parent.  If the parent is NULL we are at
4758cd0a023SMatthew Dillon 	 * the root of the root cluster and return ENOENT.
4768cd0a023SMatthew Dillon 	 */
4778cd0a023SMatthew Dillon 	hammer_unlock(&cursor->node->lock);
4788cd0a023SMatthew Dillon 	hammer_rel_node(cursor->node);
4798cd0a023SMatthew Dillon 	cursor->node = cursor->parent;
4808cd0a023SMatthew Dillon 	cursor->index = cursor->parent_index;
4818cd0a023SMatthew Dillon 	cursor->parent = NULL;
4828cd0a023SMatthew Dillon 	cursor->parent_index = 0;
4838cd0a023SMatthew Dillon 
4848cd0a023SMatthew Dillon 	if (cursor->node == NULL) {
4858cd0a023SMatthew Dillon 		error = ENOENT;
486d26d0ae9SMatthew Dillon 	} else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
487d26d0ae9SMatthew Dillon 		   cursor->node->ondisk->parent == 0
488d26d0ae9SMatthew Dillon 	) {
489d26d0ae9SMatthew Dillon 		error = ENOENT;
4908cd0a023SMatthew Dillon 	} else {
4918cd0a023SMatthew Dillon 		error = hammer_load_cursor_parent(cursor);
4928cd0a023SMatthew Dillon 	}
4938cd0a023SMatthew Dillon 	return(error);
4948cd0a023SMatthew Dillon }
4958cd0a023SMatthew Dillon 
4968cd0a023SMatthew Dillon /*
4978cd0a023SMatthew Dillon  * Set the cursor to the root of the current cursor's cluster.
4988cd0a023SMatthew Dillon  */
4998cd0a023SMatthew Dillon int
5008cd0a023SMatthew Dillon hammer_cursor_toroot(hammer_cursor_t cursor)
5018cd0a023SMatthew Dillon {
5028cd0a023SMatthew Dillon 	hammer_cluster_t cluster;
5038cd0a023SMatthew Dillon 	int error;
5048cd0a023SMatthew Dillon 
5058cd0a023SMatthew Dillon 	/*
5068cd0a023SMatthew Dillon 	 * Already at root of cluster?
5078cd0a023SMatthew Dillon 	 */
5088cd0a023SMatthew Dillon 	if (cursor->node->ondisk->parent == 0)
5098cd0a023SMatthew Dillon 		return (0);
5108cd0a023SMatthew Dillon 
5116a37e7e4SMatthew Dillon 	hammer_cursor_downgrade(cursor);
5126a37e7e4SMatthew Dillon 
5138cd0a023SMatthew Dillon 	/*
5148cd0a023SMatthew Dillon 	 * Parent is root of cluster?
5158cd0a023SMatthew Dillon 	 */
5168cd0a023SMatthew Dillon 	if (cursor->parent->ondisk->parent == 0)
5176a37e7e4SMatthew Dillon 		return (hammer_cursor_up(cursor));
5188cd0a023SMatthew Dillon 
5198cd0a023SMatthew Dillon 	/*
5208cd0a023SMatthew Dillon 	 * Ok, reload the cursor with the root of the cluster, then
5218cd0a023SMatthew Dillon 	 * locate its parent.
5228cd0a023SMatthew Dillon 	 */
5238cd0a023SMatthew Dillon 	cluster = cursor->node->cluster;
5248cd0a023SMatthew Dillon 	error = hammer_ref_cluster(cluster);
5258cd0a023SMatthew Dillon 	if (error)
5268cd0a023SMatthew Dillon 		return (error);
5278cd0a023SMatthew Dillon 
5288cd0a023SMatthew Dillon 	hammer_unlock(&cursor->parent->lock);
5298cd0a023SMatthew Dillon 	hammer_rel_node(cursor->parent);
5308cd0a023SMatthew Dillon 	hammer_unlock(&cursor->node->lock);
5318cd0a023SMatthew Dillon 	hammer_rel_node(cursor->node);
5328cd0a023SMatthew Dillon 	cursor->parent = NULL;
5338cd0a023SMatthew Dillon 	cursor->parent_index = 0;
5348cd0a023SMatthew Dillon 
5358cd0a023SMatthew Dillon 	cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
5368cd0a023SMatthew Dillon 				       &error);
5378cd0a023SMatthew Dillon 	cursor->index = 0;
5386a37e7e4SMatthew Dillon 	hammer_lock_sh(&cursor->node->lock);
5398cd0a023SMatthew Dillon 	hammer_rel_cluster(cluster, 0);
5408cd0a023SMatthew Dillon 	if (error == 0)
5418cd0a023SMatthew Dillon 		error = hammer_load_cursor_parent(cursor);
5428cd0a023SMatthew Dillon 	return(error);
5438cd0a023SMatthew Dillon }
5448cd0a023SMatthew Dillon 
5458cd0a023SMatthew Dillon /*
5468cd0a023SMatthew Dillon  * Cursor down through the current node, which must be an internal node.
5478cd0a023SMatthew Dillon  *
5488cd0a023SMatthew Dillon  * This routine adjusts the cursor and sets index to 0.
5498cd0a023SMatthew Dillon  */
5508cd0a023SMatthew Dillon int
5518cd0a023SMatthew Dillon hammer_cursor_down(hammer_cursor_t cursor)
5528cd0a023SMatthew Dillon {
5538cd0a023SMatthew Dillon 	hammer_node_t node;
5548cd0a023SMatthew Dillon 	hammer_btree_elm_t elm;
5558cd0a023SMatthew Dillon 	hammer_volume_t volume;
5568cd0a023SMatthew Dillon 	hammer_cluster_t cluster;
5578cd0a023SMatthew Dillon 	int32_t vol_no;
5588cd0a023SMatthew Dillon 	int32_t clu_no;
5598cd0a023SMatthew Dillon 	int error;
5608cd0a023SMatthew Dillon 
5618cd0a023SMatthew Dillon 	/*
5628cd0a023SMatthew Dillon 	 * The current node becomes the current parent
5638cd0a023SMatthew Dillon 	 */
5646a37e7e4SMatthew Dillon 	hammer_cursor_downgrade(cursor);
5658cd0a023SMatthew Dillon 	node = cursor->node;
5668cd0a023SMatthew Dillon 	KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
5678cd0a023SMatthew Dillon 	if (cursor->parent) {
5688cd0a023SMatthew Dillon 		hammer_unlock(&cursor->parent->lock);
5698cd0a023SMatthew Dillon 		hammer_rel_node(cursor->parent);
5708cd0a023SMatthew Dillon 	}
5718cd0a023SMatthew Dillon 	cursor->parent = node;
5728cd0a023SMatthew Dillon 	cursor->parent_index = cursor->index;
5738cd0a023SMatthew Dillon 	cursor->node = NULL;
5748cd0a023SMatthew Dillon 	cursor->index = 0;
5758cd0a023SMatthew Dillon 
5768cd0a023SMatthew Dillon 	/*
57776376933SMatthew Dillon 	 * Extract element to push into at (node,index), set bounds.
5788cd0a023SMatthew Dillon 	 */
5798cd0a023SMatthew Dillon 	elm = &node->ondisk->elms[cursor->parent_index];
5808cd0a023SMatthew Dillon 
5818cd0a023SMatthew Dillon 	/*
582fe7678eeSMatthew Dillon 	 * Ok, push down into elm.  If elm specifies an internal or leaf
583fe7678eeSMatthew Dillon 	 * node the current node must be an internal node.  If elm specifies
584fe7678eeSMatthew Dillon 	 * a spike then the current node must be a leaf node.
585d26d0ae9SMatthew Dillon 	 *
586d26d0ae9SMatthew Dillon 	 * Cursoring down through a cluster transition when the INCLUSTER
587d26d0ae9SMatthew Dillon 	 * flag is set is not legal.
5888cd0a023SMatthew Dillon 	 */
589fe7678eeSMatthew Dillon 	switch(elm->base.btype) {
590fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_INTERNAL:
591fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_LEAF:
592fe7678eeSMatthew Dillon 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
593fe7678eeSMatthew Dillon 		KKASSERT(elm->internal.subtree_offset != 0);
594fe7678eeSMatthew Dillon 		cursor->left_bound = &elm[0].internal.base;
595fe7678eeSMatthew Dillon 		cursor->right_bound = &elm[1].internal.base;
596fe7678eeSMatthew Dillon 		node = hammer_get_node(node->cluster,
597fe7678eeSMatthew Dillon 				       elm->internal.subtree_offset,
598fe7678eeSMatthew Dillon 				       &error);
599fe7678eeSMatthew Dillon 		if (error == 0) {
600fe7678eeSMatthew Dillon 			KKASSERT(elm->base.btype == node->ondisk->type);
601fe7678eeSMatthew Dillon 			if (node->ondisk->parent != cursor->parent->node_offset)
602b33e2cc0SMatthew Dillon 				panic("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
603fe7678eeSMatthew Dillon 			KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
604fe7678eeSMatthew Dillon 		}
605fe7678eeSMatthew Dillon 		break;
606fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_SPIKE_BEG:
607b33e2cc0SMatthew Dillon 		/* case not supported yet */
608b33e2cc0SMatthew Dillon 		KKASSERT(0);
609b33e2cc0SMatthew Dillon 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
610b33e2cc0SMatthew Dillon 		KKASSERT(cursor->parent_index < node->ondisk->count - 1);
611b33e2cc0SMatthew Dillon 		KKASSERT(elm[1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END);
612b33e2cc0SMatthew Dillon 		++elm;
613b33e2cc0SMatthew Dillon 		++cursor->parent_index;
614b33e2cc0SMatthew Dillon 		/* fall through */
615fe7678eeSMatthew Dillon 	case HAMMER_BTREE_TYPE_SPIKE_END:
616fe7678eeSMatthew Dillon 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
617d26d0ae9SMatthew Dillon 		KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
618fe7678eeSMatthew Dillon 		vol_no = elm->leaf.spike_vol_no;
619fe7678eeSMatthew Dillon 		clu_no = elm->leaf.spike_clu_no;
6208cd0a023SMatthew Dillon 		volume = hammer_get_volume(node->cluster->volume->hmp,
6218cd0a023SMatthew Dillon 					   vol_no, &error);
6220b075555SMatthew Dillon 		KKASSERT(error != EINVAL);
6238cd0a023SMatthew Dillon 		if (error)
6248cd0a023SMatthew Dillon 			return(error);
6258cd0a023SMatthew Dillon 		cluster = hammer_get_cluster(volume, clu_no, &error, 0);
6260b075555SMatthew Dillon 		KKASSERT(error != EINVAL);
6278cd0a023SMatthew Dillon 		hammer_rel_volume(volume, 0);
6288cd0a023SMatthew Dillon 		if (error)
6298cd0a023SMatthew Dillon 			return(error);
630b33e2cc0SMatthew Dillon 		KKASSERT(cluster->ondisk->clu_btree_parent_offset ==
631b33e2cc0SMatthew Dillon 			 cursor->parent->node_offset);
632b33e2cc0SMatthew Dillon 		KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
633b33e2cc0SMatthew Dillon 			 cursor->parent->cluster->clu_no);
634b33e2cc0SMatthew Dillon 
635fe7678eeSMatthew Dillon 		cursor->left_bound = &cluster->ondisk->clu_btree_beg;
636fe7678eeSMatthew Dillon 		cursor->right_bound = &cluster->ondisk->clu_btree_end;
6378cd0a023SMatthew Dillon 		node = hammer_get_node(cluster,
6388cd0a023SMatthew Dillon 				       cluster->ondisk->clu_btree_root,
6398cd0a023SMatthew Dillon 				       &error);
6408cd0a023SMatthew Dillon 		hammer_rel_cluster(cluster, 0);
641fe7678eeSMatthew Dillon 		break;
642fe7678eeSMatthew Dillon 	default:
643fe7678eeSMatthew Dillon 		panic("hammer_cursor_down: illegal btype %02x (%c)\n",
644fe7678eeSMatthew Dillon 		      elm->base.btype,
645fe7678eeSMatthew Dillon 		      (elm->base.btype ? elm->base.btype : '?'));
646fe7678eeSMatthew Dillon 		break;
6478cd0a023SMatthew Dillon 	}
6488cd0a023SMatthew Dillon 	if (error == 0) {
6496a37e7e4SMatthew Dillon 		hammer_lock_sh(&node->lock);
6508cd0a023SMatthew Dillon 		cursor->node = node;
6518cd0a023SMatthew Dillon 		cursor->index = 0;
6528cd0a023SMatthew Dillon 	}
6538cd0a023SMatthew Dillon 	return(error);
6548cd0a023SMatthew Dillon }
6558cd0a023SMatthew Dillon 
656