xref: /dflybsd-src/sys/vfs/hammer/hammer_cursor.c (revision 21d8bc42f570abe46007647277c2c8d68cf8b55a)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.15 2008/01/24 02:14:45 dillon Exp $
35  */
36 
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41 
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
45 
46 /*
47  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
48  * is not available initialize a fresh cursor at the root of the root
49  * cluster.
50  */
51 int
52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
53 		       hammer_mount_t hmp)
54 {
55 	hammer_cluster_t cluster;
56 	hammer_node_t node;
57 	int error;
58 
59 	bzero(cursor, sizeof(*cursor));
60 
61 	/*
62 	 * Step 1 - acquire a locked node from the cache if possible
63 	 */
64 	if (cache && *cache) {
65 		node = hammer_ref_node_safe(hmp, cache, &error);
66 		if (error == 0) {
67 			hammer_lock_sh(&node->lock);
68 			if (node->flags & HAMMER_NODE_DELETED) {
69 				hammer_unlock(&node->lock);
70 				hammer_rel_node(node);
71 				node = NULL;
72 			}
73 		}
74 	} else {
75 		node = NULL;
76 	}
77 
78 	/*
79 	 * Step 2 - If we couldn't get a node from the cache, get
80 	 * the one from the root of the root cluster.
81 	 */
82 	while (node == NULL) {
83 		cluster = hammer_get_root_cluster(hmp, &error);
84 		if (error)
85 			break;
86 		node = hammer_get_node(cluster,
87 				       cluster->ondisk->clu_btree_root,
88 				       &error);
89 		hammer_rel_cluster(cluster, 0);
90 		if (error)
91 			break;
92 		hammer_lock_sh(&node->lock);
93 		if (node->flags & HAMMER_NODE_DELETED) {
94 			hammer_unlock(&node->lock);
95 			hammer_rel_node(node);
96 			node = NULL;
97 		}
98 	}
99 
100 	/*
101 	 * Step 3 - finish initializing the cursor by acquiring the parent
102 	 */
103 	cursor->node = node;
104 	if (error == 0)
105 		error = hammer_load_cursor_parent(cursor);
106 	KKASSERT(error == 0);
107 	return(error);
108 }
109 
110 /*
111  * Initialize a fresh cursor at the root of the specified cluster and
112  * limit operations to within the cluster.
113  *
114  * NOTE: cursor->parent will be set to NULL to avoid referencing B-Tree
115  * nodes in other clusters.
116  */
117 int
118 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
119 {
120 	int error;
121 
122 	bzero(cursor, sizeof(*cursor));
123 	cursor->flags |= HAMMER_CURSOR_INCLUSTER;
124 	cursor->node = hammer_get_node(cluster,
125 				       cluster->ondisk->clu_btree_root,
126 				       &error);
127 	if (error == 0)
128 		hammer_lock_sh(&cursor->node->lock);
129 	cursor->left_bound = &cluster->ondisk->clu_btree_beg;
130 	cursor->right_bound = &cluster->ondisk->clu_btree_end;
131 	KKASSERT(error == 0);
132 	return(error);
133 }
134 
135 /*
136  * We are finished with a cursor.  We NULL out various fields as sanity
137  * check, in case the structure is inappropriately used afterwords.
138  */
139 void
140 hammer_done_cursor(hammer_cursor_t cursor)
141 {
142 	if (cursor->parent) {
143 		hammer_unlock(&cursor->parent->lock);
144 		hammer_rel_node(cursor->parent);
145 		cursor->parent = NULL;
146 	}
147 	if (cursor->node) {
148 		hammer_unlock(&cursor->node->lock);
149 		hammer_rel_node(cursor->node);
150 		cursor->node = NULL;
151 	}
152         if (cursor->data_buffer) {
153                 hammer_rel_buffer(cursor->data_buffer, 0);
154                 cursor->data_buffer = NULL;
155         }
156         if (cursor->record_buffer) {
157                 hammer_rel_buffer(cursor->record_buffer, 0);
158                 cursor->record_buffer = NULL;
159         }
160 	if (cursor->ip)
161 		hammer_mem_done(cursor);
162 
163 	/*
164 	 * If we deadlocked this node will be referenced.  Do a quick
165 	 * lock/unlock to wait for the deadlock condition to clear.
166 	 */
167 	if (cursor->deadlk_node) {
168 		hammer_lock_ex(&cursor->deadlk_node->lock);
169 		hammer_unlock(&cursor->deadlk_node->lock);
170 		hammer_rel_node(cursor->deadlk_node);
171 		cursor->deadlk_node = NULL;
172 	}
173 
174 	cursor->data = NULL;
175 	cursor->record = NULL;
176 	cursor->left_bound = NULL;
177 	cursor->right_bound = NULL;
178 }
179 
180 /*
181  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
182  * function can return EDEADLK.
183  *
184  * If we fail to upgrade the lock and cursor->deadlk_node is NULL,
185  * we add another reference to the node that failed and set
186  * cursor->deadlk_node so hammer_done_cursor() can block on it.
187  */
188 int
189 hammer_cursor_upgrade(hammer_cursor_t cursor)
190 {
191 	int error;
192 
193 	if (hammer_lock_held(&cursor->node->lock) < 0) {
194 		error = hammer_lock_upgrade(&cursor->node->lock);
195 		if (error && cursor->deadlk_node == NULL) {
196 			cursor->deadlk_node = cursor->node;
197 			hammer_ref_node(cursor->deadlk_node);
198 		}
199 	} else {
200 		error = 0;
201 	}
202 	if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
203 		error = hammer_lock_upgrade(&cursor->parent->lock);
204 		if (error && cursor->deadlk_node == NULL) {
205 			cursor->deadlk_node = cursor->parent;
206 			hammer_ref_node(cursor->deadlk_node);
207 		}
208 	} else {
209 		error = 0;
210 	}
211 	return(error);
212 }
213 
214 /*
215  * Downgrade cursor->node and cursor->parent to shared locks.  This
216  * function can return EDEADLK.
217  */
218 void
219 hammer_cursor_downgrade(hammer_cursor_t cursor)
220 {
221 	if (hammer_lock_held(&cursor->node->lock) > 0)
222 		hammer_lock_downgrade(&cursor->node->lock);
223 	if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
224 		hammer_lock_downgrade(&cursor->parent->lock);
225 }
226 
227 
228 #if 0
229 
230 /*
231  * Acquire the parent B-Tree node of the specified node, returning a
232  * referenced but unlocked node.  NULL can be returned with *errorp == 0
233  * if node is the root node of the root cluster.
234  */
235 static
236 hammer_node_t
237 hammer_get_parent_node(hammer_node_t node, int *errorp)
238 {
239 	hammer_cluster_t cluster;
240 	hammer_node_t parent;
241 
242 	cluster = node->cluster;
243 	if (node->ondisk->parent) {
244 		/*
245 		 * Local parent
246 		 */
247 		parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
248 	} else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
249 		/*
250 		 * At cluster root, locate node in parent cluster
251 		 */
252 		hammer_cluster_ondisk_t ondisk;
253 		hammer_cluster_t pcluster;
254 		hammer_volume_t pvolume;
255 		int32_t clu_no;
256 		int32_t vol_no;
257 
258 		ondisk = cluster->ondisk;
259 		vol_no = ondisk->clu_btree_parent_vol_no;
260 		clu_no = ondisk->clu_btree_parent_clu_no;
261 
262 		/*
263 		 * Acquire the node from (volume, cluster, offset)
264 		 */
265 		pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
266 					    errorp);
267 		if (*errorp)
268 			return (NULL);
269 		pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
270 		hammer_rel_volume(pvolume, 0);
271 		if (*errorp)
272 			return (NULL);
273 		parent = hammer_get_node(pcluster,
274 					 ondisk->clu_btree_parent_offset,
275 					 errorp);
276 		hammer_rel_cluster(pcluster, 0);
277 	} else {
278 		/*
279 		 * At root of root cluster, there is no parent.
280 		 */
281 		KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
282 		parent = NULL;
283 		*errorp = 0;
284 	}
285 	return(parent);
286 }
287 
288 #endif
289 
290 /*
291  * Load the parent of cursor->node into cursor->parent.  There are several
292  * cases.  (1) The parent is in the current cluster.  (2) We are at the
293  * root of the cluster and the parent is in another cluster.  (3) We are at
294  * the root of the root cluster.
295  *
296  * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
297  * we do not access the parent cluster at all and make the cursor look like
298  * its at the root.
299  */
300 static
301 int
302 hammer_load_cursor_parent(hammer_cursor_t cursor)
303 {
304 	hammer_cluster_t cluster;
305 	int error;
306 
307 	cluster = cursor->node->cluster;
308 
309 	if (cursor->node->ondisk->parent) {
310 		error = hammer_load_cursor_parent_local(cursor);
311 	} else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
312 		   ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
313 	) {
314 		error = hammer_load_cursor_parent_cluster(cursor);
315 	} else {
316 		cursor->parent = NULL;
317 		cursor->parent_index = 0;
318 		cursor->left_bound = &cluster->ondisk->clu_btree_beg;
319 		cursor->right_bound = &cluster->ondisk->clu_btree_end;
320 		error = 0;
321 	}
322 	return(error);
323 }
324 
325 static
326 int
327 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
328 {
329 	hammer_node_t node;
330 	hammer_node_t parent;
331 	hammer_btree_elm_t elm;
332 	int error;
333 	int i;
334 
335 	node = cursor->node;
336 	parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
337 	if (error)
338 		return(error);
339 	elm = NULL;
340 	for (i = 0; i < parent->ondisk->count; ++i) {
341 		elm = &parent->ondisk->elms[i];
342 		if (parent->ondisk->elms[i].internal.subtree_offset ==
343 		    node->node_offset) {
344 			break;
345 		}
346 	}
347 	if (i == parent->ondisk->count)
348 		panic("Bad B-Tree link: parent %p node %p\n", parent, node);
349 	KKASSERT(i != parent->ondisk->count);
350 	cursor->parent = parent;
351 	cursor->parent_index = i;
352 	cursor->left_bound = &elm[0].internal.base;
353 	cursor->right_bound = &elm[1].internal.base;
354 
355 	hammer_lock_sh(&parent->lock);
356 	return(error);
357 }
358 
359 static
360 int
361 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
362 {
363 	hammer_cluster_ondisk_t ondisk;
364 	hammer_cluster_t pcluster;
365 	hammer_cluster_t ccluster;
366 	hammer_volume_t volume;
367 	hammer_node_t node;
368 	hammer_node_t parent;
369 	hammer_btree_elm_t elm;
370 	int32_t clu_no;
371 	int32_t vol_no;
372 	int error;
373 	int i;
374 
375 	node = cursor->node;
376 	ccluster = node->cluster;
377 	ondisk = ccluster->ondisk;
378 	vol_no = ondisk->clu_btree_parent_vol_no;
379 	clu_no = ondisk->clu_btree_parent_clu_no;
380 
381 	/*
382 	 * Acquire the node from (volume, cluster, offset).  This should
383 	 * be a leaf node containing the HAMMER_BTREE_TYPE_SPIKE_END element.
384 	 */
385 	volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
386 	if (error)
387 		return (error);
388 	pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
389 	hammer_rel_volume(volume, 0);
390 	if (error)
391 		return (error);
392 	parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
393 				 &error);
394 	hammer_rel_cluster(pcluster, 0);
395 	if (error)
396 		return (error);
397 	KKASSERT(parent->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
398 
399 	/*
400 	 * Ok, we have the node.  Locate the inter-cluster element
401 	 */
402 	elm = NULL;
403 	for (i = 0; i < parent->ondisk->count; ++i) {
404 		elm = &parent->ondisk->elms[i];
405 
406 		if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END &&
407 		    elm->leaf.spike_clu_no == cursor->node->cluster->clu_no) {
408 			break;
409 		}
410 	}
411 	KKASSERT(i != parent->ondisk->count);
412 	KKASSERT(i && elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
413 	cursor->parent = parent;
414 	cursor->parent_index = i;
415 	cursor->left_bound = &ccluster->ondisk->clu_btree_beg;
416 	cursor->right_bound = &ccluster->ondisk->clu_btree_end;
417 
418 	KKASSERT(hammer_btree_cmp(&elm[-1].leaf.base,
419 		 &ccluster->ondisk->clu_btree_beg) == 0);
420 	    /*
421 	     * spike_end is an inclusive boundary and will != clu_btree_end
422 	KKASSERT(hammer_btree_cmp(cursor->right_bound,
423 		 &ccluster->ondisk->clu_btree_end) >= 0);
424 	    */
425 
426 	hammer_lock_sh(&parent->lock);
427 	return(0);
428 }
429 
430 /*
431  * Cursor up to our parent node.  Return ENOENT if we are at the root of
432  * the root cluster.
433  *
434  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
435  * the cursor remains unchanged.
436  */
437 int
438 hammer_cursor_up(hammer_cursor_t cursor)
439 {
440 	int error;
441 
442 	hammer_cursor_downgrade(cursor);
443 
444 	/*
445 	 * Set the node to its parent.  If the parent is NULL we are at
446 	 * the root of the root cluster and return ENOENT.
447 	 */
448 	hammer_unlock(&cursor->node->lock);
449 	hammer_rel_node(cursor->node);
450 	cursor->node = cursor->parent;
451 	cursor->index = cursor->parent_index;
452 	cursor->parent = NULL;
453 	cursor->parent_index = 0;
454 
455 	if (cursor->node == NULL) {
456 		error = ENOENT;
457 	} else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
458 		   cursor->node->ondisk->parent == 0
459 	) {
460 		error = ENOENT;
461 	} else {
462 		error = hammer_load_cursor_parent(cursor);
463 	}
464 	return(error);
465 }
466 
467 /*
468  * Set the cursor to the root of the current cursor's cluster.
469  */
470 int
471 hammer_cursor_toroot(hammer_cursor_t cursor)
472 {
473 	hammer_cluster_t cluster;
474 	int error;
475 
476 	/*
477 	 * Already at root of cluster?
478 	 */
479 	if (cursor->node->ondisk->parent == 0)
480 		return (0);
481 
482 	hammer_cursor_downgrade(cursor);
483 
484 	/*
485 	 * Parent is root of cluster?
486 	 */
487 	if (cursor->parent->ondisk->parent == 0)
488 		return (hammer_cursor_up(cursor));
489 
490 	/*
491 	 * Ok, reload the cursor with the root of the cluster, then
492 	 * locate its parent.
493 	 */
494 	cluster = cursor->node->cluster;
495 	error = hammer_ref_cluster(cluster);
496 	if (error)
497 		return (error);
498 
499 	hammer_unlock(&cursor->parent->lock);
500 	hammer_rel_node(cursor->parent);
501 	hammer_unlock(&cursor->node->lock);
502 	hammer_rel_node(cursor->node);
503 	cursor->parent = NULL;
504 	cursor->parent_index = 0;
505 
506 	cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
507 				       &error);
508 	cursor->index = 0;
509 	hammer_lock_sh(&cursor->node->lock);
510 	hammer_rel_cluster(cluster, 0);
511 	if (error == 0)
512 		error = hammer_load_cursor_parent(cursor);
513 	return(error);
514 }
515 
516 /*
517  * Cursor down through the current node, which must be an internal node.
518  *
519  * This routine adjusts the cursor and sets index to 0.
520  */
521 int
522 hammer_cursor_down(hammer_cursor_t cursor)
523 {
524 	hammer_node_t node;
525 	hammer_btree_elm_t elm;
526 	hammer_volume_t volume;
527 	hammer_cluster_t cluster;
528 	int32_t vol_no;
529 	int32_t clu_no;
530 	int error;
531 
532 	/*
533 	 * The current node becomes the current parent
534 	 */
535 	hammer_cursor_downgrade(cursor);
536 	node = cursor->node;
537 	KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
538 	if (cursor->parent) {
539 		hammer_unlock(&cursor->parent->lock);
540 		hammer_rel_node(cursor->parent);
541 	}
542 	cursor->parent = node;
543 	cursor->parent_index = cursor->index;
544 	cursor->node = NULL;
545 	cursor->index = 0;
546 
547 	/*
548 	 * Extract element to push into at (node,index), set bounds.
549 	 */
550 	elm = &node->ondisk->elms[cursor->parent_index];
551 
552 	/*
553 	 * Ok, push down into elm.  If elm specifies an internal or leaf
554 	 * node the current node must be an internal node.  If elm specifies
555 	 * a spike then the current node must be a leaf node.
556 	 *
557 	 * Cursoring down through a cluster transition when the INCLUSTER
558 	 * flag is set is not legal.
559 	 */
560 	switch(elm->base.btype) {
561 	case HAMMER_BTREE_TYPE_INTERNAL:
562 	case HAMMER_BTREE_TYPE_LEAF:
563 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
564 		KKASSERT(elm->internal.subtree_offset != 0);
565 		cursor->left_bound = &elm[0].internal.base;
566 		cursor->right_bound = &elm[1].internal.base;
567 		node = hammer_get_node(node->cluster,
568 				       elm->internal.subtree_offset,
569 				       &error);
570 		if (error == 0) {
571 			KKASSERT(elm->base.btype == node->ondisk->type);
572 			if (node->ondisk->parent != cursor->parent->node_offset)
573 				panic("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
574 			KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
575 		}
576 		break;
577 	case HAMMER_BTREE_TYPE_SPIKE_BEG:
578 		/* case not supported yet */
579 		KKASSERT(0);
580 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
581 		KKASSERT(cursor->parent_index < node->ondisk->count - 1);
582 		KKASSERT(elm[1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END);
583 		++elm;
584 		++cursor->parent_index;
585 		/* fall through */
586 	case HAMMER_BTREE_TYPE_SPIKE_END:
587 		KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
588 		KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
589 		vol_no = elm->leaf.spike_vol_no;
590 		clu_no = elm->leaf.spike_clu_no;
591 		volume = hammer_get_volume(node->cluster->volume->hmp,
592 					   vol_no, &error);
593 		KKASSERT(error != EINVAL);
594 		if (error)
595 			return(error);
596 		cluster = hammer_get_cluster(volume, clu_no, &error, 0);
597 		KKASSERT(error != EINVAL);
598 		hammer_rel_volume(volume, 0);
599 		if (error)
600 			return(error);
601 		KKASSERT(cluster->ondisk->clu_btree_parent_offset ==
602 			 cursor->parent->node_offset);
603 		KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
604 			 cursor->parent->cluster->clu_no);
605 
606 		cursor->left_bound = &cluster->ondisk->clu_btree_beg;
607 		cursor->right_bound = &cluster->ondisk->clu_btree_end;
608 		node = hammer_get_node(cluster,
609 				       cluster->ondisk->clu_btree_root,
610 				       &error);
611 		hammer_rel_cluster(cluster, 0);
612 		break;
613 	default:
614 		panic("hammer_cursor_down: illegal btype %02x (%c)\n",
615 		      elm->base.btype,
616 		      (elm->base.btype ? elm->base.btype : '?'));
617 		break;
618 	}
619 	if (error == 0) {
620 		hammer_lock_sh(&node->lock);
621 		cursor->node = node;
622 		cursor->index = 0;
623 	}
624 	return(error);
625 }
626 
627