1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.15 2008/01/24 02:14:45 dillon Exp $ 35 */ 36 37 /* 38 * HAMMER B-Tree index - cursor support routines 39 */ 40 #include "hammer.h" 41 42 static int hammer_load_cursor_parent(hammer_cursor_t cursor); 43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor); 44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor); 45 46 /* 47 * Initialize a fresh cursor using the B-Tree node cache. If the cache 48 * is not available initialize a fresh cursor at the root of the root 49 * cluster. 50 */ 51 int 52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache, 53 hammer_mount_t hmp) 54 { 55 hammer_cluster_t cluster; 56 hammer_node_t node; 57 int error; 58 59 bzero(cursor, sizeof(*cursor)); 60 61 /* 62 * Step 1 - acquire a locked node from the cache if possible 63 */ 64 if (cache && *cache) { 65 node = hammer_ref_node_safe(hmp, cache, &error); 66 if (error == 0) { 67 hammer_lock_sh(&node->lock); 68 if (node->flags & HAMMER_NODE_DELETED) { 69 hammer_unlock(&node->lock); 70 hammer_rel_node(node); 71 node = NULL; 72 } 73 } 74 } else { 75 node = NULL; 76 } 77 78 /* 79 * Step 2 - If we couldn't get a node from the cache, get 80 * the one from the root of the root cluster. 81 */ 82 while (node == NULL) { 83 cluster = hammer_get_root_cluster(hmp, &error); 84 if (error) 85 break; 86 node = hammer_get_node(cluster, 87 cluster->ondisk->clu_btree_root, 88 &error); 89 hammer_rel_cluster(cluster, 0); 90 if (error) 91 break; 92 hammer_lock_sh(&node->lock); 93 if (node->flags & HAMMER_NODE_DELETED) { 94 hammer_unlock(&node->lock); 95 hammer_rel_node(node); 96 node = NULL; 97 } 98 } 99 100 /* 101 * Step 3 - finish initializing the cursor by acquiring the parent 102 */ 103 cursor->node = node; 104 if (error == 0) 105 error = hammer_load_cursor_parent(cursor); 106 KKASSERT(error == 0); 107 return(error); 108 } 109 110 /* 111 * Initialize a fresh cursor at the root of the specified cluster and 112 * limit operations to within the cluster. 113 * 114 * NOTE: cursor->parent will be set to NULL to avoid referencing B-Tree 115 * nodes in other clusters. 116 */ 117 int 118 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster) 119 { 120 int error; 121 122 bzero(cursor, sizeof(*cursor)); 123 cursor->flags |= HAMMER_CURSOR_INCLUSTER; 124 cursor->node = hammer_get_node(cluster, 125 cluster->ondisk->clu_btree_root, 126 &error); 127 if (error == 0) 128 hammer_lock_sh(&cursor->node->lock); 129 cursor->left_bound = &cluster->ondisk->clu_btree_beg; 130 cursor->right_bound = &cluster->ondisk->clu_btree_end; 131 KKASSERT(error == 0); 132 return(error); 133 } 134 135 /* 136 * We are finished with a cursor. We NULL out various fields as sanity 137 * check, in case the structure is inappropriately used afterwords. 138 */ 139 void 140 hammer_done_cursor(hammer_cursor_t cursor) 141 { 142 if (cursor->parent) { 143 hammer_unlock(&cursor->parent->lock); 144 hammer_rel_node(cursor->parent); 145 cursor->parent = NULL; 146 } 147 if (cursor->node) { 148 hammer_unlock(&cursor->node->lock); 149 hammer_rel_node(cursor->node); 150 cursor->node = NULL; 151 } 152 if (cursor->data_buffer) { 153 hammer_rel_buffer(cursor->data_buffer, 0); 154 cursor->data_buffer = NULL; 155 } 156 if (cursor->record_buffer) { 157 hammer_rel_buffer(cursor->record_buffer, 0); 158 cursor->record_buffer = NULL; 159 } 160 if (cursor->ip) 161 hammer_mem_done(cursor); 162 163 /* 164 * If we deadlocked this node will be referenced. Do a quick 165 * lock/unlock to wait for the deadlock condition to clear. 166 */ 167 if (cursor->deadlk_node) { 168 hammer_lock_ex(&cursor->deadlk_node->lock); 169 hammer_unlock(&cursor->deadlk_node->lock); 170 hammer_rel_node(cursor->deadlk_node); 171 cursor->deadlk_node = NULL; 172 } 173 174 cursor->data = NULL; 175 cursor->record = NULL; 176 cursor->left_bound = NULL; 177 cursor->right_bound = NULL; 178 } 179 180 /* 181 * Upgrade cursor->node and cursor->parent to exclusive locks. This 182 * function can return EDEADLK. 183 * 184 * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 185 * we add another reference to the node that failed and set 186 * cursor->deadlk_node so hammer_done_cursor() can block on it. 187 */ 188 int 189 hammer_cursor_upgrade(hammer_cursor_t cursor) 190 { 191 int error; 192 193 if (hammer_lock_held(&cursor->node->lock) < 0) { 194 error = hammer_lock_upgrade(&cursor->node->lock); 195 if (error && cursor->deadlk_node == NULL) { 196 cursor->deadlk_node = cursor->node; 197 hammer_ref_node(cursor->deadlk_node); 198 } 199 } else { 200 error = 0; 201 } 202 if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) { 203 error = hammer_lock_upgrade(&cursor->parent->lock); 204 if (error && cursor->deadlk_node == NULL) { 205 cursor->deadlk_node = cursor->parent; 206 hammer_ref_node(cursor->deadlk_node); 207 } 208 } else { 209 error = 0; 210 } 211 return(error); 212 } 213 214 /* 215 * Downgrade cursor->node and cursor->parent to shared locks. This 216 * function can return EDEADLK. 217 */ 218 void 219 hammer_cursor_downgrade(hammer_cursor_t cursor) 220 { 221 if (hammer_lock_held(&cursor->node->lock) > 0) 222 hammer_lock_downgrade(&cursor->node->lock); 223 if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0) 224 hammer_lock_downgrade(&cursor->parent->lock); 225 } 226 227 228 #if 0 229 230 /* 231 * Acquire the parent B-Tree node of the specified node, returning a 232 * referenced but unlocked node. NULL can be returned with *errorp == 0 233 * if node is the root node of the root cluster. 234 */ 235 static 236 hammer_node_t 237 hammer_get_parent_node(hammer_node_t node, int *errorp) 238 { 239 hammer_cluster_t cluster; 240 hammer_node_t parent; 241 242 cluster = node->cluster; 243 if (node->ondisk->parent) { 244 /* 245 * Local parent 246 */ 247 parent = hammer_get_node(cluster, node->ondisk->parent, errorp); 248 } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) { 249 /* 250 * At cluster root, locate node in parent cluster 251 */ 252 hammer_cluster_ondisk_t ondisk; 253 hammer_cluster_t pcluster; 254 hammer_volume_t pvolume; 255 int32_t clu_no; 256 int32_t vol_no; 257 258 ondisk = cluster->ondisk; 259 vol_no = ondisk->clu_btree_parent_vol_no; 260 clu_no = ondisk->clu_btree_parent_clu_no; 261 262 /* 263 * Acquire the node from (volume, cluster, offset) 264 */ 265 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no, 266 errorp); 267 if (*errorp) 268 return (NULL); 269 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0); 270 hammer_rel_volume(pvolume, 0); 271 if (*errorp) 272 return (NULL); 273 parent = hammer_get_node(pcluster, 274 ondisk->clu_btree_parent_offset, 275 errorp); 276 hammer_rel_cluster(pcluster, 0); 277 } else { 278 /* 279 * At root of root cluster, there is no parent. 280 */ 281 KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1); 282 parent = NULL; 283 *errorp = 0; 284 } 285 return(parent); 286 } 287 288 #endif 289 290 /* 291 * Load the parent of cursor->node into cursor->parent. There are several 292 * cases. (1) The parent is in the current cluster. (2) We are at the 293 * root of the cluster and the parent is in another cluster. (3) We are at 294 * the root of the root cluster. 295 * 296 * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster, 297 * we do not access the parent cluster at all and make the cursor look like 298 * its at the root. 299 */ 300 static 301 int 302 hammer_load_cursor_parent(hammer_cursor_t cursor) 303 { 304 hammer_cluster_t cluster; 305 int error; 306 307 cluster = cursor->node->cluster; 308 309 if (cursor->node->ondisk->parent) { 310 error = hammer_load_cursor_parent_local(cursor); 311 } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 && 312 ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0) 313 ) { 314 error = hammer_load_cursor_parent_cluster(cursor); 315 } else { 316 cursor->parent = NULL; 317 cursor->parent_index = 0; 318 cursor->left_bound = &cluster->ondisk->clu_btree_beg; 319 cursor->right_bound = &cluster->ondisk->clu_btree_end; 320 error = 0; 321 } 322 return(error); 323 } 324 325 static 326 int 327 hammer_load_cursor_parent_local(hammer_cursor_t cursor) 328 { 329 hammer_node_t node; 330 hammer_node_t parent; 331 hammer_btree_elm_t elm; 332 int error; 333 int i; 334 335 node = cursor->node; 336 parent = hammer_get_node(node->cluster, node->ondisk->parent, &error); 337 if (error) 338 return(error); 339 elm = NULL; 340 for (i = 0; i < parent->ondisk->count; ++i) { 341 elm = &parent->ondisk->elms[i]; 342 if (parent->ondisk->elms[i].internal.subtree_offset == 343 node->node_offset) { 344 break; 345 } 346 } 347 if (i == parent->ondisk->count) 348 panic("Bad B-Tree link: parent %p node %p\n", parent, node); 349 KKASSERT(i != parent->ondisk->count); 350 cursor->parent = parent; 351 cursor->parent_index = i; 352 cursor->left_bound = &elm[0].internal.base; 353 cursor->right_bound = &elm[1].internal.base; 354 355 hammer_lock_sh(&parent->lock); 356 return(error); 357 } 358 359 static 360 int 361 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor) 362 { 363 hammer_cluster_ondisk_t ondisk; 364 hammer_cluster_t pcluster; 365 hammer_cluster_t ccluster; 366 hammer_volume_t volume; 367 hammer_node_t node; 368 hammer_node_t parent; 369 hammer_btree_elm_t elm; 370 int32_t clu_no; 371 int32_t vol_no; 372 int error; 373 int i; 374 375 node = cursor->node; 376 ccluster = node->cluster; 377 ondisk = ccluster->ondisk; 378 vol_no = ondisk->clu_btree_parent_vol_no; 379 clu_no = ondisk->clu_btree_parent_clu_no; 380 381 /* 382 * Acquire the node from (volume, cluster, offset). This should 383 * be a leaf node containing the HAMMER_BTREE_TYPE_SPIKE_END element. 384 */ 385 volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error); 386 if (error) 387 return (error); 388 pcluster = hammer_get_cluster(volume, clu_no, &error, 0); 389 hammer_rel_volume(volume, 0); 390 if (error) 391 return (error); 392 parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset, 393 &error); 394 hammer_rel_cluster(pcluster, 0); 395 if (error) 396 return (error); 397 KKASSERT(parent->ondisk->type == HAMMER_BTREE_TYPE_LEAF); 398 399 /* 400 * Ok, we have the node. Locate the inter-cluster element 401 */ 402 elm = NULL; 403 for (i = 0; i < parent->ondisk->count; ++i) { 404 elm = &parent->ondisk->elms[i]; 405 406 if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END && 407 elm->leaf.spike_clu_no == cursor->node->cluster->clu_no) { 408 break; 409 } 410 } 411 KKASSERT(i != parent->ondisk->count); 412 KKASSERT(i && elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG); 413 cursor->parent = parent; 414 cursor->parent_index = i; 415 cursor->left_bound = &ccluster->ondisk->clu_btree_beg; 416 cursor->right_bound = &ccluster->ondisk->clu_btree_end; 417 418 KKASSERT(hammer_btree_cmp(&elm[-1].leaf.base, 419 &ccluster->ondisk->clu_btree_beg) == 0); 420 /* 421 * spike_end is an inclusive boundary and will != clu_btree_end 422 KKASSERT(hammer_btree_cmp(cursor->right_bound, 423 &ccluster->ondisk->clu_btree_end) >= 0); 424 */ 425 426 hammer_lock_sh(&parent->lock); 427 return(0); 428 } 429 430 /* 431 * Cursor up to our parent node. Return ENOENT if we are at the root of 432 * the root cluster. 433 * 434 * If doing a nonblocking cursor-up and we are unable to acquire the lock, 435 * the cursor remains unchanged. 436 */ 437 int 438 hammer_cursor_up(hammer_cursor_t cursor) 439 { 440 int error; 441 442 hammer_cursor_downgrade(cursor); 443 444 /* 445 * Set the node to its parent. If the parent is NULL we are at 446 * the root of the root cluster and return ENOENT. 447 */ 448 hammer_unlock(&cursor->node->lock); 449 hammer_rel_node(cursor->node); 450 cursor->node = cursor->parent; 451 cursor->index = cursor->parent_index; 452 cursor->parent = NULL; 453 cursor->parent_index = 0; 454 455 if (cursor->node == NULL) { 456 error = ENOENT; 457 } else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) && 458 cursor->node->ondisk->parent == 0 459 ) { 460 error = ENOENT; 461 } else { 462 error = hammer_load_cursor_parent(cursor); 463 } 464 return(error); 465 } 466 467 /* 468 * Set the cursor to the root of the current cursor's cluster. 469 */ 470 int 471 hammer_cursor_toroot(hammer_cursor_t cursor) 472 { 473 hammer_cluster_t cluster; 474 int error; 475 476 /* 477 * Already at root of cluster? 478 */ 479 if (cursor->node->ondisk->parent == 0) 480 return (0); 481 482 hammer_cursor_downgrade(cursor); 483 484 /* 485 * Parent is root of cluster? 486 */ 487 if (cursor->parent->ondisk->parent == 0) 488 return (hammer_cursor_up(cursor)); 489 490 /* 491 * Ok, reload the cursor with the root of the cluster, then 492 * locate its parent. 493 */ 494 cluster = cursor->node->cluster; 495 error = hammer_ref_cluster(cluster); 496 if (error) 497 return (error); 498 499 hammer_unlock(&cursor->parent->lock); 500 hammer_rel_node(cursor->parent); 501 hammer_unlock(&cursor->node->lock); 502 hammer_rel_node(cursor->node); 503 cursor->parent = NULL; 504 cursor->parent_index = 0; 505 506 cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root, 507 &error); 508 cursor->index = 0; 509 hammer_lock_sh(&cursor->node->lock); 510 hammer_rel_cluster(cluster, 0); 511 if (error == 0) 512 error = hammer_load_cursor_parent(cursor); 513 return(error); 514 } 515 516 /* 517 * Cursor down through the current node, which must be an internal node. 518 * 519 * This routine adjusts the cursor and sets index to 0. 520 */ 521 int 522 hammer_cursor_down(hammer_cursor_t cursor) 523 { 524 hammer_node_t node; 525 hammer_btree_elm_t elm; 526 hammer_volume_t volume; 527 hammer_cluster_t cluster; 528 int32_t vol_no; 529 int32_t clu_no; 530 int error; 531 532 /* 533 * The current node becomes the current parent 534 */ 535 hammer_cursor_downgrade(cursor); 536 node = cursor->node; 537 KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count); 538 if (cursor->parent) { 539 hammer_unlock(&cursor->parent->lock); 540 hammer_rel_node(cursor->parent); 541 } 542 cursor->parent = node; 543 cursor->parent_index = cursor->index; 544 cursor->node = NULL; 545 cursor->index = 0; 546 547 /* 548 * Extract element to push into at (node,index), set bounds. 549 */ 550 elm = &node->ondisk->elms[cursor->parent_index]; 551 552 /* 553 * Ok, push down into elm. If elm specifies an internal or leaf 554 * node the current node must be an internal node. If elm specifies 555 * a spike then the current node must be a leaf node. 556 * 557 * Cursoring down through a cluster transition when the INCLUSTER 558 * flag is set is not legal. 559 */ 560 switch(elm->base.btype) { 561 case HAMMER_BTREE_TYPE_INTERNAL: 562 case HAMMER_BTREE_TYPE_LEAF: 563 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL); 564 KKASSERT(elm->internal.subtree_offset != 0); 565 cursor->left_bound = &elm[0].internal.base; 566 cursor->right_bound = &elm[1].internal.base; 567 node = hammer_get_node(node->cluster, 568 elm->internal.subtree_offset, 569 &error); 570 if (error == 0) { 571 KKASSERT(elm->base.btype == node->ondisk->type); 572 if (node->ondisk->parent != cursor->parent->node_offset) 573 panic("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset); 574 KKASSERT(node->ondisk->parent == cursor->parent->node_offset); 575 } 576 break; 577 case HAMMER_BTREE_TYPE_SPIKE_BEG: 578 /* case not supported yet */ 579 KKASSERT(0); 580 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); 581 KKASSERT(cursor->parent_index < node->ondisk->count - 1); 582 KKASSERT(elm[1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END); 583 ++elm; 584 ++cursor->parent_index; 585 /* fall through */ 586 case HAMMER_BTREE_TYPE_SPIKE_END: 587 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); 588 KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0); 589 vol_no = elm->leaf.spike_vol_no; 590 clu_no = elm->leaf.spike_clu_no; 591 volume = hammer_get_volume(node->cluster->volume->hmp, 592 vol_no, &error); 593 KKASSERT(error != EINVAL); 594 if (error) 595 return(error); 596 cluster = hammer_get_cluster(volume, clu_no, &error, 0); 597 KKASSERT(error != EINVAL); 598 hammer_rel_volume(volume, 0); 599 if (error) 600 return(error); 601 KKASSERT(cluster->ondisk->clu_btree_parent_offset == 602 cursor->parent->node_offset); 603 KKASSERT(cluster->ondisk->clu_btree_parent_clu_no == 604 cursor->parent->cluster->clu_no); 605 606 cursor->left_bound = &cluster->ondisk->clu_btree_beg; 607 cursor->right_bound = &cluster->ondisk->clu_btree_end; 608 node = hammer_get_node(cluster, 609 cluster->ondisk->clu_btree_root, 610 &error); 611 hammer_rel_cluster(cluster, 0); 612 break; 613 default: 614 panic("hammer_cursor_down: illegal btype %02x (%c)\n", 615 elm->base.btype, 616 (elm->base.btype ? elm->base.btype : '?')); 617 break; 618 } 619 if (error == 0) { 620 hammer_lock_sh(&node->lock); 621 cursor->node = node; 622 cursor->index = 0; 623 } 624 return(error); 625 } 626 627