/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.36 2008/03/25 06:43:44 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_buffer(hammer_buffer_t buffer, int isnew);
static int hammer_load_node(hammer_node_t node);

/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->zone2_offset < buf2->zone2_offset)
		return(-1);
	if (buf1->zone2_offset > buf2->zone2_offset)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}

/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_buf_rb_tree_RB_LOOKUP(root, zone2_offset).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, hammer_off_t, zone2_offset);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, hammer_off_t, node_offset);
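
/*
 * Illustrative sketch (not compiled): how the generated lookup functions
 * above are typically invoked.  The RB_LOOKUP macro resolves to the
 * per-tree function (e.g. hammer_vol_rb_tree_RB_LOOKUP()), while the
 * XLOOKUP variant is reached through its INFO-suffixed name.  The
 * rb_inos_root field name and the local variables are assumptions made
 * for illustration only.
 *
 *	hammer_volume_t volume;
 *	hammer_inode_t ip;
 *	struct hammer_inode_info info;
 *
 *	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 */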

/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;
	int setmp = 0;

	mp = hmp->mp;
	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		if (vn_isdisk(volume->devvp, &error)) {
			error = vfs_mountedon(volume->devvp);
		}
	}
	if (error == 0 &&
	    count_udev(volume->devvp->v_umajor, volume->devvp->v_uminor) > 0) {
		error = EBUSY;
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(volume->devvp, V_SAVE, 0, 0);
		if (error == 0) {
			error = VOP_OPEN(volume->devvp,
					 (ronly ? FREAD : FREAD|FWRITE),
					 FSCRED, NULL);
		}
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}
	volume->devvp->v_rdev->si_mountpoint = mp;
	setmp = 1;

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->buffer_base = ondisk->vol_buf_beg;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	volume->maxbuf_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
				ondisk->vol_buf_end - ondisk->vol_buf_beg);
	RB_INIT(&volume->rb_bufs_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume.  HAMMER special-cases the rootvol structure.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		if (bp) {
			brelse(bp);
			bp = NULL;
		}
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		if (setmp)
			volume->devvp->v_rdev->si_mountpoint = NULL;
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}
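
/*
 * Illustrative sketch (not part of this file): the mount path is expected
 * to call hammer_install_volume() once per volume before anything else
 * touches the volumes, since hammer_get_volume() will not load a volume
 * that was never installed.  The loop below is an assumption of what the
 * VFS mount code looks like; the variable names are hypothetical.
 *
 *	int i, error = 0;
 *
 *	for (i = 0; error == 0 && i < nvolumes; ++i)
 *		error = hammer_install_volume(hmp, volume_names[i]);
 */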

/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue the scan,
 * so failures are not reported to the caller.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */

	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root volume pointer, which is held unlocked in hmp.
	 */
	if (hmp->rootvol == volume)
		hmp->rootvol = NULL;

	/*
	 * Unload buffers.
	 */
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	volume->io.flush = 1;
	volume->io.waitdep = 1;
	hammer_io_release(&volume->io);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_bufs_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (volume->devvp->v_rdev &&
		    volume->devvp->v_rdev->si_mountpoint == hmp->mp
		) {
			volume->devvp->v_rdev->si_mountpoint = NULL;
		}
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}

static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}

/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}

hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}
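
/*
 * Illustrative sketch (not compiled): the reference discipline callers of
 * the functions above are expected to follow.  A successful
 * hammer_get_volume() or hammer_get_root_volume() returns a referenced
 * volume with valid ->ondisk data; the reference must eventually be
 * dropped with hammer_rel_volume().  Variable names are hypothetical.
 *
 *	hammer_volume_t volume;
 *	int error;
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... inspect volume->ondisk ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */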

/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	int error;

	++volume->io.loading;
	hammer_lock_ex(&volume->io.lock);

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error == 0)
			volume->ondisk = (void *)volume->io.bp->b_data;
	} else {
		error = 0;
	}
	--volume->io.loading;
	hammer_unlock(&volume->io.lock);
	return(error);
}

/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock to be held.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (flush)
		volume->io.flush = 1;
	crit_enter();
	if (volume->io.lock.refs == 1) {
		++volume->io.loading;
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io);
		}
		--volume->io.loading;
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
	crit_exit();
}
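
/*
 * Illustrative sketch (not compiled): the load/release interlock idiom the
 * two functions above implement.  A loader bumps ->loading and re-checks
 * ->ondisk under the exclusive lock; the releaser only tears the structure
 * down if it still holds the last reference once it has the lock.  The
 * structure and helper names below are generic pseudocode, not real
 * HAMMER identifiers.
 *
 *	ref(obj);
 *	if (obj->ondisk == NULL || obj->loading) {
 *		++obj->loading;
 *		lock_exclusive(obj);
 *		if (obj->ondisk == NULL)
 *			obj->ondisk = read_from_media(obj);
 *		--obj->loading;
 *		unlock(obj);
 *	}
 */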

/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Currently all blockmap-backed zones are translated
 * to zone-2 buffer offsets.
 */
hammer_buffer_t
hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
		  int isnew, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_volume_t volume;
	hammer_off_t zoneX_offset;
	int vol_no;
	int zone;

	zoneX_offset = buf_offset;
	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone >= HAMMER_ZONE_BTREE_INDEX) {
		buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp);
		KKASSERT(*errorp == 0);
	} else if (zone == HAMMER_ZONE_UNDO_INDEX) {
		buf_offset = hammer_undo_lookup(hmp, buf_offset, errorp);
		KKASSERT(*errorp == 0);
	}
	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, errorp);
	if (volume == NULL)
		return(NULL);

	/*
	 * NOTE: buf_offset and maxbuf_off are both full offset
	 * specifications.
	 */
	KKASSERT(buf_offset < volume->maxbuf_off);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->zone2_offset = buf_offset;
		buffer->volume = volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = volume->ondisk->vol_buf_beg +
				    (buf_offset & HAMMER_OFF_SHORT_MASK);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &volume->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Cache the blockmap translation
	 */
	if ((zoneX_offset & HAMMER_ZONE_RAW_BUFFER) != HAMMER_ZONE_RAW_BUFFER)
		buffer->zoneX_offset = zoneX_offset;

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, isnew);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	hammer_rel_volume(volume, 0);
	return(buffer);
}

static int
hammer_load_buffer(hammer_buffer_t buffer, int isnew)
{
	hammer_volume_t volume;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	++buffer->io.loading;
	hammer_lock_ex(&buffer->io.lock);

	if (buffer->ondisk == NULL) {
		if (isnew) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error == 0)
			buffer->ondisk = (void *)buffer->io.bp->b_data;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		hammer_modify_buffer(NULL, buffer, NULL, 0);
		/* additional initialization goes here */
	}
	--buffer->io.loading;
	hammer_unlock(&buffer->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}

/*
 * Reference a buffer that is either already referenced or is reachable
 * via a specially handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		}
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_volume_t volume;
	int freeme = 0;

	if (flush)
		buffer->io.flush = 1;
	crit_enter();
	if (buffer->io.lock.refs == 1) {
		++buffer->io.loading;	/* force interlock check */
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io);
			hammer_flush_buffer_nodes(buffer);
			KKASSERT(TAILQ_EMPTY(&buffer->clist));

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				/*
				 * Final cleanup
				 */
				volume = buffer->volume;
				RB_REMOVE(hammer_buf_rb_tree,
					  &volume->rb_bufs_root, buffer);
				buffer->volume = NULL; /* sanity */
				hammer_rel_volume(volume, 0);
				freeme = 1;
			}
		}
		--buffer->io.loading;
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
	crit_exit();
	if (freeme) {
		--hammer_count_buffers;
		kfree(buffer, M_HAMMER);
	}
}

/*
 * Remove the zoneX translation cache for a buffer given its zone-2 offset.
 */
void
hammer_uncache_buffer(hammer_mount_t hmp, hammer_off_t buf_offset)
{
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	int vol_no;
	int error;

	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, &error);
	KKASSERT(volume != NULL);
	KKASSERT(buf_offset < volume->maxbuf_off);

	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer)
		buffer->zoneX_offset = 0;
	hammer_rel_volume(volume, 0);
}

/*
 * Access the filesystem buffer containing the specified hammer offset.
 * buf_offset is a conglomeration of the volume number and vol_buf_beg
 * relative buffer offset.  It must also have bit 55 set to be valid.
 * (see hammer_off_t in hammer_disk.h).
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bread(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	     struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) != 0);

	buffer = *bufferp;
	if (buffer == NULL || (buffer->zone2_offset != buf_offset &&
			       buffer->zoneX_offset != buf_offset)) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 0, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}
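
/*
 * Illustrative sketch (not compiled): the intended calling pattern for
 * hammer_bread().  A caller keeps a single hammer_buffer pointer, lets
 * hammer_bread() swap the underlying buffer as offsets move around, and
 * releases whatever buffer is left at the end.  Variable names are
 * hypothetical.
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *data;
 *	int error;
 *
 *	data = hammer_bread(hmp, buf_offset, &error, &buffer);
 *	if (data) {
 *		... examine the on-disk data ...
 *	}
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */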

/*
 * Access the filesystem buffer containing the specified hammer offset.
 * No disk read operation occurs.  The result buffer may contain garbage.
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	    struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || (buffer->zone2_offset != buf_offset &&
			       buffer->zoneX_offset != buf_offset)) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 1, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}

/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp)
{
	hammer_node_t node;

	KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_BTREE);

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &hmp->rb_nods_root, node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->hmp = hmp;
		if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	if (node->ondisk)
		*errorp = 0;
	else
		*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}

/*
 * Reference an already-referenced node.
 */
void
hammer_ref_node(hammer_node_t node)
{
	KKASSERT(node->lock.refs > 0 && node->ondisk != NULL);
	hammer_ref(&node->lock);
}
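
/*
 * Illustrative sketch (not compiled): acquiring a B-Tree node by its
 * zone-encoded offset and dropping it again.  On success the node is
 * referenced and node->ondisk points at the node within its backing
 * buffer.  Variable names are hypothetical.
 *
 *	hammer_node_t node;
 *	int error;
 *
 *	node = hammer_get_node(hmp, node_offset, &error);
 *	if (node) {
 *		... examine node->ondisk ...
 *		hammer_rel_node(node);
 *	}
 */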

/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int error;

	error = 0;
	++node->loading;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 *
		 * We could be racing a buffer release, in which case
		 * node->buffer may become NULL while we are blocked
		 * referencing the buffer.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
			if (error == 0 && node->buffer == NULL) {
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		} else {
			buffer = hammer_get_buffer(node->hmp,
						   node->node_offset, 0,
						   &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
				(node->node_offset & HAMMER_BUFMASK));
		}
	}
	--node->loading;
	hammer_unlock(&node->lock);
	return (error);
}

/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	node = *cache;
	if (node != NULL) {
		hammer_ref(&node->lock);
		if (node->ondisk)
			*errorp = 0;
		else
			*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}

/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer, the node failed to load;
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & HAMMER_NODE_FLUSH) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node.
	 */
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */
	hammer_rel_buffer(buffer, 0);
}

/*
 * Mark a node as deleted and release its media allocation back to the
 * blockmap.
 */
void
hammer_delete_node(hammer_transaction_t trans, hammer_node_t node)
{
	node->flags |= HAMMER_NODE_DELETED;
	hammer_blockmap_free(trans, node->node_offset, sizeof(*node->ondisk));
}

/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
again:
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
				goto again;
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;
			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}

void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}

/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->hmp->rb_nods_root, node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}

/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.  The buffer is locked.
 *
 * We may be interlocked with the buffer.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->ondisk == NULL);

		if (node->lock.refs == 0) {
			hammer_ref(&node->lock);
			node->flags |= HAMMER_NODE_FLUSH;
			hammer_rel_node(node);
		} else {
			KKASSERT(node->loading != 0);
			KKASSERT(node->buffer != NULL);
			buffer = node->buffer;
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
	}
}
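
/*
 * Illustrative sketch (not compiled): how the passive node cache is meant
 * to be used by a structure such as an inode.  The cache is just a
 * hammer_node pointer that HAMMER may clear behind the owner's back;
 * hammer_ref_node_safe() revalidates it and returns ENOENT when the
 * cached node has gone away.  The ip->cache field and the surrounding
 * logic are assumptions made for illustration only.
 *
 *	hammer_node_t node;
 *	int error;
 *
 *	node = hammer_ref_node_safe(hmp, &ip->cache, &error);
 *	if (node == NULL)
 *		node = hammer_get_node(hmp, node_offset, &error);
 *	if (node) {
 *		hammer_cache_node(node, &ip->cache);
 *		... use node->ondisk ...
 *		hammer_rel_node(node);
 *	}
 */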

/************************************************************************
 *				ALLOCATORS				*
 ************************************************************************/

/*
 * Allocate a B-Tree node.
 */
hammer_node_t
hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
{
	hammer_buffer_t buffer = NULL;
	hammer_node_t node = NULL;
	hammer_off_t node_offset;

	node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
					    sizeof(struct hammer_node_ondisk),
					    errorp);
	if (*errorp == 0) {
		node = hammer_get_node(trans->hmp, node_offset, errorp);
		hammer_modify_node_noundo(trans, node);
		bzero(node->ondisk, sizeof(*node->ondisk));
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}

/*
 * The returned buffers are already appropriately marked as being modified.
 * If the caller marks them again, unnecessary undo records may be generated.
 *
 * In-band data is indicated by data_bufferp == NULL.  Pass a data_len of 0
 * for zero-fill (caller modifies data_len afterwards).
 */
void *
hammer_alloc_record(hammer_transaction_t trans,
		    hammer_off_t *rec_offp, u_int16_t rec_type,
		    struct hammer_buffer **rec_bufferp,
		    int32_t data_len, void **datap,
		    struct hammer_buffer **data_bufferp, int *errorp)
{
	hammer_record_ondisk_t rec;
	hammer_off_t rec_offset;
	hammer_off_t data_offset;
	int32_t reclen;

	if (datap)
		*datap = NULL;

	/*
	 * Allocate the record
	 */
	rec_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_RECORD_INDEX,
					   HAMMER_RECORD_SIZE, errorp);
	if (*errorp)
		return(NULL);

	/*
	 * Allocate data
	 */
	if (data_len) {
		if (data_bufferp == NULL) {
			switch(rec_type) {
			case HAMMER_RECTYPE_DATA:
				reclen = offsetof(struct hammer_data_record,
						  data[0]);
				break;
			case HAMMER_RECTYPE_DIRENTRY:
				reclen = offsetof(struct hammer_entry_record,
						  name[0]);
				break;
			default:
				panic("hammer_alloc_record: illegal "
				      "in-band data");
				/* NOT REACHED */
				reclen = 0;
				break;
			}
			KKASSERT(reclen + data_len <= HAMMER_RECORD_SIZE);
			data_offset = rec_offset + reclen;
		} else if (data_len < HAMMER_BUFSIZE) {
			data_offset = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_SMALL_DATA_INDEX,
						data_len, errorp);
		} else {
			data_offset = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_LARGE_DATA_INDEX,
						data_len, errorp);
		}
	} else {
		data_offset = 0;
	}
	if (*errorp) {
		hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE);
		return(NULL);
	}

	/*
	 * Basic return values.
	 */
	*rec_offp = rec_offset;
	rec = hammer_bread(trans->hmp, rec_offset, errorp, rec_bufferp);
	hammer_modify_buffer(trans, *rec_bufferp, NULL, 0);
	bzero(rec, sizeof(*rec));
	KKASSERT(*errorp == 0);
	rec->base.data_off = data_offset;
	rec->base.data_len = data_len;

	if (data_bufferp) {
		if (data_len) {
			*datap = hammer_bread(trans->hmp, data_offset, errorp,
					      data_bufferp);
			KKASSERT(*errorp == 0);
			hammer_modify_buffer(trans, *data_bufferp, NULL, 0);
		} else {
			*datap = NULL;
		}
	} else if (data_len) {
		KKASSERT(data_offset + data_len - rec_offset <=
			 HAMMER_RECORD_SIZE);
		if (datap) {
			*datap = (void *)((char *)rec +
					  (int32_t)(data_offset - rec_offset));
		}
	} else {
		KKASSERT(datap == NULL);
	}
	KKASSERT(*errorp == 0);
	return(rec);
}

void *
hammer_alloc_data(hammer_transaction_t trans, int32_t data_len,
		  hammer_off_t *data_offsetp,
		  struct hammer_buffer **data_bufferp, int *errorp)
{
	void *data;

	/*
	 * Allocate data
	 */
	if (data_len) {
		if (data_len < HAMMER_BUFSIZE) {
			*data_offsetp = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_SMALL_DATA_INDEX,
						data_len, errorp);
		} else {
			*data_offsetp = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_LARGE_DATA_INDEX,
						data_len, errorp);
		}
	} else {
		*data_offsetp = 0;
	}
	if (*errorp == 0 && data_bufferp) {
		if (data_len) {
			data = hammer_bread(trans->hmp, *data_offsetp, errorp,
					    data_bufferp);
			KKASSERT(*errorp == 0);
			hammer_modify_buffer(trans, *data_bufferp, NULL, 0);
		} else {
			data = NULL;
		}
	} else {
		data = NULL;
	}
	KKASSERT(*errorp == 0);
	return(data);
}
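
/*
 * Illustrative sketch (not compiled): allocating a record with out-of-band
 * data via hammer_alloc_record().  Both returned buffers come back already
 * marked modified; the caller fills in the record and data and eventually
 * releases its buffer references.  Variable names are hypothetical.
 *
 *	struct hammer_buffer *rec_buffer = NULL;
 *	struct hammer_buffer *data_buffer = NULL;
 *	hammer_record_ondisk_t rec;
 *	hammer_off_t rec_offset;
 *	void *data;
 *	int error;
 *
 *	rec = hammer_alloc_record(trans, &rec_offset, HAMMER_RECTYPE_DATA,
 *				  &rec_buffer, data_len, &data,
 *				  &data_buffer, &error);
 *	if (rec) {
 *		... fill in rec->base and copy data_len bytes into data ...
 *		hammer_rel_buffer(rec_buffer, 0);
 *		hammer_rel_buffer(data_buffer, 0);
 *	}
 */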

/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}

static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	error = VOP_FSYNC(vp, info->waitfor);
	if (error)
		info->error = error;
	return(0);
}

int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	hammer_ref(&volume->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	hammer_rel_volume(volume, 1);
	return(0);
}

int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_rel_buffer(buffer, 1);
	return(0);
}