1 /* 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org> 5 * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Matthew Dillon <dillon@dragonflybsd.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 /* 38 * This subsystem implements most of the core support functions for 39 * the hammer2_chain structure. 
40 * 41 * Chains are the in-memory version on media objects (volume header, inodes, 42 * indirect blocks, data blocks, etc). Chains represent a portion of the 43 * HAMMER2 topology. 44 * 45 * Chains are no-longer delete-duplicated. Instead, the original in-memory 46 * chain will be moved along with its block reference (e.g. for things like 47 * renames, hardlink operations, modifications, etc), and will be indexed 48 * on a secondary list for flush handling instead of propagating a flag 49 * upward to the root. 50 * 51 * Concurrent front-end operations can still run against backend flushes 52 * as long as they do not cross the current flush boundary. An operation 53 * running above the current flush (in areas not yet flushed) can become 54 * part of the current flush while an operation running below the current 55 * flush can become part of the next flush. 56 */ 57 /* 58 #include <sys/cdefs.h> 59 #include <sys/param.h> 60 #include <sys/systm.h> 61 #include <sys/types.h> 62 #include <sys/lock.h> 63 #include <sys/buf.h> 64 65 #include <crypto/sha2/sha2.h> 66 */ 67 68 #include "hammer2.h" 69 70 static hammer2_chain_t *hammer2_chain_create_indirect( 71 hammer2_chain_t *parent, 72 hammer2_key_t key, int keybits, 73 hammer2_tid_t mtid, int for_type, int *errorp); 74 static int hammer2_chain_delete_obref(hammer2_chain_t *parent, 75 hammer2_chain_t *chain, 76 hammer2_tid_t mtid, int flags, 77 hammer2_blockref_t *obref); 78 static hammer2_chain_t *hammer2_combined_find( 79 hammer2_chain_t *parent, 80 hammer2_blockref_t *base, int count, 81 hammer2_key_t *key_nextp, 82 hammer2_key_t key_beg, hammer2_key_t key_end, 83 hammer2_blockref_t **brefp); 84 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain, 85 int depth); 86 /* 87 * There are many degenerate situations where an extreme rate of console 88 * output can occur from warnings and errors. Make sure this output does 89 * not impede operations. 
 */
/*
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };
*/

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB-tree comparator ordering chains by their (key, keybits) range.
 * Returns -1/0/+1 in the usual comparator sense.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen and, to
	 * catch any software issues early, we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)		/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)		/* fully to the right */
		return(1);
	return(0);			/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.
 *
 * The volume header and freemap chains are exempt because their data is
 * embedded rather than backed by a dio.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		    chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a top-down
 * recursion based on the ONFLUSH flag.  It locates MODIFIED and UPDATE chains,
 * flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as inflection
 * points, requiring the flusher to bridge across trees.  Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		/*
		 * Walk upward, hand-over-hand on the shared spinlocks,
		 * flagging each chain until one is already flagged, is an
		 * inode, or has no parent.
		 */
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
		    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		    bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_init(chain);

	return (chain);
}

/*
 * A common function to initialize chains including fchain and vchain.
 */
void
hammer2_chain_init(hammer2_chain_t *chain)
{
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
	hammer2_spin_init(&chain->core.spin, "h2chain");
	lockinit(&chain->diolk, "chdio", 0, 0);
}

/*
 * Add a reference to a chain element, preventing its destruction.
 * Undone via hammer2_chain_drop()
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/* NOP */
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
    int flags, int generation)
{
	hammer2_chain_t *xchain __debugvar;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
	    ("hammer2_chain_insert: collision %p %p (key=%016jx)",
	    chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
350 */ 351 void 352 hammer2_chain_drop(hammer2_chain_t *chain) 353 { 354 u_int refs; 355 356 KKASSERT(chain->refs > 0); 357 358 while (chain) { 359 refs = chain->refs; 360 cpu_ccfence(); 361 KKASSERT(refs > 0); 362 363 if (refs == 1) { 364 if (hammer2_mtx_ex_try(&chain->lock) == 0) 365 chain = hammer2_chain_lastdrop(chain, 0); 366 /* retry the same chain, or chain from lastdrop */ 367 } else { 368 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 369 break; 370 /* retry the same chain */ 371 } 372 cpu_pause(); 373 } 374 } 375 376 /* 377 * Unhold a held and probably not-locked chain, ensure that the data is 378 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 379 * lock and then simply unlocking the chain. 380 */ 381 void 382 hammer2_chain_unhold(hammer2_chain_t *chain) 383 { 384 u_int lockcnt; 385 int iter = 0; 386 387 for (;;) { 388 lockcnt = chain->lockcnt; 389 cpu_ccfence(); 390 if (lockcnt > 1) { 391 if (atomic_cmpset_int(&chain->lockcnt, 392 lockcnt, lockcnt - 1)) { 393 break; 394 } 395 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 396 hammer2_chain_unlock(chain); 397 break; 398 } else { 399 /* 400 * This situation can easily occur on SMP due to 401 * the gap inbetween the 1->0 transition and the 402 * final unlock. We cannot safely block on the 403 * mutex because lockcnt might go above 1. 404 * 405 * XXX Sleep for one tick if it takes too long. 
406 */ 407 if (++iter > 1000) { 408 if (iter > 1000 + hz) { 409 kprintf("hammer2: h2race1 %p\n", chain); 410 iter = 1000; 411 } 412 tsleep(&iter, 0, "h2race1", 1); 413 } 414 cpu_pause(); 415 } 416 } 417 } 418 419 void 420 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 421 { 422 hammer2_chain_unhold(chain); 423 hammer2_chain_drop(chain); 424 } 425 426 void 427 hammer2_chain_rehold(hammer2_chain_t *chain) 428 { 429 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 430 atomic_add_int(&chain->lockcnt, 1); 431 hammer2_chain_unlock(chain); 432 } 433 434 /* 435 * Handles the (potential) last drop of chain->refs from 1->0. Called with 436 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 437 * possible against refs and lockcnt. We must dispose of the mutex on chain. 438 * 439 * This function returns an unlocked chain for recursive drop or NULL. It 440 * can return the same chain if it determines it has raced another ref. 441 * 442 * -- 443 * 444 * When two chains need to be recursively dropped we use the chain we 445 * would otherwise free to placehold the additional chain. It's a bit 446 * convoluted but we can't just recurse without potentially blowing out 447 * the kernel stack. 448 * 449 * The chain cannot be freed if it has any children. 450 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 451 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 452 * Any dedup registration can remain intact. 453 * 454 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 455 */ 456 static 457 hammer2_chain_t * 458 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 459 { 460 hammer2_pfs_t *pmp; 461 hammer2_dev_t *hmp; 462 hammer2_chain_t *parent; 463 hammer2_chain_t *rdrop; 464 465 /* 466 * We need chain's spinlock to interlock the sub-tree test. 467 * We already have chain's mutex, protecting chain->parent. 468 * 469 * Remember that chain->refs can be in flux. 
470 */ 471 hammer2_spin_ex(&chain->core.spin); 472 473 if (chain->parent != NULL) { 474 /* 475 * If the chain has a parent the UPDATE bit prevents scrapping 476 * as the chain is needed to properly flush the parent. Try 477 * to complete the 1->0 transition and return NULL. Retry 478 * (return chain) if we are unable to complete the 1->0 479 * transition, else return NULL (nothing more to do). 480 * 481 * If the chain has a parent the MODIFIED bit prevents 482 * scrapping. 483 * 484 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 485 */ 486 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 487 HAMMER2_CHAIN_MODIFIED)) { 488 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 489 hammer2_spin_unex(&chain->core.spin); 490 hammer2_chain_assert_no_data(chain); 491 hammer2_mtx_unlock(&chain->lock); 492 chain = NULL; 493 } else { 494 hammer2_spin_unex(&chain->core.spin); 495 hammer2_mtx_unlock(&chain->lock); 496 } 497 return (chain); 498 } 499 /* spinlock still held */ 500 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 501 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 502 /* 503 * Retain the static vchain and fchain. Clear bits that 504 * are not relevant. Do not clear the MODIFIED bit, 505 * and certainly do not put it on the delayed-flush queue. 506 */ 507 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 508 } else { 509 /* 510 * The chain has no parent and can be flagged for destruction. 511 * Since it has no parent, UPDATE can also be cleared. 512 */ 513 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 514 if (chain->flags & HAMMER2_CHAIN_UPDATE) 515 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 516 517 /* 518 * If the chain has children we must propagate the DESTROY 519 * flag downward and rip the disconnected topology apart. 520 * This is accomplished by calling hammer2_flush() on the 521 * chain. 522 * 523 * Any dedup is already handled by the underlying DIO, so 524 * we do not have to specifically flush it here. 
525 */ 526 if (chain->core.chain_count) { 527 hammer2_spin_unex(&chain->core.spin); 528 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 529 HAMMER2_FLUSH_ALL); 530 hammer2_mtx_unlock(&chain->lock); 531 532 return(chain); /* retry drop */ 533 } 534 535 /* 536 * Otherwise we can scrap the MODIFIED bit if it is set, 537 * and continue along the freeing path. 538 * 539 * Be sure to clean-out any dedup bits. Without a parent 540 * this chain will no longer be visible to the flush code. 541 * Easy check data_off to avoid the volume root. 542 */ 543 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 544 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 545 atomic_add_long(&hammer2_count_modified_chains, -1); 546 if (chain->pmp) 547 hammer2_pfs_memory_wakeup(chain->pmp, -1); 548 } 549 /* spinlock still held */ 550 } 551 552 /* spinlock still held */ 553 554 /* 555 * If any children exist we must leave the chain intact with refs == 0. 556 * They exist because chains are retained below us which have refs or 557 * may require flushing. 558 * 559 * Retry (return chain) if we fail to transition the refs to 0, else 560 * return NULL indication nothing more to do. 561 * 562 * Chains with children are NOT put on the LRU list. 563 */ 564 if (chain->core.chain_count) { 565 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 566 hammer2_spin_unex(&chain->core.spin); 567 hammer2_chain_assert_no_data(chain); 568 hammer2_mtx_unlock(&chain->lock); 569 chain = NULL; 570 } else { 571 hammer2_spin_unex(&chain->core.spin); 572 hammer2_mtx_unlock(&chain->lock); 573 } 574 return (chain); 575 } 576 /* spinlock still held */ 577 /* no chains left under us */ 578 579 /* 580 * chain->core has no children left so no accessors can get to our 581 * chain from there. Now we have to lock the parent core to interlock 582 * remaining possible accessors that might bump chain's refs before 583 * we can safely drop chain's refs with intent to free the chain. 
584 */ 585 hmp = chain->hmp; 586 pmp = chain->pmp; /* can be NULL */ 587 rdrop = NULL; 588 589 parent = chain->parent; 590 591 /* 592 * WARNING! chain's spin lock is still held here, and other spinlocks 593 * will be acquired and released in the code below. We 594 * cannot be making fancy procedure calls! 595 */ 596 597 /* 598 * Spinlock the parent and try to drop the last ref on chain. 599 * On success determine if we should dispose of the chain 600 * (remove the chain from its parent, etc). 601 * 602 * (normal core locks are top-down recursive but we define 603 * core spinlocks as bottom-up recursive, so this is safe). 604 */ 605 if (parent) { 606 hammer2_spin_ex(&parent->core.spin); 607 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 608 /* 609 * 1->0 transition failed, retry. 610 */ 611 hammer2_spin_unex(&parent->core.spin); 612 hammer2_spin_unex(&chain->core.spin); 613 hammer2_mtx_unlock(&chain->lock); 614 615 return(chain); 616 } 617 618 /* 619 * 1->0 transition successful, parent spin held to prevent 620 * new lookups, chain spinlock held to protect parent field. 621 * Remove chain from the parent. 622 * 623 * If the chain is being removed from the parent's rbtree but 624 * is not blkmapped, we have to adjust live_count downward. If 625 * it is blkmapped then the blockref is retained in the parent 626 * as is its associated live_count. This case can occur when 627 * a chain added to the topology is unable to flush and is 628 * then later deleted. 
629 */ 630 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 631 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 632 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 633 atomic_add_int(&parent->core.live_count, -1); 634 } 635 RB_REMOVE(hammer2_chain_tree, 636 &parent->core.rbtree, chain); 637 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 638 --parent->core.chain_count; 639 chain->parent = NULL; 640 } 641 642 /* 643 * If our chain was the last chain in the parent's core the 644 * core is now empty and its parent might have to be 645 * re-dropped if it has 0 refs. 646 */ 647 if (parent->core.chain_count == 0) { 648 rdrop = parent; 649 atomic_add_int(&rdrop->refs, 1); 650 /* 651 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 652 rdrop = NULL; 653 */ 654 } 655 hammer2_spin_unex(&parent->core.spin); 656 parent = NULL; /* safety */ 657 /* FALL THROUGH */ 658 } else { 659 /* 660 * No-parent case. 661 */ 662 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 663 /* 664 * 1->0 transition failed, retry. 665 */ 666 hammer2_spin_unex(&parent->core.spin); 667 hammer2_spin_unex(&chain->core.spin); 668 hammer2_mtx_unlock(&chain->lock); 669 670 return(chain); 671 } 672 } 673 674 /* 675 * Successful 1->0 transition, no parent, no children... no way for 676 * anyone to ref this chain any more. We can clean-up and free it. 677 * 678 * We still have the core spinlock, and core's chain_count is 0. 679 * Any parent spinlock is gone. 680 */ 681 hammer2_spin_unex(&chain->core.spin); 682 hammer2_chain_assert_no_data(chain); 683 hammer2_mtx_unlock(&chain->lock); 684 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 685 chain->core.chain_count == 0); 686 687 /* 688 * All locks are gone, no pointers remain to the chain, finish 689 * freeing it. 
690 */ 691 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 692 HAMMER2_CHAIN_MODIFIED)) == 0); 693 694 /* 695 * Once chain resources are gone we can use the now dead chain 696 * structure to placehold what might otherwise require a recursive 697 * drop, because we have potentially two things to drop and can only 698 * return one directly. 699 */ 700 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 701 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 702 chain->hmp = NULL; 703 kfree_obj(chain, hmp->mchain); 704 atomic_add_long(&hammer2_chain_allocs, -1); 705 } 706 707 /* 708 * Possible chaining loop when parent re-drop needed. 709 */ 710 return(rdrop); 711 } 712 713 /* 714 * On last lock release. 715 */ 716 static hammer2_io_t * 717 hammer2_chain_drop_data(hammer2_chain_t *chain) 718 { 719 hammer2_io_t *dio; 720 721 if ((dio = chain->dio) != NULL) { 722 chain->dio = NULL; 723 chain->data = NULL; 724 } else { 725 switch(chain->bref.type) { 726 case HAMMER2_BREF_TYPE_VOLUME: 727 case HAMMER2_BREF_TYPE_FREEMAP: 728 break; 729 default: 730 if (chain->data != NULL) { 731 hammer2_spin_unex(&chain->core.spin); 732 panic("chain data not null: " 733 "chain %p bref %016jx.%02x " 734 "refs %d parent %p dio %p data %p", 735 chain, chain->bref.data_off, 736 chain->bref.type, chain->refs, 737 chain->parent, 738 chain->dio, chain->data); 739 } 740 KKASSERT(chain->data == NULL); 741 break; 742 } 743 } 744 return dio; 745 } 746 747 /* 748 * Lock a referenced chain element, acquiring its data with I/O if necessary, 749 * and specify how you would like the data to be resolved. 750 * 751 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 752 * 753 * The lock is allowed to recurse, multiple locking ops will aggregate 754 * the requested resolve types. Once data is assigned it will not be 755 * removed until the last unlock. 756 * 757 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 
758 * (typically used to avoid device/logical buffer 759 * aliasing for data) 760 * 761 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 762 * the INITIAL-create state (indirect blocks only). 763 * 764 * Do not resolve data elements for DATA chains. 765 * (typically used to avoid device/logical buffer 766 * aliasing for data) 767 * 768 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 769 * 770 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 771 * it will be locked exclusive. 772 * 773 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 774 * the lock fails, EAGAIN is returned. 775 * 776 * NOTE: Embedded elements (volume header, inodes) are always resolved 777 * regardless. 778 * 779 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 780 * element will instantiate and zero its buffer, and flush it on 781 * release. 782 * 783 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 784 * so as not to instantiate a device buffer, which could alias against 785 * a logical file buffer. However, if ALWAYS is specified the 786 * device buffer will be instantiated anyway. 787 * 788 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 789 * case it can be either 0 or EAGAIN. 790 * 791 * WARNING! This function blocks on I/O if data needs to be fetched. This 792 * blocking can run concurrent with other compatible lock holders 793 * who do not need data returning. The lock is not upgraded to 794 * exclusive during a data fetch, a separate bit is used to 795 * interlock I/O. However, an exclusive lock holder can still count 796 * on being interlocked against an I/O fetch managed by a shared 797 * lock holder. 
798 */ 799 int 800 hammer2_chain_lock(hammer2_chain_t *chain, int how) 801 { 802 KKASSERT(chain->refs > 0); 803 804 if (how & HAMMER2_RESOLVE_NONBLOCK) { 805 /* 806 * We still have to bump lockcnt before acquiring the lock, 807 * even for non-blocking operation, because the unlock code 808 * live-loops on lockcnt == 1 when dropping the last lock. 809 * 810 * If the non-blocking operation fails we have to use an 811 * unhold sequence to undo the mess. 812 * 813 * NOTE: LOCKAGAIN must always succeed without blocking, 814 * even if NONBLOCK is specified. 815 */ 816 atomic_add_int(&chain->lockcnt, 1); 817 if (how & HAMMER2_RESOLVE_SHARED) { 818 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 819 hammer2_mtx_sh_again(&chain->lock); 820 } else { 821 if (hammer2_mtx_sh_try(&chain->lock) != 0) { 822 hammer2_chain_unhold(chain); 823 return EAGAIN; 824 } 825 } 826 } else { 827 if (hammer2_mtx_ex_try(&chain->lock) != 0) { 828 hammer2_chain_unhold(chain); 829 return EAGAIN; 830 } 831 } 832 } else { 833 /* 834 * Get the appropriate lock. If LOCKAGAIN is flagged with 835 * SHARED the caller expects a shared lock to already be 836 * present and we are giving it another ref. This case must 837 * importantly not block if there is a pending exclusive lock 838 * request. 839 */ 840 atomic_add_int(&chain->lockcnt, 1); 841 if (how & HAMMER2_RESOLVE_SHARED) { 842 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 843 hammer2_mtx_sh_again(&chain->lock); 844 } else { 845 hammer2_mtx_sh(&chain->lock); 846 } 847 } else { 848 hammer2_mtx_ex(&chain->lock); 849 } 850 } 851 852 /* 853 * If we already have a valid data pointer make sure the data is 854 * synchronized to the current cpu, and then no further action is 855 * necessary. 856 */ 857 if (chain->data) { 858 if (chain->dio) 859 hammer2_io_bkvasync(chain->dio); 860 return 0; 861 } 862 863 /* 864 * Do we have to resolve the data? This is generally only 865 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased. 
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

#if 0
/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	KKASSERT(chain->lock.mtx_lock & MTX_MASK);
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.
	 *
	 * Only one thread may perform the I/O at a time; losers set
	 * IOSIGNAL and sleep on chain->flags until the winner releases
	 * IOINPROG below.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted versus the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		//struct m_buf *bp = dio->bp;

		if (dio->psize == chain->bytes //&&
		    /*(bp->b_flags & B_IOISSUED)*/) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			//bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Clear INITIAL.  In this case we used io_new() and the
		 * buffer has been zero'd and marked dirty.
		 *
		 * CHAIN_MODIFIED has not been set yet, and we leave it
		 * that way for now.  Set a temporary CHAIN_NOTTESTED flag
		 * to prevent hammer2_chain_testcheck() from trying to match
		 * a check code that has not yet been generated.  This bit
		 * should NOT end up on the actual media.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		/*
		 * Verify the check code against freshly loaded media data
		 * and remember the result so we don't re-check on every
		 * lock.
		 */
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer by pointing it into the buffer.
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap in between the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

#if 0
/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}
#endif

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
 */
static
hammer2_blockref_t *
hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp)
{
	hammer2_blockref_t *base;
	int count;

	if (parent->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * INITIAL chain: media data not instantiated yet, so
		 * there is no base array, only a count.
		 */
		base = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_base_and_count: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	} else {
		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			base = &parent->data->ipdata.u.blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			base = &parent->data->npdata[0];
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_base_and_count: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			base = NULL;
			count = 0;
			break;
		}
	}
	*countp = count;

	return base;
}

/*
 * This counts the number of live blockrefs in a block array and
 * also calculates the point at which all remaining blockrefs are empty.
 * This routine can only be called on a live chain.
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards from the end to find the first
			 * non-empty slot; live_zero is one past it.
			 */
			while (--count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					break;
			}
			chain->core.live_zero = count + 1;
			while (count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					    (int)(dedup_off &
					    HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
 */
int
hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
			hammer2_tid_t mtid, int flags)
{
	int error;

	/* Mark the inode modified before modifying its backing chain. */
	hammer2_inode_modify(ip);
	error = hammer2_chain_modify(chain, mtid, 0, flags);

	return error;
}

/*
 * This function returns the chain at the nearest key within the specified
 * range.  The returned chain will be referenced but not locked.
 *
 * This function will recurse through chain->rbtree as necessary and will
 * return a *key_nextp suitable for iteration.  *key_nextp is only set if
 * the iteration value is less than the current value of *key_nextp.
 *
 * The caller should use (*key_nextp) to calculate the actual range of
 * the returned element, which will be (key_beg to *key_nextp - 1), because
 * there might be another element which is superior to the returned element
 * and overlaps it.
 *
 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL
 * chains continue to be returned.  On EOF (*key_nextp) may overflow since
 * it will wind up being (key_end + 1).
 *
 * WARNING!  Must be called with child's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best match so far */
	hammer2_key_t		key_beg;	/* inclusive search start */
	hammer2_key_t		key_end;	/* inclusive search end */
	hammer2_key_t		key_next;	/* iteration continuation */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN comparison: -1 if the child's key range is entirely below the
 * search range, +1 if entirely above, 0 if it overlaps (candidate).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback: track the overlapping child with the nearest key and
 * clip key_next so the caller knows where the next iteration must resume.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 * (child_end == 0 indicates wrap-around at the top of the key space).
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.
 * Insert races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFSROOT bref gets a NULL pmp rather than inheriting the
	 * parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain held and locked, or NULL if a race
	 * occurred (in which case the ref and lock were already
	 * released above).
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API
 */
/*
 * Ref and lock (*parent) for a lookup/next iteration, honoring the
 * shared-lock request in flags.  Returns the locked parent, to be
 * released with hammer2_chain_lookup_done().
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

/*
 * Release the lock and ref acquired by hammer2_chain_lookup_init()
 * (or carried by a lookup/next iteration).  A NULL parent is a no-op.
 */
void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t *chain, int flags)
{
	hammer2_chain_t *parent;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 */
	parent = chain->parent;
	if (parent == NULL)
		panic("hammer2_chain_getparent: no parent");
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0)
		return parent;

	/*
	 * Slow path: release chain's lock, then acquire parent and chain
	 * in top-down order, and retry if the linkage changed while
	 * everything was unlocked.
	 */
	for (;;) {
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(parent, flags);
		hammer2_chain_lock(chain, flags);

		/*
		 * Parent relinking races are quite common.  We have to get
		 * it right or we will blow up the block table.
		 */
		if (chain->parent == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		cpu_ccfence();
		parent = chain->parent;
		if (parent == NULL)
			panic("hammer2_chain_getparent: no parent");
		hammer2_chain_ref(parent);
	}
	return parent;
}

/*
 * Take the locked chain and return a locked parent.  The chain is unlocked
 * and dropped.  *chainp is set to the returned parent as a convenience.
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * The chain does NOT need to be stable.  We use a tracking structure
 * to track the expected parent if the chain is deleted out from under us.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): no matching spin_ex is visible on this
		 * path; the unex before the panic looks like a leftover —
		 * confirm against upstream.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		/* fast path: got the parent without blocking */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		/*
		 * The reptrack migrated while we blocked on the lock;
		 * chase it to the new parent.
		 */
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children so remain as-of when this is called so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
 */
static
void
hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain)
{
	struct hammer2_reptrack *reptrack;

	/* (chain) must already be empty of children */
	KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree));
	while (chain->core.reptrack) {
		hammer2_spin_ex(&parent->core.spin);
		hammer2_spin_ex(&chain->core.spin);
		reptrack = chain->core.reptrack;
		if (reptrack == NULL) {
			/* raced away between the unlocked test and here */
			hammer2_spin_unex(&chain->core.spin);
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
		/*
		 * Relink the reptrack from (chain) to (parent), giving
		 * it a ref on (parent) and releasing its ref on (chain)
		 * below.
		 *
		 * NOTE(review): reptrack->spin is acquired here but no
		 * matching unex is visible in this loop — verify against
		 * upstream (may be a no-op stub in this userspace port).
		 */
		hammer2_spin_ex(&reptrack->spin);
		chain->core.reptrack = reptrack->next;
		reptrack->chain = parent;
		reptrack->next = parent->core.reptrack;
		parent->core.reptrack = reptrack;
		hammer2_chain_ref(parent);	/* reptrack */

		hammer2_spin_unex(&chain->core.spin);
		hammer2_spin_unex(&parent->core.spin);
		kprintf("hammer2: debug repchange %p %p->%p\n",
			reptrack, chain, parent);
		hammer2_chain_drop(chain);	/* reptrack */
	}
}

/*
 * Locate the first chain whose key range overlaps (key_beg, key_end)
 * inclusive.  (*parentp) typically points to an inode but can also point to
 * a related indirect block and this function will recurse upwards and find
 * the inode or the nearest undeleted indirect block covering the key range.
 *
 * This function unconditionally sets *errorp, replacing any previous value.
 *
 * (*parentp) must be exclusive or shared locked (depending on flags) and
 * referenced and can be an inode or an existing indirect block within the
 * inode.
 *
 * If (*parent) is errored out, this function will not attempt to recurse
 * the radix tree and will return NULL along with an appropriate *errorp.
 * If NULL is returned and *errorp is 0, the requested lookup could not be
 * located.
2374 * 2375 * On return (*parentp) will be modified to point at the deepest parent chain 2376 * element encountered during the search, as a helper for an insertion or 2377 * deletion. 2378 * 2379 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2380 * and referenced, and the old will be unlocked and dereferenced (no change 2381 * if they are both the same). This is particularly important if the caller 2382 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2383 * is returned, as long as no error occurred. 2384 * 2385 * The matching chain will be returned locked according to flags. 2386 * 2387 * -- 2388 * 2389 * NULL is returned if no match was found, but (*parentp) will still 2390 * potentially be adjusted. 2391 * 2392 * On return (*key_nextp) will point to an iterative value for key_beg. 2393 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2394 * 2395 * This function will also recurse up the chain if the key is not within the 2396 * current parent's range. (*parentp) can never be set to NULL. An iteration 2397 * can simply allow (*parentp) to float inside the loop. 2398 * 2399 * NOTE! chain->data is not always resolved. By default it will not be 2400 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2401 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2402 * BREF_TYPE_DATA as the device buffer can alias the logical file 2403 * buffer). 
 */
hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;	/* runaway-loop guard */

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the HAMMER2_RESOLVE_*
	 * lock modes used below.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");

	/*
	 * MATCHIND case that does not require parent->data (do prior to
	 * parent->error check).
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				/* exact match on the indirect node itself */
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}
		break;
	default:
		break;
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 *
		 * Debugging: Enter permanent wait state instead of
		 * panicing on unexpectedly NULL data for the moment.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				kprintf("hammer2: unexpected NULL data "
					"on %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration, otherwise go up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.  Snapshot the bref
	 * (bsave) so it can be revalidated after the chain is locked.
	 */
	bsave = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
	    chain->parent != parent) {
		/* raced a move/modify while locking; restart the scan */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING! The MATCHIND flag does not apply to this function.
2743 */ 2744 hammer2_chain_t * 2745 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2746 hammer2_key_t *key_nextp, 2747 hammer2_key_t key_beg, hammer2_key_t key_end, 2748 int *errorp, int flags) 2749 { 2750 hammer2_chain_t *parent; 2751 int how_maybe; 2752 2753 /* 2754 * Calculate locking flags for upward recursion. 2755 */ 2756 how_maybe = HAMMER2_RESOLVE_MAYBE; 2757 if (flags & HAMMER2_LOOKUP_SHARED) 2758 how_maybe |= HAMMER2_RESOLVE_SHARED; 2759 2760 parent = *parentp; 2761 *errorp = 0; 2762 2763 /* 2764 * Calculate the next index and recalculate the parent if necessary. 2765 */ 2766 if (chain) { 2767 key_beg = chain->bref.key + 2768 ((hammer2_key_t)1 << chain->bref.keybits); 2769 hammer2_chain_unlock(chain); 2770 hammer2_chain_drop(chain); 2771 2772 /* 2773 * chain invalid past this point, but we can still do a 2774 * pointer comparison w/parent. 2775 * 2776 * Any scan where the lookup returned degenerate data embedded 2777 * in the inode has an invalid index and must terminate. 2778 */ 2779 if (chain == parent) 2780 return(NULL); 2781 if (key_beg == 0 || key_beg > key_end) 2782 return(NULL); 2783 chain = NULL; 2784 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2785 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2786 /* 2787 * We reached the end of the iteration. 2788 */ 2789 return (NULL); 2790 } else { 2791 /* 2792 * Continue iteration with next parent unless the current 2793 * parent covers the range. 2794 * 2795 * (This also handles the case of a deleted, empty indirect 2796 * node). 
2797 */ 2798 key_beg = parent->bref.key + 2799 ((hammer2_key_t)1 << parent->bref.keybits); 2800 if (key_beg == 0 || key_beg > key_end) 2801 return (NULL); 2802 parent = hammer2_chain_repparent(parentp, how_maybe); 2803 } 2804 2805 /* 2806 * And execute 2807 */ 2808 return (hammer2_chain_lookup(parentp, key_nextp, 2809 key_beg, key_end, 2810 errorp, flags)); 2811 } 2812 2813 /* 2814 * Caller wishes to iterate chains under parent, loading new chains into 2815 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 2816 * then call hammer2_chain_scan() repeatedly until a non-zero return. 2817 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 2818 * with the returned chain for the scan. The returned *chainp will be 2819 * locked and referenced. Any prior contents will be unlocked and dropped. 2820 * 2821 * Caller should check the return value. A normal scan EOF will return 2822 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 2823 * error trying to access parent data. Any error in the returned chain 2824 * must be tested separately by the caller. 2825 * 2826 * (*chainp) is dropped on each scan, but will only be set if the returned 2827 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 2828 * returned via *chainp. The caller will get their bref only. 2829 * 2830 * The raw scan function is similar to lookup/next but does not seek to a key. 2831 * Blockrefs are iterated via first_bref = (parent, NULL) and 2832 * next_chain = (parent, bref). 2833 * 2834 * The passed-in parent must be locked and its data resolved. The function 2835 * nominally returns a locked and referenced *chainp != NULL for chains 2836 * the caller might need to recurse on (and will dipose of any *chainp passed 2837 * in). The caller must check the chain->bref.type either way. 
 */
int
hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp,
		   hammer2_blockref_t *bref, int *firstp,
		   int flags)
{
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref_ptr;
	hammer2_key_t key;
	hammer2_key_t next_key;
	hammer2_chain_t *chain = NULL;
	int count = 0;
	int how;
	int generation;
	int maxloops = 300000;	/* runaway-loop guard */
	int error;

	error = 0;

	/*
	 * Scan flags borrowed from lookup.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Calculate key to locate first/next element, unlocking the previous
	 * element as we go.  Be careful, the key calculation can overflow
	 * (overflow to 0 signals end-of-iteration).
	 *
	 * (also reset bref to NULL)
	 */
	if (*firstp) {
		key = 0;
		*firstp = 0;
	} else {
		key = bref->key + ((hammer2_key_t)1 << bref->keybits);
		if ((chain = *chainp) != NULL) {
			*chainp = NULL;
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
		}
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
	}

again:
	if (parent->error) {
		error = parent->error;
		goto done;
	}
	if (--maxloops == 0)
		panic("hammer2_chain_scan: maxloops");

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * An inode with embedded data has no sub-chains.
		 *
		 * WARNING! Bulk scan code may pass a static chain marked
		 *	    as BREF_TYPE_INODE with a copy of the volume
		 *	    root blockset to snapshot the volume.
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_scan: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	next_key = 0;
	bref_ptr = NULL;
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      &next_key,
				      key, HAMMER2_KEY_MAX,
				      &bref_ptr);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, we're done.
	 */
	if (bref_ptr == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		KKASSERT(chain == NULL);
		error |= HAMMER2_ERROR_EOF;
		goto done;
	}

	/*
	 * Copy into the supplied stack-based blockref.
	 */
	*bref = *bref_ptr;

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		switch(bref->type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/*
			 * Recursion, always get the chain
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation,
						  bref, how);
			if (chain == NULL)
				goto again;
			break;
		default:
			/*
			 * No recursion, do not waste time instantiating
			 * a chain, just iterate using the bref.
			 */
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
	} else {
		/*
		 * Recursion or not we need the chain in order to supply
		 * the bref.
		 */
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);
		hammer2_chain_lock(chain, how);
	}
	if (chain &&
	    (bcmp(bref, &chain->bref, sizeof(*bref)) ||
	     chain->parent != parent)) {
		/* raced a move/modify while locking; restart the scan */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: XXX this could create problems with scans used in
	 *	 situations other than mount-time recovery.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;

		key = next_key;
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		goto again;
	}

done:
	/*
	 * All done, return the bref or NULL, supply chain if necessary.
	 */
	if (chain)
		*chainp = chain;
	return (error);
}

/*
 * Create and return a new hammer2 system memory structure of the specified
 * key, type and size and insert it under (*parentp).  This is a full
 * insertion, based on the supplied key/keybits, and may involve creating
 * indirect blocks and moving other chains around via delete/duplicate.
 *
 * This call can be made with parent == NULL as long as a non -1 methods
 * is supplied.  hmp must also be supplied in this situation (otherwise
 * hmp is extracted from the supplied parent).  The chain will be detached
 * from the topology.  A later call with both parent and chain can be made
 * to attach it.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * (*parentp) must be exclusive locked and may be replaced on return
 * depending on how much work the function had to do.
 *
 * (*parentp) must not be errored or this function will assert.
 *
 * (*chainp) usually starts out NULL and returns the newly created chain,
 * but if the caller desires the caller may allocate a disconnected chain
 * and pass it in instead.
 *
 * This function should NOT be used to insert INDIRECT blocks.  It is
 * typically used to create/insert inodes and data blocks.
3098 * 3099 * Caller must pass-in an exclusively locked parent the new chain is to 3100 * be inserted under, and optionally pass-in a disconnected, exclusively 3101 * locked chain to insert (else we create a new chain). The function will 3102 * adjust (*parentp) as necessary, create or connect the chain, and 3103 * return an exclusively locked chain in *chainp. 3104 * 3105 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3106 * and will be reassigned. 3107 * 3108 * NOTE: returns HAMMER_ERROR_* flags 3109 */ 3110 int 3111 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3112 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3113 hammer2_key_t key, int keybits, int type, size_t bytes, 3114 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3115 { 3116 hammer2_chain_t *chain; 3117 hammer2_chain_t *parent; 3118 hammer2_blockref_t *base; 3119 hammer2_blockref_t dummy; 3120 int allocated = 0; 3121 int error = 0; 3122 int count; 3123 int maxloops = 300000; 3124 3125 /* 3126 * Topology may be crossing a PFS boundary. 3127 */ 3128 parent = *parentp; 3129 if (parent) { 3130 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3131 KKASSERT(parent->error == 0); 3132 hmp = parent->hmp; 3133 } 3134 chain = *chainp; 3135 3136 if (chain == NULL) { 3137 /* 3138 * First allocate media space and construct the dummy bref, 3139 * then allocate the in-memory chain structure. Set the 3140 * INITIAL flag for fresh chains which do not have embedded 3141 * data. 3142 */ 3143 bzero(&dummy, sizeof(dummy)); 3144 dummy.type = type; 3145 dummy.key = key; 3146 dummy.keybits = keybits; 3147 dummy.data_off = hammer2_getradix(bytes); 3148 3149 /* 3150 * Inherit methods from parent by default. Primarily used 3151 * for BREF_TYPE_DATA. Non-data types *must* be set to 3152 * a non-NONE check algorithm. 
3153 */ 3154 if (methods == HAMMER2_METH_DEFAULT) 3155 dummy.methods = parent->bref.methods; 3156 else 3157 dummy.methods = (uint8_t)methods; 3158 3159 if (type != HAMMER2_BREF_TYPE_DATA && 3160 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3161 dummy.methods |= 3162 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3163 } 3164 3165 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3166 3167 /* 3168 * Lock the chain manually, chain_lock will load the chain 3169 * which we do NOT want to do. (note: chain->refs is set 3170 * to 1 by chain_alloc() for us, but lockcnt is not). 3171 */ 3172 chain->lockcnt = 1; 3173 hammer2_mtx_ex(&chain->lock); 3174 allocated = 1; 3175 3176 /* 3177 * Set INITIAL to optimize I/O. The flag will generally be 3178 * processed when we call hammer2_chain_modify(). 3179 */ 3180 switch(type) { 3181 case HAMMER2_BREF_TYPE_VOLUME: 3182 case HAMMER2_BREF_TYPE_FREEMAP: 3183 panic("hammer2_chain_create: called with volume type"); 3184 break; 3185 case HAMMER2_BREF_TYPE_INDIRECT: 3186 panic("hammer2_chain_create: cannot be used to" 3187 "create indirect block"); 3188 break; 3189 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3190 panic("hammer2_chain_create: cannot be used to" 3191 "create freemap root or node"); 3192 break; 3193 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3194 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3195 /* fall through */ 3196 case HAMMER2_BREF_TYPE_DIRENT: 3197 case HAMMER2_BREF_TYPE_INODE: 3198 case HAMMER2_BREF_TYPE_DATA: 3199 default: 3200 /* 3201 * leave chain->data NULL, set INITIAL 3202 */ 3203 KKASSERT(chain->data == NULL); 3204 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3205 break; 3206 } 3207 } else { 3208 /* 3209 * We are reattaching a previously deleted chain, possibly 3210 * under a new parent and possibly with a new key/keybits. 3211 * The chain does not have to be in a modified state. The 3212 * UPDATE flag will be set later on in this routine. 3213 * 3214 * Do NOT mess with the current state of the INITIAL flag. 
3215 */ 3216 chain->bref.key = key; 3217 chain->bref.keybits = keybits; 3218 if (chain->flags & HAMMER2_CHAIN_DELETED) 3219 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3220 KKASSERT(chain->parent == NULL); 3221 } 3222 3223 /* 3224 * Set the appropriate bref flag if requested. 3225 * 3226 * NOTE! Callers can call this function to move chains without 3227 * knowing about special flags, so don't clear bref flags 3228 * here! 3229 */ 3230 if (flags & HAMMER2_INSERT_PFSROOT) 3231 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3232 3233 if (parent == NULL) 3234 goto skip; 3235 3236 /* 3237 * Calculate how many entries we have in the blockref array and 3238 * determine if an indirect block is required when inserting into 3239 * the parent. 3240 */ 3241 again: 3242 if (--maxloops == 0) 3243 panic("hammer2_chain_create: maxloops"); 3244 3245 switch(parent->bref.type) { 3246 case HAMMER2_BREF_TYPE_INODE: 3247 if ((parent->data->ipdata.meta.op_flags & 3248 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3249 kprintf("hammer2: parent set for direct-data! 
" 3250 "pkey=%016jx ckey=%016jx\n", 3251 parent->bref.key, 3252 chain->bref.key); 3253 } 3254 KKASSERT((parent->data->ipdata.meta.op_flags & 3255 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3256 KKASSERT(parent->data != NULL); 3257 base = &parent->data->ipdata.u.blockset.blockref[0]; 3258 count = HAMMER2_SET_COUNT; 3259 break; 3260 case HAMMER2_BREF_TYPE_INDIRECT: 3261 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3262 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3263 base = NULL; 3264 else 3265 base = &parent->data->npdata[0]; 3266 count = parent->bytes / sizeof(hammer2_blockref_t); 3267 break; 3268 case HAMMER2_BREF_TYPE_VOLUME: 3269 KKASSERT(parent->data != NULL); 3270 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3271 count = HAMMER2_SET_COUNT; 3272 break; 3273 case HAMMER2_BREF_TYPE_FREEMAP: 3274 KKASSERT(parent->data != NULL); 3275 base = &parent->data->blkset.blockref[0]; 3276 count = HAMMER2_SET_COUNT; 3277 break; 3278 default: 3279 panic("hammer2_chain_create: unrecognized blockref type: %d", 3280 parent->bref.type); 3281 base = NULL; 3282 count = 0; 3283 break; 3284 } 3285 3286 /* 3287 * Make sure we've counted the brefs 3288 */ 3289 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3290 hammer2_chain_countbrefs(parent, base, count); 3291 3292 KASSERT(parent->core.live_count >= 0 && 3293 parent->core.live_count <= count, 3294 ("bad live_count %d/%d (%02x, %d)", 3295 parent->core.live_count, count, 3296 parent->bref.type, parent->bytes)); 3297 3298 /* 3299 * If no free blockref could be found we must create an indirect 3300 * block and move a number of blockrefs into it. With the parent 3301 * locked we can safely lock each child in order to delete+duplicate 3302 * it without causing a deadlock. 3303 * 3304 * This may return the new indirect block or the old parent depending 3305 * on where the key falls. NULL is returned on error. 
3306 */ 3307 if (parent->core.live_count == count) { 3308 hammer2_chain_t *nparent; 3309 3310 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3311 3312 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3313 mtid, type, &error); 3314 if (nparent == NULL) { 3315 if (allocated) 3316 hammer2_chain_drop(chain); 3317 chain = NULL; 3318 goto done; 3319 } 3320 if (parent != nparent) { 3321 hammer2_chain_unlock(parent); 3322 hammer2_chain_drop(parent); 3323 parent = *parentp = nparent; 3324 } 3325 goto again; 3326 } 3327 3328 /* 3329 * fall through if parent, or skip to here if no parent. 3330 */ 3331 skip: 3332 if (chain->flags & HAMMER2_CHAIN_DELETED) 3333 kprintf("Inserting deleted chain @%016jx\n", 3334 chain->bref.key); 3335 3336 /* 3337 * Link the chain into its parent. 3338 */ 3339 if (chain->parent != NULL) 3340 panic("hammer2: hammer2_chain_create: chain already connected"); 3341 KKASSERT(chain->parent == NULL); 3342 if (parent) { 3343 KKASSERT(parent->core.live_count < count); 3344 hammer2_chain_insert(parent, chain, 3345 HAMMER2_CHAIN_INSERT_SPIN | 3346 HAMMER2_CHAIN_INSERT_LIVE, 3347 0); 3348 } 3349 3350 if (allocated) { 3351 /* 3352 * Mark the newly created chain modified. This will cause 3353 * UPDATE to be set and process the INITIAL flag. 3354 * 3355 * Device buffers are not instantiated for DATA elements 3356 * as these are handled by logical buffers. 3357 * 3358 * Indirect and freemap node indirect blocks are handled 3359 * by hammer2_chain_create_indirect() and not by this 3360 * function. 3361 * 3362 * Data for all other bref types is expected to be 3363 * instantiated (INODE, LEAF). 
3364 */ 3365 switch(chain->bref.type) { 3366 case HAMMER2_BREF_TYPE_DATA: 3367 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3368 case HAMMER2_BREF_TYPE_DIRENT: 3369 case HAMMER2_BREF_TYPE_INODE: 3370 error = hammer2_chain_modify(chain, mtid, dedup_off, 3371 HAMMER2_MODIFY_OPTDATA); 3372 break; 3373 default: 3374 /* 3375 * Remaining types are not supported by this function. 3376 * In particular, INDIRECT and LEAF_NODE types are 3377 * handled by create_indirect(). 3378 */ 3379 panic("hammer2_chain_create: bad type: %d", 3380 chain->bref.type); 3381 /* NOT REACHED */ 3382 break; 3383 } 3384 } else { 3385 /* 3386 * When reconnecting a chain we must set UPDATE and 3387 * setflush so the flush recognizes that it must update 3388 * the bref in the parent. 3389 */ 3390 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3391 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3392 } 3393 3394 /* 3395 * We must setflush(parent) to ensure that it recurses through to 3396 * chain. setflush(chain) might not work because ONFLUSH is possibly 3397 * already set in the chain (so it won't recurse up to set it in the 3398 * parent). 3399 */ 3400 if (parent) 3401 hammer2_chain_setflush(parent); 3402 3403 done: 3404 *chainp = chain; 3405 3406 return (error); 3407 } 3408 3409 /* 3410 * Move the chain from its old parent to a new parent. The chain must have 3411 * already been deleted or already disconnected (or never associated) with 3412 * a parent. The chain is reassociated with the new parent and the deleted 3413 * flag will be cleared (no longer deleted). The chain's modification state 3414 * is not altered. 3415 * 3416 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3417 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3418 * FULL. This typically means that the caller is creating the chain after 3419 * doing a hammer2_chain_lookup(). 3420 * 3421 * Neither (parent) or (chain) can be errored. 
3422 * 3423 * If (parent) is non-NULL then the chain is inserted under the parent. 3424 * 3425 * If (parent) is NULL then the newly duplicated chain is not inserted 3426 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3427 * passing into hammer2_chain_create() after this function returns). 3428 * 3429 * WARNING! This function calls create which means it can insert indirect 3430 * blocks. This can cause other unrelated chains in the parent to 3431 * be moved to a newly inserted indirect block in addition to the 3432 * specific chain. 3433 */ 3434 void 3435 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3436 hammer2_tid_t mtid, int flags) 3437 { 3438 hammer2_blockref_t *bref; 3439 hammer2_chain_t *parent; 3440 3441 /* 3442 * WARNING! We should never resolve DATA to device buffers 3443 * (XXX allow it if the caller did?), and since 3444 * we currently do not have the logical buffer cache 3445 * buffer in-hand to fix its cached physical offset 3446 * we also force the modify code to not COW it. XXX 3447 * 3448 * NOTE! We allow error'd chains to be renamed. The bref itself 3449 * is good and can be renamed. The content, however, may 3450 * be inaccessible. 3451 */ 3452 KKASSERT(chain->parent == NULL); 3453 /*KKASSERT(chain->error == 0); allow */ 3454 bref = &chain->bref; 3455 3456 /* 3457 * If parent is not NULL the duplicated chain will be entered under 3458 * the parent and the UPDATE bit set to tell flush to update 3459 * the blockref. 3460 * 3461 * We must setflush(parent) to ensure that it recurses through to 3462 * chain. setflush(chain) might not work because ONFLUSH is possibly 3463 * already set in the chain (so it won't recurse up to set it in the 3464 * parent). 3465 * 3466 * Having both chains locked is extremely important for atomicy. 
3467 */ 3468 if (parentp && (parent = *parentp) != NULL) { 3469 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3470 KKASSERT(parent->refs > 0); 3471 KKASSERT(parent->error == 0); 3472 3473 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3474 HAMMER2_METH_DEFAULT, 3475 bref->key, bref->keybits, bref->type, 3476 chain->bytes, mtid, 0, flags); 3477 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3478 hammer2_chain_setflush(*parentp); 3479 } 3480 } 3481 3482 /* 3483 * This works in tandem with delete_obref() to install a blockref in 3484 * (typically) an indirect block that is associated with the chain being 3485 * moved to *parentp. 3486 * 3487 * The reason we need this function is that the caller needs to maintain 3488 * the blockref as it was, and not generate a new blockref for what might 3489 * be a modified chain. Otherwise stuff will leak into the flush that 3490 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3491 * 3492 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and 3493 * CHAIN_UPDATE. We must set BLKMAPUPD if the bref does not match, and 3494 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3495 * it does. Otherwise we can end up in a situation where H2 is unable to 3496 * clean up the in-memory chain topology. 3497 * 3498 * The reason for this is that flushes do not generally flush through 3499 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3500 * or sideq to properly flush and dispose of the related inode chain's flags. 3501 * Situations where the inode is not actually modified by the frontend, 3502 * but where we have to move the related chains around as we insert or cleanup 3503 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3504 * inode chain that does not have a hammer2_inode_t associated with it. 
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	/* rename first, then re-install the preserved (old) blockref */
	hammer2_chain_rename(parentp, chain, mtid, flags);

	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			/*
			 * Installed (old) bref does not match the chain's
			 * current bref; the flush must update the blockmap
			 * entry.
			 */
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			/*
			 * Brefs match; clear UPDATE (likely set by the
			 * chain_rename above) so the chain remains
			 * disposable.
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 *
 * Returns 0 on success or a HAMMER2_ERROR_* code if the parent could not
 * be marked modified.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);

	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer.
		 *
		 * NOTE: spinlock order here is chain first, then parent
		 *	 (matches the ONRBTREE branch below).
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		/* detach chain from the parent's live view */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
This 3696 * is accomplished by always recursing through matching keyspaces in 3697 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3698 * 3699 * The current implementation calculates the current worst-case keyspace by 3700 * iterating the current parent and then divides it into two halves, choosing 3701 * whichever half has the most elements (not necessarily the half containing 3702 * the requested key). 3703 * 3704 * We can also opt to use the half with the least number of elements. This 3705 * causes lower-numbered keys (aka logical file offsets) to recurse through 3706 * fewer indirect blocks and higher-numbered keys to recurse through more. 3707 * This also has the risk of not moving enough elements to the new indirect 3708 * block and being forced to create several indirect blocks before the element 3709 * can be inserted. 3710 * 3711 * Must be called with an exclusively locked parent. 3712 * 3713 * NOTE: *errorp set to HAMMER_ERROR_* flags 3714 */ 3715 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3716 hammer2_key_t *keyp, int keybits, 3717 hammer2_blockref_t *base, int count); 3718 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3719 hammer2_key_t *keyp, int keybits, 3720 hammer2_blockref_t *base, int count, 3721 int ncount); 3722 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3723 hammer2_key_t *keyp, int keybits, 3724 hammer2_blockref_t *base, int count, 3725 int ncount); 3726 static 3727 hammer2_chain_t * 3728 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3729 hammer2_key_t create_key, int create_bits, 3730 hammer2_tid_t mtid, int for_type, int *errorp) 3731 { 3732 hammer2_dev_t *hmp; 3733 hammer2_blockref_t *base; 3734 hammer2_blockref_t *bref; 3735 hammer2_blockref_t bsave; 3736 hammer2_blockref_t dummy; 3737 hammer2_chain_t *chain; 3738 hammer2_chain_t *ichain; 3739 hammer2_key_t key = create_key; 3740 hammer2_key_t key_beg; 3741 hammer2_key_t key_end; 3742 hammer2_key_t key_next; 3743 
int keybits = create_bits; 3744 int count; 3745 int ncount; 3746 int nbytes; 3747 int loops; 3748 int error; 3749 int reason; 3750 int generation; 3751 int maxloops = 300000; 3752 3753 /* 3754 * Calculate the base blockref pointer or NULL if the chain 3755 * is known to be empty. We need to calculate the array count 3756 * for RB lookups either way. 3757 */ 3758 hmp = parent->hmp; 3759 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3760 3761 /* 3762 * Pre-modify the parent now to avoid having to deal with error 3763 * processing if we tried to later (in the middle of our loop). 3764 * 3765 * We are going to be moving bref's around, the indirect blocks 3766 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3767 */ 3768 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3769 if (*errorp) { 3770 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3771 *errorp, hammer2_error_str(*errorp)); 3772 return NULL; 3773 } 3774 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3775 3776 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3777 base = hammer2_chain_base_and_count(parent, &count); 3778 3779 /* 3780 * How big should our new indirect block be? It has to be at least 3781 * as large as its parent for splits to work properly. 3782 * 3783 * The freemap uses a specific indirect block size. The number of 3784 * levels are built dynamically and ultimately depend on the size 3785 * volume. Because freemap blocks are taken from the reserved areas 3786 * of the volume our goal is efficiency (fewer levels) and not so 3787 * much to save disk space. 3788 * 3789 * The first indirect block level for a directory usually uses 3790 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3791 * the hash mechanism, this typically gives us a nominal 3792 * 32 * 4 entries with one level of indirection. 3793 * 3794 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3795 * indirect blocks. 
The initial 4 entries in the inode gives us 3796 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3797 * of indirection gives us 137GB, and so forth. H2 can support 3798 * huge file sizes but they are not typical, so we try to stick 3799 * with compactness and do not use a larger indirect block size. 3800 * 3801 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3802 * due to the way indirect blocks are created this usually winds 3803 * up being extremely inefficient for small files. Even though 3804 * 16KB requires more levels of indirection for very large files, 3805 * the 16KB records can be ganged together into 64KB DIOs. 3806 */ 3807 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3808 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3809 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3810 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3811 if (parent->data->ipdata.meta.type == 3812 HAMMER2_OBJTYPE_DIRECTORY) 3813 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3814 else 3815 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3816 3817 } else { 3818 nbytes = HAMMER2_IND_BYTES_NOM; 3819 } 3820 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3821 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3822 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3823 nbytes = count * sizeof(hammer2_blockref_t); 3824 } 3825 ncount = nbytes / sizeof(hammer2_blockref_t); 3826 3827 /* 3828 * When creating an indirect block for a freemap node or leaf 3829 * the key/keybits must be fitted to static radix levels because 3830 * particular radix levels use particular reserved blocks in the 3831 * related zone. 3832 * 3833 * This routine calculates the key/radix of the indirect block 3834 * we need to create, and whether it is on the high-side or the 3835 * low-side. 
3836 */ 3837 switch(for_type) { 3838 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3839 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3840 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3841 base, count); 3842 break; 3843 case HAMMER2_BREF_TYPE_DATA: 3844 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 3845 base, count, ncount); 3846 break; 3847 case HAMMER2_BREF_TYPE_DIRENT: 3848 case HAMMER2_BREF_TYPE_INODE: 3849 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 3850 base, count, ncount); 3851 break; 3852 default: 3853 panic("illegal indirect block for bref type %d", for_type); 3854 break; 3855 } 3856 3857 /* 3858 * Normalize the key for the radix being represented, keeping the 3859 * high bits and throwing away the low bits. 3860 */ 3861 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3862 3863 /* 3864 * Ok, create our new indirect block 3865 */ 3866 bzero(&dummy, sizeof(dummy)); 3867 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3868 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3869 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3870 } else { 3871 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 3872 } 3873 dummy.key = key; 3874 dummy.keybits = keybits; 3875 dummy.data_off = hammer2_getradix(nbytes); 3876 dummy.methods = 3877 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 3878 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 3879 3880 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 3881 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3882 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3883 /* ichain has one ref at this point */ 3884 3885 /* 3886 * We have to mark it modified to allocate its block, but use 3887 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3888 * it won't be acted upon by the flush code. 3889 * 3890 * XXX remove OPTDATA, we need a fully initialized indirect block to 3891 * be able to move the original blockref. 
3892 */ 3893 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 3894 if (*errorp) { 3895 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3896 *errorp, hammer2_error_str(*errorp)); 3897 hammer2_chain_unlock(ichain); 3898 hammer2_chain_drop(ichain); 3899 return NULL; 3900 } 3901 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 3902 3903 /* 3904 * Iterate the original parent and move the matching brefs into 3905 * the new indirect block. 3906 * 3907 * XXX handle flushes. 3908 */ 3909 key_beg = 0; 3910 key_end = HAMMER2_KEY_MAX; 3911 key_next = 0; /* avoid gcc warnings */ 3912 hammer2_spin_ex(&parent->core.spin); 3913 loops = 0; 3914 reason = 0; 3915 3916 for (;;) { 3917 /* 3918 * Parent may have been modified, relocating its block array. 3919 * Reload the base pointer. 3920 */ 3921 base = hammer2_chain_base_and_count(parent, &count); 3922 3923 if (++loops > 100000) { 3924 hammer2_spin_unex(&parent->core.spin); 3925 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3926 reason, parent, base, count, key_next); 3927 } 3928 3929 /* 3930 * NOTE: spinlock stays intact, returned chain (if not NULL) 3931 * is not referenced or locked which means that we 3932 * cannot safely check its flagged / deletion status 3933 * until we lock it. 3934 */ 3935 chain = hammer2_combined_find(parent, base, count, 3936 &key_next, 3937 key_beg, key_end, 3938 &bref); 3939 generation = parent->core.generation; 3940 if (bref == NULL) 3941 break; 3942 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3943 3944 /* 3945 * Skip keys that are not within the key/radix of the new 3946 * indirect block. They stay in the parent. 3947 */ 3948 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 3949 goto next_key_spinlocked; 3950 } 3951 3952 /* 3953 * Load the new indirect block by acquiring the related 3954 * chains (potentially from media as it might not be 3955 * in-memory). Then move it to the new parent (ichain). 
3956 * 3957 * chain is referenced but not locked. We must lock the 3958 * chain to obtain definitive state. 3959 */ 3960 bsave = *bref; 3961 if (chain) { 3962 /* 3963 * Use chain already present in the RBTREE 3964 */ 3965 hammer2_chain_ref(chain); 3966 hammer2_spin_unex(&parent->core.spin); 3967 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3968 } else { 3969 /* 3970 * Get chain for blockref element. _get returns NULL 3971 * on insertion race. 3972 */ 3973 hammer2_spin_unex(&parent->core.spin); 3974 chain = hammer2_chain_get(parent, generation, &bsave, 3975 HAMMER2_RESOLVE_NEVER); 3976 if (chain == NULL) { 3977 reason = 1; 3978 hammer2_spin_ex(&parent->core.spin); 3979 continue; 3980 } 3981 } 3982 3983 /* 3984 * This is always live so if the chain has been deleted 3985 * we raced someone and we have to retry. 3986 * 3987 * NOTE: Lookups can race delete-duplicate because 3988 * delete-duplicate does not lock the parent's core 3989 * (they just use the spinlock on the core). 3990 * 3991 * (note reversed logic for this one) 3992 */ 3993 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 3994 chain->parent != parent || 3995 (chain->flags & HAMMER2_CHAIN_DELETED)) { 3996 hammer2_chain_unlock(chain); 3997 hammer2_chain_drop(chain); 3998 if (hammer2_debug & 0x0040) { 3999 kprintf("LOST PARENT RETRY " 4000 "RETRY (%p,%p)->%p %08x\n", 4001 parent, chain->parent, chain, chain->flags); 4002 } 4003 hammer2_spin_ex(&parent->core.spin); 4004 continue; 4005 } 4006 4007 /* 4008 * Shift the chain to the indirect block. 4009 * 4010 * WARNING! No reason for us to load chain data, pass NOSTATS 4011 * to prevent delete/insert from trying to access 4012 * inode stats (and thus asserting if there is no 4013 * chain->data loaded). 4014 * 4015 * WARNING! The (parent, chain) deletion may modify the parent 4016 * and invalidate the base pointer. 4017 * 4018 * WARNING! Parent must already be marked modified, so we 4019 * can assume that chain_delete always suceeds. 4020 * 4021 * WARNING! 
hammer2_chain_repchange() does not have to be 4022 * called (and doesn't work anyway because we are 4023 * only doing a partial shift). A recursion that is 4024 * in-progress can continue at the current parent 4025 * and will be able to properly find its next key. 4026 */ 4027 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4028 &bsave); 4029 KKASSERT(error == 0); 4030 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 4031 hammer2_chain_unlock(chain); 4032 hammer2_chain_drop(chain); 4033 KKASSERT(parent->refs > 0); 4034 chain = NULL; 4035 base = NULL; /* safety */ 4036 hammer2_spin_ex(&parent->core.spin); 4037 next_key_spinlocked: 4038 if (--maxloops == 0) 4039 panic("hammer2_chain_create_indirect: maxloops"); 4040 reason = 4; 4041 if (key_next == 0 || key_next > key_end) 4042 break; 4043 key_beg = key_next; 4044 /* loop */ 4045 } 4046 hammer2_spin_unex(&parent->core.spin); 4047 4048 /* 4049 * Insert the new indirect block into the parent now that we've 4050 * cleared out some entries in the parent. We calculated a good 4051 * insertion index in the loop above (ichain->index). 4052 * 4053 * We don't have to set UPDATE here because we mark ichain 4054 * modified down below (so the normal modified -> flush -> set-moved 4055 * sequence applies). 4056 * 4057 * The insertion shouldn't race as this is a completely new block 4058 * and the parent is locked. 4059 */ 4060 base = NULL; /* safety, parent modify may change address */ 4061 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4062 KKASSERT(parent->core.live_count < count); 4063 hammer2_chain_insert(parent, ichain, 4064 HAMMER2_CHAIN_INSERT_SPIN | 4065 HAMMER2_CHAIN_INSERT_LIVE, 4066 0); 4067 4068 /* 4069 * Make sure flushes propogate after our manual insertion. 4070 */ 4071 hammer2_chain_setflush(ichain); 4072 hammer2_chain_setflush(parent); 4073 4074 /* 4075 * Figure out what to return. 
	 */
	if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits) != 0) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
	} else {
		/*
		 * Otherwise it's in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		bsave = *bref;
		if (sub) {
			/* in-memory child found, ref then drop the spin */
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			/* media-only child, instantiate a chain for it */
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				/* lost insertion race, retry */
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}

		/*
		 * We raced another thread if the child moved or was
		 * deleted while the spinlock was dropped, retry.
		 */
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
				     sub->bref.modify_tid,
				     HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest key / radix seen so far */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest key / radix seen so far */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	switch(ncount) {
	case HAMMER2_IND_COUNT_MIN:
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_COUNT_NOM:
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_COUNT_MAX:
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.
(The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE.
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       rounddown2(key ^ bref->key,
				  (hammer2_key_t)1 << nkeybits) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* select the fuller half for the new indirect block's key */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.
We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
			(key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* select the fuller half for the new indirect block's key */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Same as hammer2_chain_delete() but also returns the chain's original
 * blockref via *obref.  *obref is initialized to HAMMER2_BREF_TYPE_EMPTY
 * and is left that way when the chain was already marked DELETED (no
 * removal performed); otherwise the delete helper fills it in.
 */
static int
hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	obref->type = HAMMER2_BREF_TYPE_EMPTY;
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, obref);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!
Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require that live chains already have their brefs counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *brefp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found, a non-NULL bref may still be
 * returned in *brefp (pointing into the media block array).
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **brefp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
5070 */ 5071 *key_nextp = key_end + 1; 5072 i = hammer2_base_find(parent, base, count, key_nextp, 5073 key_beg, key_end); 5074 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5075 5076 /* 5077 * Neither matched 5078 */ 5079 if (i == count && chain == NULL) { 5080 *brefp = NULL; 5081 return(NULL); 5082 } 5083 5084 /* 5085 * Only chain matched. 5086 */ 5087 if (i == count) { 5088 bref = &chain->bref; 5089 goto found; 5090 } 5091 5092 /* 5093 * Only blockref matched. 5094 */ 5095 if (chain == NULL) { 5096 bref = &base[i]; 5097 goto found; 5098 } 5099 5100 /* 5101 * Both in-memory and blockref matched, select the nearer element. 5102 * 5103 * If both are flush with the left-hand side or both are the 5104 * same distance away, select the chain. In this situation the 5105 * chain must have been loaded from the matching blockmap. 5106 */ 5107 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5108 chain->bref.key == base[i].key) { 5109 KKASSERT(chain->bref.key == base[i].key); 5110 bref = &chain->bref; 5111 goto found; 5112 } 5113 5114 /* 5115 * Select the nearer key 5116 */ 5117 if (chain->bref.key < base[i].key) { 5118 bref = &chain->bref; 5119 } else { 5120 bref = &base[i]; 5121 chain = NULL; 5122 } 5123 5124 /* 5125 * If the bref is out of bounds we've exhausted our search. 5126 */ 5127 found: 5128 if (bref->key > key_end) { 5129 *brefp = NULL; 5130 chain = NULL; 5131 } else { 5132 *brefp = bref; 5133 } 5134 return(chain); 5135 } 5136 5137 /* 5138 * Locate the specified block array element and delete it. The element 5139 * must exist. 5140 * 5141 * The spin lock on the related chain must be held. 5142 * 5143 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5144 * need to be adjusted when we commit the media change. 
5145 */ 5146 void 5147 hammer2_base_delete(hammer2_chain_t *parent, 5148 hammer2_blockref_t *base, int count, 5149 hammer2_chain_t *chain, 5150 hammer2_blockref_t *obref) 5151 { 5152 hammer2_blockref_t *elm = &chain->bref; 5153 hammer2_blockref_t *scan; 5154 hammer2_key_t key_next; 5155 int i; 5156 5157 /* 5158 * Delete element. Expect the element to exist. 5159 * 5160 * XXX see caller, flush code not yet sophisticated enough to prevent 5161 * re-flushed in some cases. 5162 */ 5163 key_next = 0; /* max range */ 5164 i = hammer2_base_find(parent, base, count, &key_next, 5165 elm->key, elm->key); 5166 scan = &base[i]; 5167 if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY || 5168 scan->key != elm->key || 5169 ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 && 5170 scan->keybits != elm->keybits)) { 5171 hammer2_spin_unex(&parent->core.spin); 5172 panic("delete base %p element not found at %d/%d elm %p\n", 5173 base, i, count, elm); 5174 return; 5175 } 5176 5177 /* 5178 * Update stats and zero the entry. 5179 * 5180 * NOTE: Handle radix == 0 (0 bytes) case. 
5181 */ 5182 if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) { 5183 parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << 5184 (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX); 5185 } 5186 switch(scan->type) { 5187 case HAMMER2_BREF_TYPE_INODE: 5188 --parent->bref.embed.stats.inode_count; 5189 /* fall through */ 5190 case HAMMER2_BREF_TYPE_DATA: 5191 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5192 atomic_set_int(&chain->flags, 5193 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5194 } else { 5195 if (parent->bref.leaf_count) 5196 --parent->bref.leaf_count; 5197 } 5198 /* fall through */ 5199 case HAMMER2_BREF_TYPE_INDIRECT: 5200 if (scan->type != HAMMER2_BREF_TYPE_DATA) { 5201 parent->bref.embed.stats.data_count -= 5202 scan->embed.stats.data_count; 5203 parent->bref.embed.stats.inode_count -= 5204 scan->embed.stats.inode_count; 5205 } 5206 if (scan->type == HAMMER2_BREF_TYPE_INODE) 5207 break; 5208 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5209 atomic_set_int(&chain->flags, 5210 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5211 } else { 5212 if (parent->bref.leaf_count <= scan->leaf_count) 5213 parent->bref.leaf_count = 0; 5214 else 5215 parent->bref.leaf_count -= scan->leaf_count; 5216 } 5217 break; 5218 case HAMMER2_BREF_TYPE_DIRENT: 5219 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5220 atomic_set_int(&chain->flags, 5221 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5222 } else { 5223 if (parent->bref.leaf_count) 5224 --parent->bref.leaf_count; 5225 } 5226 default: 5227 break; 5228 } 5229 5230 if (obref) 5231 *obref = *scan; 5232 bzero(scan, sizeof(*scan)); 5233 5234 /* 5235 * We can only optimize parent->core.live_zero for live chains. 5236 */ 5237 if (parent->core.live_zero == i + 1) { 5238 while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY) 5239 ; 5240 parent->core.live_zero = i + 1; 5241 } 5242 5243 /* 5244 * Clear appropriate blockmap flags in chain. 
5245 */ 5246 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED | 5247 HAMMER2_CHAIN_BLKMAPUPD); 5248 } 5249 5250 /* 5251 * Insert the specified element. The block array must not already have the 5252 * element and must have space available for the insertion. 5253 * 5254 * The spin lock on the related chain must be held. 5255 * 5256 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5257 * need to be adjusted when we commit the media change. 5258 */ 5259 void 5260 hammer2_base_insert(hammer2_chain_t *parent, 5261 hammer2_blockref_t *base, int count, 5262 hammer2_chain_t *chain, hammer2_blockref_t *elm) 5263 { 5264 hammer2_key_t key_next; 5265 hammer2_key_t xkey; 5266 int i; 5267 int j; 5268 int k; 5269 int l; 5270 int u = 1; 5271 5272 /* 5273 * Insert new element. Expect the element to not already exist 5274 * unless we are replacing it. 5275 * 5276 * XXX see caller, flush code not yet sophisticated enough to prevent 5277 * re-flushed in some cases. 5278 */ 5279 key_next = 0; /* max range */ 5280 i = hammer2_base_find(parent, base, count, &key_next, 5281 elm->key, elm->key); 5282 5283 /* 5284 * Shortcut fill optimization, typical ordered insertion(s) may not 5285 * require a search. 
5286 */ 5287 KKASSERT(i >= 0 && i <= count); 5288 5289 /* 5290 * Set appropriate blockmap flags in chain (if not NULL) 5291 */ 5292 if (chain) 5293 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED); 5294 5295 /* 5296 * Update stats and zero the entry 5297 */ 5298 if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) { 5299 parent->bref.embed.stats.data_count += (hammer2_off_t)1 << 5300 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 5301 } 5302 switch(elm->type) { 5303 case HAMMER2_BREF_TYPE_INODE: 5304 ++parent->bref.embed.stats.inode_count; 5305 /* fall through */ 5306 case HAMMER2_BREF_TYPE_DATA: 5307 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5308 ++parent->bref.leaf_count; 5309 /* fall through */ 5310 case HAMMER2_BREF_TYPE_INDIRECT: 5311 if (elm->type != HAMMER2_BREF_TYPE_DATA) { 5312 parent->bref.embed.stats.data_count += 5313 elm->embed.stats.data_count; 5314 parent->bref.embed.stats.inode_count += 5315 elm->embed.stats.inode_count; 5316 } 5317 if (elm->type == HAMMER2_BREF_TYPE_INODE) 5318 break; 5319 if (parent->bref.leaf_count + elm->leaf_count < 5320 HAMMER2_BLOCKREF_LEAF_MAX) { 5321 parent->bref.leaf_count += elm->leaf_count; 5322 } else { 5323 parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX; 5324 } 5325 break; 5326 case HAMMER2_BREF_TYPE_DIRENT: 5327 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5328 ++parent->bref.leaf_count; 5329 break; 5330 default: 5331 break; 5332 } 5333 5334 5335 /* 5336 * We can only optimize parent->core.live_zero for live chains. 
5337 */ 5338 if (i == count && parent->core.live_zero < count) { 5339 i = parent->core.live_zero++; 5340 base[i] = *elm; 5341 return; 5342 } 5343 5344 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 5345 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 5346 hammer2_spin_unex(&parent->core.spin); 5347 panic("insert base %p overlapping elements at %d elm %p\n", 5348 base, i, elm); 5349 } 5350 5351 /* 5352 * Try to find an empty slot before or after. 5353 */ 5354 j = i; 5355 k = i; 5356 while (j > 0 || k < count) { 5357 --j; 5358 if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) { 5359 if (j == i - 1) { 5360 base[j] = *elm; 5361 } else { 5362 bcopy(&base[j+1], &base[j], 5363 (i - j - 1) * sizeof(*base)); 5364 base[i - 1] = *elm; 5365 } 5366 goto validate; 5367 } 5368 ++k; 5369 if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) { 5370 bcopy(&base[i], &base[i+1], 5371 (k - i) * sizeof(hammer2_blockref_t)); 5372 base[i] = *elm; 5373 5374 /* 5375 * We can only update parent->core.live_zero for live 5376 * chains. 5377 */ 5378 if (parent->core.live_zero <= k) 5379 parent->core.live_zero = k + 1; 5380 u = 2; 5381 goto validate; 5382 } 5383 } 5384 panic("hammer2_base_insert: no room!"); 5385 5386 /* 5387 * Debugging 5388 */ 5389 validate: 5390 key_next = 0; 5391 for (l = 0; l < count; ++l) { 5392 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5393 key_next = base[l].key + 5394 ((hammer2_key_t)1 << base[l].keybits) - 1; 5395 break; 5396 } 5397 } 5398 while (++l < count) { 5399 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5400 if (base[l].key <= key_next) 5401 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 5402 key_next = base[l].key + 5403 ((hammer2_key_t)1 << base[l].keybits) - 1; 5404 5405 } 5406 } 5407 5408 } 5409 5410 #if 0 5411 5412 /* 5413 * Sort the blockref array for the chain. Used by the flush code to 5414 * sort the blockref[] array. 5415 * 5416 * The chain must be exclusively locked AND spin-locked. 
5417 */ 5418 typedef hammer2_blockref_t *hammer2_blockref_p; 5419 5420 static 5421 int 5422 hammer2_base_sort_callback(const void *v1, const void *v2) 5423 { 5424 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 5425 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 5426 5427 /* 5428 * Make sure empty elements are placed at the end of the array 5429 */ 5430 if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) { 5431 if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) 5432 return(0); 5433 return(1); 5434 } else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) { 5435 return(-1); 5436 } 5437 5438 /* 5439 * Sort by key 5440 */ 5441 if (bref1->key < bref2->key) 5442 return(-1); 5443 if (bref1->key > bref2->key) 5444 return(1); 5445 return(0); 5446 } 5447 5448 void 5449 hammer2_base_sort(hammer2_chain_t *chain) 5450 { 5451 hammer2_blockref_t *base; 5452 int count; 5453 5454 switch(chain->bref.type) { 5455 case HAMMER2_BREF_TYPE_INODE: 5456 /* 5457 * Special shortcut for embedded data returns the inode 5458 * itself. Callers must detect this condition and access 5459 * the embedded data (the strategy code does this for us). 5460 * 5461 * This is only applicable to regular files and softlinks. 5462 */ 5463 if (chain->data->ipdata.meta.op_flags & 5464 HAMMER2_OPFLAG_DIRECTDATA) { 5465 return; 5466 } 5467 base = &chain->data->ipdata.u.blockset.blockref[0]; 5468 count = HAMMER2_SET_COUNT; 5469 break; 5470 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 5471 case HAMMER2_BREF_TYPE_INDIRECT: 5472 /* 5473 * Optimize indirect blocks in the INITIAL state to avoid 5474 * I/O. 
5475 */ 5476 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 5477 base = &chain->data->npdata[0]; 5478 count = chain->bytes / sizeof(hammer2_blockref_t); 5479 break; 5480 case HAMMER2_BREF_TYPE_VOLUME: 5481 base = &chain->data->voldata.sroot_blockset.blockref[0]; 5482 count = HAMMER2_SET_COUNT; 5483 break; 5484 case HAMMER2_BREF_TYPE_FREEMAP: 5485 base = &chain->data->blkset.blockref[0]; 5486 count = HAMMER2_SET_COUNT; 5487 break; 5488 default: 5489 panic("hammer2_base_sort: unrecognized " 5490 "blockref(A) type: %d", 5491 chain->bref.type); 5492 base = NULL; /* safety */ 5493 count = 0; /* safety */ 5494 break; 5495 } 5496 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 5497 } 5498 5499 #endif 5500 5501 /* 5502 * Set the check data for a chain. This can be a heavy-weight operation 5503 * and typically only runs on-flush. For file data check data is calculated 5504 * when the logical buffers are flushed. 5505 */ 5506 void 5507 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5508 { 5509 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 5510 5511 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5512 case HAMMER2_CHECK_NONE: 5513 break; 5514 case HAMMER2_CHECK_DISABLED: 5515 break; 5516 case HAMMER2_CHECK_ISCSI32: 5517 chain->bref.check.iscsi32.value = 5518 hammer2_icrc32(bdata, chain->bytes); 5519 break; 5520 case HAMMER2_CHECK_XXHASH64: 5521 chain->bref.check.xxhash64.value = 5522 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5523 break; 5524 case HAMMER2_CHECK_SHA192: 5525 assert(0); /* XXX unsupported */ 5526 /* 5527 { 5528 SHA256_CTX hash_ctx; 5529 union { 5530 uint8_t digest[SHA256_DIGEST_LENGTH]; 5531 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5532 } u; 5533 5534 SHA256_Init(&hash_ctx); 5535 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5536 SHA256_Final(u.digest, &hash_ctx); 5537 u.digest64[2] ^= u.digest64[3]; 5538 bcopy(u.digest, 5539 chain->bref.check.sha192.data, 5540 sizeof(chain->bref.check.sha192.data)); 5541 
} 5542 */ 5543 break; 5544 case HAMMER2_CHECK_FREEMAP: 5545 chain->bref.check.freemap.icrc32 = 5546 hammer2_icrc32(bdata, chain->bytes); 5547 break; 5548 default: 5549 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5550 chain->bref.methods); 5551 break; 5552 } 5553 } 5554 5555 /* 5556 * Characterize a failed check code and try to trace back to the inode. 5557 */ 5558 static void 5559 hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check, 5560 int bits) 5561 { 5562 hammer2_chain_t *lchain; 5563 hammer2_chain_t *ochain; 5564 int did; 5565 5566 did = krateprintf(&krate_h2chk, 5567 "chain %016jx.%02x (%s) meth=%02x CHECK FAIL " 5568 "(flags=%08x, bref/data ", 5569 chain->bref.data_off, 5570 chain->bref.type, 5571 hammer2_bref_type_str(chain->bref.type), 5572 chain->bref.methods, 5573 chain->flags); 5574 if (did == 0) 5575 return; 5576 5577 if (bits == 32) { 5578 kprintf("%08x/%08x)\n", 5579 chain->bref.check.iscsi32.value, 5580 (uint32_t)check); 5581 } else { 5582 kprintf("%016jx/%016jx)\n", 5583 chain->bref.check.xxhash64.value, 5584 check); 5585 } 5586 5587 /* 5588 * Run up the chains to try to find the governing inode so we 5589 * can report it. 
5590 * 5591 * XXX This error reporting is not really MPSAFE 5592 */ 5593 ochain = chain; 5594 lchain = chain; 5595 while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 5596 lchain = chain; 5597 chain = chain->parent; 5598 } 5599 5600 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5601 ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 || 5602 (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) { 5603 kprintf(" Resides at/in inode %ld\n", 5604 (long)chain->bref.key); 5605 } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5606 kprintf(" Resides in inode index - CRITICAL!!!\n"); 5607 } else { 5608 kprintf(" Resides in root index - CRITICAL!!!\n"); 5609 } 5610 if (ochain->hmp) { 5611 const char *pfsname = "UNKNOWN"; 5612 int i; 5613 5614 if (ochain->pmp) { 5615 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) { 5616 if (ochain->pmp->pfs_hmps[i] == ochain->hmp && 5617 ochain->pmp->pfs_names[i]) { 5618 pfsname = ochain->pmp->pfs_names[i]; 5619 break; 5620 } 5621 } 5622 } 5623 kprintf(" In pfs %s on device %s\n", 5624 pfsname, ochain->hmp->devrepname); 5625 } 5626 } 5627 5628 /* 5629 * Returns non-zero on success, 0 on failure. 
5630 */ 5631 int 5632 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5633 { 5634 uint32_t check32; 5635 uint64_t check64; 5636 int r; 5637 5638 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5639 return 1; 5640 5641 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5642 case HAMMER2_CHECK_NONE: 5643 r = 1; 5644 break; 5645 case HAMMER2_CHECK_DISABLED: 5646 r = 1; 5647 break; 5648 case HAMMER2_CHECK_ISCSI32: 5649 check32 = hammer2_icrc32(bdata, chain->bytes); 5650 r = (chain->bref.check.iscsi32.value == check32); 5651 if (r == 0) { 5652 hammer2_characterize_failed_chain(chain, check32, 32); 5653 } 5654 hammer2_process_icrc32 += chain->bytes; 5655 break; 5656 case HAMMER2_CHECK_XXHASH64: 5657 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5658 r = (chain->bref.check.xxhash64.value == check64); 5659 if (r == 0) { 5660 hammer2_characterize_failed_chain(chain, check64, 64); 5661 } 5662 hammer2_process_xxhash64 += chain->bytes; 5663 break; 5664 case HAMMER2_CHECK_SHA192: 5665 assert(0); /* XXX unsupported */ 5666 /* 5667 { 5668 SHA256_CTX hash_ctx; 5669 union { 5670 uint8_t digest[SHA256_DIGEST_LENGTH]; 5671 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5672 } u; 5673 5674 SHA256_Init(&hash_ctx); 5675 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5676 SHA256_Final(u.digest, &hash_ctx); 5677 u.digest64[2] ^= u.digest64[3]; 5678 if (bcmp(u.digest, 5679 chain->bref.check.sha192.data, 5680 sizeof(chain->bref.check.sha192.data)) == 0) { 5681 r = 1; 5682 } else { 5683 r = 0; 5684 krateprintf(&krate_h2chk, 5685 "chain %016jx.%02x meth=%02x " 5686 "CHECK FAIL\n", 5687 chain->bref.data_off, 5688 chain->bref.type, 5689 chain->bref.methods); 5690 } 5691 } 5692 */ 5693 break; 5694 case HAMMER2_CHECK_FREEMAP: 5695 r = (chain->bref.check.freemap.icrc32 == 5696 hammer2_icrc32(bdata, chain->bytes)); 5697 if (r == 0) { 5698 int did; 5699 5700 did = krateprintf(&krate_h2chk, 5701 "chain %016jx.%02x meth=%02x " 5702 "CHECK FAIL\n", 5703 chain->bref.data_off, 5704 
chain->bref.type, 5705 chain->bref.methods); 5706 if (did) { 5707 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5708 chain->bref.check.freemap.icrc32, 5709 hammer2_icrc32(bdata, chain->bytes), 5710 chain->bytes); 5711 if (chain->dio) { 5712 kprintf("dio %p buf %016jx,%ld " 5713 "bdata %p/%p\n", 5714 chain->dio, 5715 (intmax_t)chain->dio->bp->b_loffset, 5716 chain->dio->bp->b_bufsize, 5717 bdata, 5718 chain->dio->bp->b_data); 5719 } 5720 } 5721 } 5722 break; 5723 default: 5724 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5725 chain->bref.methods); 5726 r = 1; 5727 break; 5728 } 5729 return r; 5730 } 5731 5732 /* 5733 * Acquire the chain and parent representing the specified inode for the 5734 * device at the specified cluster index. 5735 * 5736 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5737 * 5738 * If we are unable to locate the inode, HAMMER2_ERROR_EIO or HAMMER2_ERROR_CHECK 5739 * is returned. In case of error, *chainp and/or *parentp may still be returned 5740 * non-NULL. 5741 * 5742 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5743 * They will be unlocked and released by this function. The *parentp and 5744 * *chainp representing the located inode are returned locked. 5745 * 5746 * The returned error includes any error on the returned chain in addition to 5747 * errors incurred while trying to lookup the inode. However, a chain->error 5748 * might not be recognized if HAMMER2_LOOKUP_NODATA is passed. This flag may 5749 * not be passed to this function. 5750 */ 5751 int 5752 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5753 int clindex, int flags, 5754 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5755 { 5756 hammer2_chain_t *parent; 5757 hammer2_chain_t *rchain; 5758 hammer2_key_t key_dummy; 5759 hammer2_inode_t *ip; 5760 int resolve_flags; 5761 int error; 5762 5763 KKASSERT((flags & HAMMER2_LOOKUP_NODATA) == 0); 5764 5765 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 
5766 HAMMER2_RESOLVE_SHARED : 0; 5767 5768 /* 5769 * Caller expects us to replace these. 5770 */ 5771 if (*chainp) { 5772 hammer2_chain_unlock(*chainp); 5773 hammer2_chain_drop(*chainp); 5774 *chainp = NULL; 5775 } 5776 if (*parentp) { 5777 hammer2_chain_unlock(*parentp); 5778 hammer2_chain_drop(*parentp); 5779 *parentp = NULL; 5780 } 5781 5782 /* 5783 * Be very careful, this is a backend function and we CANNOT 5784 * lock any frontend inode structure we find. But we have to 5785 * look the inode up this way first in case it exists but is 5786 * detached from the radix tree. 5787 */ 5788 ip = hammer2_inode_lookup(pmp, inum); 5789 if (ip) { 5790 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 5791 parentp, 5792 resolve_flags); 5793 hammer2_inode_drop(ip); 5794 if (*chainp) 5795 return (*chainp)->error; 5796 hammer2_chain_unlock(*chainp); 5797 hammer2_chain_drop(*chainp); 5798 *chainp = NULL; 5799 if (*parentp) { 5800 hammer2_chain_unlock(*parentp); 5801 hammer2_chain_drop(*parentp); 5802 *parentp = NULL; 5803 } 5804 } 5805 5806 /* 5807 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5808 * inodes from root directory entries in the key lookup). 5809 */ 5810 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5811 rchain = NULL; 5812 if (parent) { 5813 /* 5814 * NOTE: rchain can be returned as NULL even if error == 0 5815 * (i.e. not found) 5816 */ 5817 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5818 inum, inum, 5819 &error, flags); 5820 /* 5821 * Propagate a chain-specific error to caller. 5822 * 5823 * If the chain is not errored, we must still validate that the inode 5824 * number is correct, because all hell will break loose if it isn't 5825 * correct. It should always be correct so print to the console and 5826 * simulate a CHECK error if it is not. 
5827 */ 5828 if (error == 0 && rchain) { 5829 error = rchain->error; 5830 if (error == 0 && rchain->data) { 5831 if (inum != rchain->data->ipdata.meta.inum) { 5832 kprintf("hammer2_chain_inode_find: lookup inum %ld, " 5833 "got valid inode but with inum %ld\n", 5834 (long)inum, (long)rchain->data->ipdata.meta.inum); 5835 error = HAMMER2_ERROR_CHECK; 5836 rchain->error = error; 5837 } 5838 } 5839 } 5840 } else { 5841 error = HAMMER2_ERROR_EIO; 5842 } 5843 *parentp = parent; 5844 *chainp = rchain; 5845 5846 return error; 5847 } 5848 5849 /* 5850 * Used by the bulkscan code to snapshot the synchronized storage for 5851 * a volume, allowing it to be scanned concurrently against normal 5852 * operation. 5853 */ 5854 hammer2_chain_t * 5855 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5856 { 5857 hammer2_chain_t *copy; 5858 5859 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5860 copy->data = kmalloc(sizeof(copy->data->voldata), 5861 hmp->mmsg, M_WAITOK | M_ZERO); 5862 hammer2_voldata_lock(hmp); 5863 copy->data->voldata = hmp->volsync; 5864 hammer2_voldata_unlock(hmp); 5865 5866 return copy; 5867 } 5868 5869 void 5870 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5871 { 5872 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5873 KKASSERT(copy->data); 5874 kfree(copy->data, copy->hmp->mmsg); 5875 copy->data = NULL; 5876 hammer2_chain_drop(copy); 5877 } 5878 5879 /* 5880 * Returns non-zero if the chain (INODE or DIRENT) matches the 5881 * filename. 
5882 */ 5883 int 5884 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 5885 size_t name_len) 5886 { 5887 const hammer2_inode_data_t *ripdata; 5888 5889 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5890 ripdata = &chain->data->ipdata; 5891 if (ripdata->meta.name_len == name_len && 5892 bcmp(ripdata->filename, name, name_len) == 0) { 5893 return 1; 5894 } 5895 } 5896 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 5897 chain->bref.embed.dirent.namlen == name_len) { 5898 if (name_len > sizeof(chain->bref.check.buf) && 5899 bcmp(chain->data->buf, name, name_len) == 0) { 5900 return 1; 5901 } 5902 if (name_len <= sizeof(chain->bref.check.buf) && 5903 bcmp(chain->bref.check.buf, name, name_len) == 0) { 5904 return 1; 5905 } 5906 } 5907 return 0; 5908 } 5909 5910 /* 5911 * Debugging 5912 */ 5913 void 5914 hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp, 5915 char pfx, u_int flags) 5916 { 5917 hammer2_chain_t *scan; 5918 hammer2_chain_t *parent; 5919 5920 --*countp; 5921 if (*countp == 0) { 5922 kprintf("%*.*s...\n", tab, tab, ""); 5923 return; 5924 } 5925 if (*countp < 0) 5926 return; 5927 kprintf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx\n", 5928 tab, tab, "", pfx, chain, 5929 hammer2_bref_type_str(chain->bref.type), bi, 5930 chain->bref.data_off, chain->bref.key, chain->bref.keybits, 5931 chain->bref.mirror_tid); 5932 5933 kprintf("%*.*s [%08x] (%s) refs=%d", 5934 tab, tab, "", 5935 chain->flags, 5936 ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5937 chain->data) ? 
(char *)chain->data->ipdata.filename : "?"), 5938 chain->refs); 5939 5940 parent = chain->parent; 5941 if (parent) 5942 kprintf("\n%*.*s p=%p [pflags %08x prefs %d]", 5943 tab, tab, "", 5944 parent, parent->flags, parent->refs); 5945 if (RB_EMPTY(&chain->core.rbtree)) { 5946 kprintf("\n"); 5947 } else { 5948 int bi = 0; 5949 kprintf(" {\n"); 5950 RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) { 5951 if ((scan->flags & flags) || flags == (u_int)-1) { 5952 hammer2_dump_chain(scan, tab + 4, bi, countp, 5953 'a', flags); 5954 } 5955 bi++; 5956 } 5957 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) 5958 kprintf("%*.*s}(%s)\n", tab, tab, "", 5959 chain->data->ipdata.filename); 5960 else 5961 kprintf("%*.*s}\n", tab, tab, ""); 5962 } 5963 } 5964 5965 void 5966 hammer2_dump_chains(hammer2_dev_t *hmp, char vpfx, char fpfx) 5967 { 5968 int dumpcnt; 5969 5970 dumpcnt = 50; 5971 hammer2_dump_chain(&hmp->vchain, 0, 0, &dumpcnt, vpfx, (u_int)-1); 5972 5973 dumpcnt = 50; 5974 hammer2_dump_chain(&hmp->fchain, 0, 0, &dumpcnt, fpfx, (u_int)-1); 5975 } 5976