/*
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

/*
 * Recursively flush the specified chain.  The chain is locked and
 * referenced by the caller and will remain so on return.  The chain
 * will remain referenced throughout but can temporarily lose its
 * lock during the recursion to avoid unnecessarily stalling user
 * processes.
 */
struct hammer2_flush_info {
        hammer2_chain_t *parent;
        hammer2_trans_t *trans;
        int             depth;
        int             diddeferral;
        int             pass;
        int             cache_index;
        struct h2_flush_deferral_list flush_list;
        hammer2_tid_t   sync_tid;       /* flush synchronization point */
        hammer2_tid_t   mirror_tid;     /* collect mirror TID updates */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

static void hammer2_chain_flush_core(hammer2_flush_info_t *info,
                                hammer2_chain_t *chain);
static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data);
static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data);
static void hammer2_rollup_stats(hammer2_chain_t *parent,
                                hammer2_chain_t *child, int how);

#if 0
static __inline
void
hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref,
                    int how)
{
        hammer2_key_t bytes;

        if (bref->type != 0) {
                bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX);
                if (bref->type == HAMMER2_BREF_TYPE_INODE)
                        info->inode_count += how;
                if (how < 0)
                        info->data_count -= bytes;
                else
                        info->data_count += bytes;
        }
}
#endif

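/*
 * Informal overview of the deferral mechanism (derived from the code in
 * this file, not additional specification):
 *
 * The recursive flush uses hammer2_flush_info.depth to bound kernel stack
 * usage.  When the recursion reaches HAMMER2_FLUSH_DEPTH_LIMIT the chain
 * is flagged DEFERRED, given an extra ref, and queued on info->flush_list
 * instead of being descended into.  After the stack unwinds,
 * hammer2_chain_flush() pops each deferred chain off the list and flushes
 * it as a new top-level recursion, looping until the list is empty.
 */
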
/*
 * Transaction support functions for writing to the filesystem.
 *
 * Initializing a new transaction allocates a transaction ID.  We
 * don't bother marking the volume header MODIFIED.  Instead, the volume
 * will be synchronized at a later time as part of a larger flush sequence.
 *
 * Non-flush transactions can typically run concurrently.  However, if
 * there are non-flush transactions both before AND after a flush trans,
 * the transactions after the flush stall until the ones before it finish.
 *
 * Non-flush transactions occurring after a flush trans can run concurrently
 * with that flush.  They only have to wait for transactions prior to the
 * flush trans to complete before they unstall.
 *
 * WARNING! Transaction ids are only allocated when the transaction becomes
 *          active, which allows other transactions to insert ahead of us
 *          if we are forced to block (only bioq transactions do that).
 *
 * WARNING! Modifications to the root volume cannot dup the root volume
 *          header to handle synchronization points, so alloc_tid can
 *          wind up (harmlessly) more advanced on flush.
 *
 * WARNING! Operations which might call inode_duplicate()/chain_duplicate()
 *          depend heavily on having a unique sync_tid to avoid duplication
 *          collisions (which key off of delete_tid).
 */
void
hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
{
        hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        hammer2_trans_t *scan;

        bzero(trans, sizeof(*trans));
        trans->pmp = pmp;
        cluster = pmp->cluster;
        hmp = cluster->hmp;

        hammer2_voldata_lock(hmp);
        trans->flags = flags;
        trans->td = curthread;
        /*trans->delete_gen = 0;*/      /* multiple deletions within trans */

        if (flags & HAMMER2_TRANS_ISFLUSH) {
                /*
                 * If multiple flushes are trying to run we have to
                 * wait until it is our turn, then set curflush to
                 * indicate that a flush is now pending (but not
                 * necessarily active yet).
                 *
                 * NOTE: Do not set trans->blocked here.
                 */
                ++hmp->flushcnt;
                while (hmp->curflush != NULL) {
                        lksleep(&hmp->curflush, &hmp->voldatalk,
                                0, "h2multf", hz);
                }
                hmp->curflush = trans;
                TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);

                /*
                 * If we are a flush we have to wait for all transactions
                 * prior to our flush synchronization point to complete
                 * before we can start our flush.
                 *
                 * Most importantly, this includes bioq flushes.
                 *
                 * NOTE: Do not set trans->blocked here.
                 */
                while (TAILQ_FIRST(&hmp->transq) != trans) {
                        lksleep(&trans->sync_tid, &hmp->voldatalk,
                                0, "h2syncw", hz);
                }

                /*
                 * Don't assign sync_tid until we become the running
                 * flush.  topo_flush_tid is used to control when
                 * chain modifications in concurrent transactions are
                 * required to delete-duplicate (so as not to disturb
                 * the state of what is being currently flushed).
                 */
                trans->sync_tid = hmp->voldata.alloc_tid++;
                hmp->topo_flush_tid = trans->sync_tid;

                /*
                 * Once we become the running flush we can wakeup anyone
                 * who blocked on us, up to the next flush.  That is,
                 * our flush can run concurrent with frontend operations.
                 */
                scan = trans;
                while ((scan = TAILQ_NEXT(scan, entry)) != NULL) {
                        if (scan->flags & HAMMER2_TRANS_ISFLUSH)
                                break;
                        if (scan->blocked == 0)
                                break;
                        scan->blocked = 0;
                        wakeup(&scan->blocked);
                }
        } else if ((flags & HAMMER2_TRANS_BUFCACHE) && hmp->curflush) {
                /*
                 * We cannot block if we are the bioq thread.  When a
                 * flush is not pending we can operate normally, but
                 * if a flush IS pending the bioq thread's transaction
                 * must be placed either before or after curflush.
                 *
                 * If the current flush is waiting, the bioq thread's
                 * transaction is placed before it.  If the flush is running,
                 * the bioq thread's transaction is placed after it.
                 */
                scan = TAILQ_FIRST(&hmp->transq);
                if (scan != hmp->curflush) {
                        TAILQ_INSERT_BEFORE(hmp->curflush, trans, entry);
                } else {
                        TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);
                }
                trans->sync_tid = hmp->voldata.alloc_tid++;
        } else {
                /*
                 * If this is a normal transaction and not a flush, or
                 * if this is a bioq transaction and no flush is pending,
                 * we can queue normally.
                 *
                 * Normal transactions must block while a pending flush is
                 * waiting for prior transactions to complete.  Once the
                 * pending flush becomes active we can run concurrently
                 * with it.
                 */
                TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);
                scan = TAILQ_FIRST(&hmp->transq);
                if (hmp->curflush && hmp->curflush != scan) {
                        trans->blocked = 1;
                        while (trans->blocked) {
                                lksleep(&trans->blocked, &hmp->voldatalk,
                                        0, "h2trans", hz);
                        }
                }
                trans->sync_tid = hmp->voldata.alloc_tid++;
        }
        hammer2_voldata_unlock(hmp, 0);
}

void
hammer2_trans_done(hammer2_trans_t *trans)
{
        hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        hammer2_trans_t *scan;

        cluster = trans->pmp->cluster;
        hmp = cluster->hmp;

        hammer2_voldata_lock(hmp);
        TAILQ_REMOVE(&hmp->transq, trans, entry);
        if (trans->flags & HAMMER2_TRANS_ISFLUSH) {
                --hmp->flushcnt;
                if (hmp->flushcnt) {
                        /*
                         * If we were a flush then wakeup anyone waiting on
                         * curflush (i.e. other flushes that want to run).
                         * Leave topo_flush_tid set (I think we could probably
                         * clear it to zero here).
                         */
                        hmp->curflush = NULL;
                        wakeup(&hmp->curflush);
                } else {
                        /*
                         * Theoretically we don't have to clear topo_flush_tid
                         * here since the flush will have synchronized
                         * all operations <= that TID already.  But for
                         * now zero it.
                         */
                        hmp->curflush = NULL;
                        hmp->topo_flush_tid = 0;
                }
        } else {
                /*
                 * If we are not a flush but a flush is now at the head
                 * of the queue and we were previously blocking it,
                 * we can now unblock it.
                 */
                if (hmp->flushcnt &&
                    (scan = TAILQ_FIRST(&hmp->transq)) != NULL &&
                    trans->sync_tid < scan->sync_tid &&
                    (scan->flags & HAMMER2_TRANS_ISFLUSH)) {
                        wakeup(&scan->sync_tid);
                }
        }
        hammer2_voldata_unlock(hmp, 0);
}

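/*
 * Illustrative usage sketch for the two functions above (informal, not a
 * verbatim caller):
 *
 *      hammer2_trans_t trans;
 *
 *      hammer2_trans_init(&trans, pmp, 0);     (or HAMMER2_TRANS_ISFLUSH)
 *      ... modify chains, or run hammer2_chain_flush() for a flush ...
 *      hammer2_trans_done(&trans);
 *
 * A flush transaction waits for all earlier transactions to complete and
 * obtains its sync_tid only once it becomes the running flush; later
 * non-flush transactions may run concurrently with it.
 */
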
/*
 * Flush the chain and all modified sub-chains through the specified
 * synchronization point (sync_tid), propagating parent chain modifications
 * and mirror_tid updates back up as needed.  Since we are recursing downward
 * we do not have to deal with the complexities of multi-homed chains (chains
 * with multiple parents).
 *
 * Caller must have interlocked against any non-flush-related modifying
 * operations in progress whose modify_tid values are less than or equal
 * to the passed sync_tid.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * This routine can be called from several places but the most important
 * is from the hammer2_vop_reclaim() function.  We want to try to completely
 * clean out the inode structure to prevent disconnected inodes from
 * building up and blowing out the kmalloc pool.  However, it is not actually
 * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery
 * capability.
 *
 * chain is locked on call and will remain locked on return.  If a flush
 * occurred, the chain's MOVED bit will be set indicating that its parent
 * (which is not part of the flush) should be updated.
 */
void
hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain)
{
        hammer2_chain_t *scan;
        hammer2_chain_core_t *core;
        hammer2_flush_info_t info;

        /*
         * Execute the recursive flush and handle deferrals.
         *
         * Chains can be ridiculously long (thousands deep), so to
         * avoid blowing out the kernel stack the recursive flush has a
         * depth limit.  Elements at the limit are placed on a list
         * for re-execution after the stack has been popped.
         */
        bzero(&info, sizeof(info));
        TAILQ_INIT(&info.flush_list);
        info.trans = trans;
        info.sync_tid = trans->sync_tid;
        info.mirror_tid = 0;
        info.cache_index = -1;

        core = chain->core;

        for (;;) {
                /*
                 * Unwind deep recursions which had been deferred.  This
                 * can leave MOVED set for these chains, which will be
                 * handled when we [re]flush chain after the unwind.
                 */
                while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
                        KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
                        TAILQ_REMOVE(&info.flush_list, scan, flush_node);
                        atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);

                        /*
                         * Now that we've popped back up we can do a secondary
                         * recursion on the deferred elements.
                         */
                        if (hammer2_debug & 0x0040)
                                kprintf("deferred flush %p\n", scan);
                        hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
                        hammer2_chain_flush(trans, scan);
                        hammer2_chain_unlock(scan);
                        hammer2_chain_drop(scan);       /* ref from deferral */
                }

                /*
                 * Flush pass1 on root.
                 */
                info.diddeferral = 0;
                hammer2_chain_flush_core(&info, chain);
#if FLUSH_DEBUG
                kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n",
                        chain, chain->bref.type, chain->flags);
#endif

                /*
                 * Only loop if deep recursions have been deferred.
                 */
                if (TAILQ_EMPTY(&info.flush_list))
                        break;
        }
}

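/*
 * Informal summary of the chain flags manipulated by the flush code below
 * (derived from this file, not additional specification):
 *
 *      MODIFIED    - the chain's own media data must be written out;
 *                    holds a ref which is dropped when the bit clears.
 *      SUBMODIFIED - some descendant still needs flushing; cleared before
 *                    the child scans and re-armed if work remains.
 *      MOVED       - the chain's blockref changed (or the chain was
 *                    deleted) and the parent's block table must be
 *                    updated by scan2; also holds a ref.
 *      DEFERRED    - the chain is queued on the flush_list for a later
 *                    top-level re-flush due to the depth limit.
 */
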
/*
 * This is the core of the chain flushing code.  The chain is locked by the
 * caller and remains locked on return.  This function is keyed off of
 * the SUBMODIFIED bit but must make fine-grained choices based on the
 * synchronization point we are flushing to.
 *
 * If the flush accomplished any work chain will be flagged MOVED
 * indicating a copy-on-write propagation back up is required.
 * Deep sub-nodes may also have been entered onto the deferral list.
 * MOVED is never set on the volume root.
 *
 * NOTE: modify_tid is different from MODIFIED.  modify_tid is updated
 *       only when a chain is specifically modified, and not updated
 *       for copy-on-write propagations.  MODIFIED is set on any modification
 *       including copy-on-write propagations.
 */
static void
hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain)
{
        hammer2_mount_t *hmp;
        hammer2_blockref_t *bref;
        hammer2_off_t pbase;
        hammer2_off_t pmask;
        hammer2_tid_t saved_sync;
        hammer2_trans_t *trans = info->trans;
        hammer2_chain_core_t *core;
        size_t psize;
        size_t boff;
        char *bdata;
        struct buf *bp;
        int error;
        int wasmodified;
        int diddeferral = 0;

        hmp = chain->hmp;

#if FLUSH_DEBUG
        if (info->parent)
                kprintf("flush_core %p->%p.%d %08x (%s)\n",
                        info->parent, chain, chain->bref.type,
                        chain->flags,
                        ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ?
                                chain->data->ipdata.filename : "?"));
        else
                kprintf("flush_core NULL->%p.%d %08x (%s)\n",
                        chain, chain->bref.type,
                        chain->flags,
                        ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ?
                                chain->data->ipdata.filename : "?"));
#endif
        /*
         * Ignore chains modified beyond the current flush point.  These
         * will be treated as if they did not exist.
         */
        if (chain->modify_tid > info->sync_tid)
                return;

        /*
         * Deleted chains which have not been destroyed must be retained,
         * and we probably have to recurse to clean-up any sub-trees.
         * However, restricted flushes can stop processing here because
         * the chain cleanup will be handled by a later normal flush.
         *
         * The MODIFIED bit can likely be cleared in this situation and we
         * will do so later on in this procedure.
         */
        if (chain->delete_tid <= info->sync_tid) {
                if (trans->flags & HAMMER2_TRANS_RESTRICTED)
                        return;
        }

        saved_sync = info->sync_tid;
        core = chain->core;

        /*
         * If SUBMODIFIED is set we recurse the flush and adjust the
         * blockrefs accordingly.
         *
         * NOTE: Looping on SUBMODIFIED can prevent a flush from ever
         *       finishing in the face of filesystem activity.
         */
        if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
                hammer2_chain_t *saved_parent;
                hammer2_tid_t saved_mirror;
                hammer2_chain_layer_t *layer;

                /*
                 * Clear SUBMODIFIED to catch races.  Note that any child
                 * with MODIFIED, DELETED, or MOVED set during scan2, or
                 * which tries to lastdrop but can't free its structures,
                 * or which gets deferred, will cause SUBMODIFIED to be set
                 * again.
                 *
                 * We don't want to set our chain to MODIFIED gratuitously.
                 *
                 * We need an extra ref on chain because we are going to
                 * release its lock temporarily in our child loop.
                 */
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
                hammer2_chain_ref(chain);

                /*
                 * Run two passes.  The first pass handles MODIFIED and
                 * SUBMODIFIED chains and recurses while the second pass
                 * handles MOVED chains on the way back up.
                 *
                 * If the stack gets too deep we defer scan1, but must
                 * be sure to still run scan2 if on the next loop the
                 * deferred chain has been flushed and now needs MOVED
                 * handling on the way back up.
                 *
                 * Scan1 is recursive.
                 *
                 * NOTE: The act of handling a modified/submodified chain can
                 *       cause the MOVED flag to be set.  It can also be set
                 *       via hammer2_chain_delete() and in other situations.
                 *
                 * NOTE: RB_SCAN() must be used instead of RB_FOREACH()
                 *       because children can be physically removed during
                 *       the scan.
                 */
                saved_parent = info->parent;
                saved_mirror = info->mirror_tid;
                info->parent = chain;
                info->mirror_tid = chain->bref.mirror_tid;

                if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
                        if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
                                hammer2_chain_ref(chain);
                                TAILQ_INSERT_TAIL(&info->flush_list,
                                                  chain, flush_node);
                                atomic_set_int(&chain->flags,
                                               HAMMER2_CHAIN_DEFERRED);
                        }
                        diddeferral = 1;
                } else {
                        info->diddeferral = 0;
                        spin_lock(&core->cst.spin);
                        KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
                        TAILQ_FOREACH_REVERSE(layer, &core->layerq,
                                              h2_layer_list, entry) {
                                ++layer->refs;
                                KKASSERT(layer->good == 0xABCD);
                                RB_SCAN(hammer2_chain_tree, &layer->rbtree,
                                        NULL, hammer2_chain_flush_scan1, info);
                                --layer->refs;
                                diddeferral += info->diddeferral;
                        }
                        spin_unlock(&core->cst.spin);
                }

                /*
                 * Handle successfully flushed children who are in the MOVED
                 * state on the way back up the recursion.  This can have
                 * the side-effect of clearing MOVED.
                 *
                 * Scan2 is non-recursive.
                 */
                if (diddeferral) {
                        atomic_set_int(&chain->flags,
                                       HAMMER2_CHAIN_SUBMODIFIED);
                        spin_lock(&core->cst.spin);
                } else {
                        spin_lock(&core->cst.spin);
                        KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
                        TAILQ_FOREACH_REVERSE(layer, &core->layerq,
                                              h2_layer_list, entry) {
                                info->pass = 1;
                                ++layer->refs;
                                KKASSERT(layer->good == 0xABCD);
                                RB_SCAN(hammer2_chain_tree, &layer->rbtree,
                                        NULL, hammer2_chain_flush_scan2, info);
                                info->pass = 2;
                                RB_SCAN(hammer2_chain_tree, &layer->rbtree,
                                        NULL, hammer2_chain_flush_scan2, info);
                                /*diddeferral += info->diddeferral; n/a*/
                                --layer->refs;
                        }
                }
                hammer2_chain_layer_check_locked(chain->hmp, core);
                spin_unlock(&core->cst.spin);

                chain->bref.mirror_tid = info->mirror_tid;
                info->mirror_tid = saved_mirror;
                info->parent = saved_parent;
                KKASSERT(chain->refs > 1);
                hammer2_chain_drop(chain);
        }

        /*
         * Restore sync_tid in case it was restricted by a delete/duplicate.
         */
        info->sync_tid = saved_sync;

        /*
         * Rollup diddeferral for caller.  Note direct assignment, not +=.
         */
        info->diddeferral = diddeferral;

        /*
         * Do not flush chain if there were any deferrals.  It will be
         * retried later after the deferrals are independently handled.
         */
        if (diddeferral) {
                if (hammer2_debug & 0x0008) {
                        kprintf("%*.*s} %p/%d %04x (deferred)",
                                info->depth, info->depth, "",
                                chain, chain->refs, chain->flags);
                }
                return;
        }

        /*
         * If we encounter a deleted chain within our flush we can clear
         * the MODIFIED bit and avoid flushing it whether it has been
         * destroyed or not.  We must make sure that the chain is flagged
         * MOVED in this situation so the parent picks up the deletion.
         *
         * Note that scan2 has already executed above so statistics have
         * already been rolled up.
         */
        if (chain->delete_tid <= info->sync_tid) {
                if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
                        if (chain->bp) {
                                if (chain->bytes == chain->bp->b_bufsize)
                                        chain->bp->b_flags |= B_INVAL|B_RELBUF;
                        }
                        if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) {
                                hammer2_chain_ref(chain);
                                atomic_set_int(&chain->flags,
                                               HAMMER2_CHAIN_MOVED);
                        }
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                        hammer2_chain_drop(chain);
                }
                return;
        }
#if 0
        if ((chain->flags & HAMMER2_CHAIN_DESTROYED) &&
            (chain->flags & HAMMER2_CHAIN_DELETED) &&
            (trans->flags & HAMMER2_TRANS_RESTRICTED) == 0) {
                /*
                 * Throw-away the MODIFIED flag
                 */
                if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
                        if (chain->bp) {
                                if (chain->bytes == chain->bp->b_bufsize)
                                        chain->bp->b_flags |= B_INVAL|B_RELBUF;
                        }
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                        hammer2_chain_drop(chain);
                }
                return;
        }
#endif

        /*
         * A degenerate flush might not have flushed anything and thus not
         * processed modified blocks on the way back up.  Detect the case.
         *
         * Note that MOVED can be set without MODIFIED being set due to
         * a deletion, in which case it is handled by Scan2 later on.
         *
         * Both bits can be set along with DELETED due to a deletion if
         * data was modified within the synchronization zone and the chain
         * was then deleted beyond the zone, in which case we still have
         * to flush for synchronization point consistency.  Otherwise though
         * DELETED and MODIFIED are treated as separate flags.
         */
        if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
                return;

        /*
         * Issue flush.
         *
         * A DESTROYED node that reaches this point must be flushed for
         * synchronization point consistency.
         */

        /*
         * Update mirror_tid, clear MODIFIED, and set MOVED.
         *
         * The caller will update the parent's reference to this chain
         * by testing MOVED as long as the modification was in-bounds.
         *
         * MOVED is never set on the volume root as there is no parent
         * to adjust.
         */
        if (chain->bref.mirror_tid < info->sync_tid)
                chain->bref.mirror_tid = info->sync_tid;
        wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0;
        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
        if (chain == &hmp->vchain)
                kprintf("(FLUSHED VOLUME HEADER)\n");
        if (chain == &hmp->fchain)
                kprintf("(FLUSHED FREEMAP HEADER)\n");

        if ((chain->flags & HAMMER2_CHAIN_MOVED) ||
            chain == &hmp->vchain ||
            chain == &hmp->fchain) {
                /*
                 * Drop the ref from the MODIFIED bit we cleared.
                 */
                if (wasmodified)
                        hammer2_chain_drop(chain);
        } else {
                /*
                 * If we were MODIFIED we inherit the ref from clearing
                 * that bit, otherwise we need another ref.
                 */
                if (wasmodified == 0)
                        hammer2_chain_ref(chain);
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
        }

        /*
         * If this is part of a recursive flush we can go ahead and write
         * out the buffer cache buffer and pass a new bref back up the chain
         * via the MOVED bit.
         *
         * Volume headers are NOT flushed here as they require special
         * processing.
         */
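        /*
         * Device buffer addressing used by the DATA and embedded cases
         * below (informal worked example; assumes HAMMER2_OFF_MASK excludes
         * the low radix bits of data_off, which encode the element size):
         *
         *      psize = hammer2_devblksize(chain->bytes);   e.g. 0x10000
         *      pmask = psize - 1;                          e.g. 0x0ffff
         *      pbase = data_off & ~pmask;                   buffer base
         *      boff  = data_off & (HAMMER2_OFF_MASK & pmask);  offset in it
         *
         * With data_off = 0x1234500c (offset 0x12345000, radix 0xc = 4KB),
         * this gives pbase = 0x12340000 and boff = 0x5000.
         */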
        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_FREEMAP:
                hammer2_modify_volume(hmp);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                /*
                 * We should flush the free block table before we calculate
                 * CRCs and copy voldata -> volsync.
                 *
                 * To prevent SMP races, fchain must remain locked until
                 * voldata is copied to volsync.
                 */
                hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
                if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
                                         HAMMER2_CHAIN_SUBMODIFIED)) {
                        /* this will modify vchain as a side effect */
                        hammer2_chain_flush(info->trans, &hmp->fchain);
                }

                /*
                 * The volume header is flushed manually by the syncer, not
                 * here.  All we do is adjust the crc's.
                 */
                KKASSERT(chain->data != NULL);
                KKASSERT(chain->bp == NULL);
                kprintf("volume header mirror_tid %jd\n",
                        hmp->voldata.mirror_tid);

                hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRC1_OFF,
                                HAMMER2_VOLUME_ICRC1_SIZE);
                hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRC0_OFF,
                                HAMMER2_VOLUME_ICRC0_SIZE);
                hmp->voldata.icrc_volheader =
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRCVH_OFF,
                                HAMMER2_VOLUME_ICRCVH_SIZE);
                hmp->volsync = hmp->voldata;
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
                hammer2_chain_unlock(&hmp->fchain);
                break;
        case HAMMER2_BREF_TYPE_DATA:
                /*
                 * Data elements have already been flushed via the logical
                 * file buffer cache.  Their hash was set in the bref by
                 * the vop_write code.
                 *
                 * Make sure any device buffer(s) have been flushed out here
                 * (there aren't usually any to flush).
                 */
                psize = hammer2_devblksize(chain->bytes);
                pmask = (hammer2_off_t)psize - 1;
                pbase = chain->bref.data_off & ~pmask;
                boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask);

                bp = getblk(hmp->devvp, pbase, psize, GETBLK_NOWAIT, 0);
                if (bp) {
                        if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
                            (B_CACHE | B_DIRTY)) {
                                cluster_awrite(bp);
                        } else {
                                bp->b_flags |= B_RELBUF;
                                brelse(bp);
                        }
                }
                break;
#if 0
        case HAMMER2_BREF_TYPE_INDIRECT:
                /*
                 * Indirect blocks may be in an INITIAL state.  Use the
                 * chain_lock() call to ensure that the buffer has been
                 * instantiated (even though it is already locked the buffer
                 * might not have been instantiated).
                 *
                 * Only write the buffer out if it is dirty, it is possible
                 * the operating system had already written out the buffer.
                 */
                hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
                KKASSERT(chain->bp != NULL);

                bp = chain->bp;
                if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) ||
                    (bp->b_flags & B_DIRTY)) {
                        bdwrite(chain->bp);
                } else {
                        brelse(chain->bp);
                }
                chain->bp = NULL;
                chain->data = NULL;
                hammer2_chain_unlock(chain);
                break;
#endif
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
                /*
                 * Device-backed.  Buffer will be flushed by the sync
                 * code XXX.
                 */
                KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
                break;
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
        default:
                /*
                 * Embedded elements have to be flushed out.
                 * (Basically just BREF_TYPE_INODE).
                 */
                KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
                KKASSERT(chain->data != NULL);
                KKASSERT(chain->bp == NULL);
                bref = &chain->bref;

                KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);
                KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) ==
                         HAMMER2_CHECK_ISCSI32 ||
                         HAMMER2_DEC_CHECK(chain->bref.methods) ==
                         HAMMER2_CHECK_FREEMAP);

                /*
                 * The data is embedded, we have to acquire the
                 * buffer cache buffer and copy the data into it.
                 */
                psize = hammer2_devblksize(chain->bytes);
                pmask = (hammer2_off_t)psize - 1;
                pbase = bref->data_off & ~pmask;
                boff = bref->data_off & (HAMMER2_OFF_MASK & pmask);

                /*
                 * The getblk() optimization can only be used if the
                 * physical block size matches the request.
                 */
                error = bread(hmp->devvp, pbase, psize, &bp);
                KKASSERT(error == 0);

                bdata = (char *)bp->b_data + boff;

                /*
                 * Copy the data to the buffer, mark the buffer
                 * dirty, and convert the chain to unmodified.
                 */
                bcopy(chain->data, bdata, chain->bytes);
                bp->b_flags |= B_CLUSTEROK;
                bdwrite(bp);
                bp = NULL;

                switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
                case HAMMER2_CHECK_FREEMAP:
                        chain->bref.check.freemap.icrc32 =
                                hammer2_icrc32(chain->data, chain->bytes);
                        break;
                case HAMMER2_CHECK_ISCSI32:
                        chain->bref.check.iscsi32.value =
                                hammer2_icrc32(chain->data, chain->bytes);
                        break;
                default:
                        panic("hammer2_flush_core: bad crc type");
                        break;  /* NOT REACHED */
                }
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
                        ++hammer2_iod_meta_write;
                else
                        ++hammer2_iod_indr_write;
        }
}

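/*
 * Informal call flow for one level of the flush (derived from the code in
 * this file):
 *
 *      hammer2_chain_flush_core(parent)
 *          RB_SCAN -> hammer2_chain_flush_scan1(child)   (recursive descent)
 *              hammer2_chain_flush_core(child)
 *          RB_SCAN -> hammer2_chain_flush_scan2(child)   (pass 1, deletions)
 *          RB_SCAN -> hammer2_chain_flush_scan2(child)   (pass 2, insertions)
 *
 * scan1 temporarily unlocks the parent in order to lock each child; scan2
 * keeps the parent locked and only adjusts its blockref array.
 */
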
/*
 * Flush helper scan1 (recursive)
 *
 * Flushes the children of the caller's chain (parent) and updates
 * the blockref, restricted by sync_tid.
 *
 * Ripouts during the loop should not cause any problems.  Because we are
 * flushing to a synchronization point, modification races will occur after
 * sync_tid and do not have to be flushed anyway.
 *
 * It is also ok if the parent is chain_duplicate()'d while unlocked because
 * the delete/duplication will install a delete_tid that is still larger than
 * our current sync_tid.
 */
static int
hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data)
{
        hammer2_flush_info_t *info = data;
        hammer2_trans_t *trans = info->trans;
        hammer2_chain_t *parent = info->parent;
        int diddeferral;

        /*
         * We should only need to recurse if SUBMODIFIED is set, but as
         * a safety we also recurse if MODIFIED is set.
         *
         * Return early if neither bit is set.  We must re-assert the
         * SUBMODIFIED flag in the parent if any child covered by the
         * parent (via delete_tid) is skipped.
         */
        if ((child->flags & (HAMMER2_CHAIN_MODIFIED |
                             HAMMER2_CHAIN_SUBMODIFIED)) == 0) {
                return (0);
        }
        if (child->modify_tid > trans->sync_tid) {
                if (parent->delete_tid > trans->sync_tid) {
                        atomic_set_int(&parent->flags,
                                       HAMMER2_CHAIN_SUBMODIFIED);
                }
                return (0);
        }

        hammer2_chain_ref(child);
        spin_unlock(&parent->core->cst.spin);

        /*
         * The caller has added a ref to the parent so we can temporarily
         * unlock it in order to lock the child.  Re-check the flags before
         * continuing.
         */
        hammer2_chain_unlock(parent);
        hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);

        if ((child->flags & (HAMMER2_CHAIN_MODIFIED |
                             HAMMER2_CHAIN_SUBMODIFIED)) == 0) {
                hammer2_chain_unlock(child);
                hammer2_chain_drop(child);
                hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
                spin_lock(&parent->core->cst.spin);
                return (0);
        }
        if (child->modify_tid > trans->sync_tid) {
                hammer2_chain_unlock(child);
                hammer2_chain_drop(child);
                hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
                spin_lock(&parent->core->cst.spin);
                if (parent->delete_tid > trans->sync_tid) {
                        atomic_set_int(&parent->flags,
                                       HAMMER2_CHAIN_SUBMODIFIED);
                }
                return (0);
        }

        /*
         * The DESTROYED flag can only be initially set on an unreferenced
         * deleted inode and will propagate downward via the mechanic below.
         * Such inode chains have been deleted for good and should no longer
         * be subject to delete/duplication.
         *
         * This optimization allows the inode reclaim (destroy unlinked file
         * on vnode reclamation after last close) to be flagged by just
         * setting HAMMER2_CHAIN_DESTROYED at the top level, which then
         * causes the chains to be terminated and related buffers to be
         * invalidated and not flushed out.
         *
         * We have to be careful not to propagate the DESTROYED flag if
         * the destruction occurred after our flush sync_tid.
         */
        if ((parent->flags & HAMMER2_CHAIN_DESTROYED) &&
            (child->flags & HAMMER2_CHAIN_DELETED) &&
            (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
                atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROYED |
                                              HAMMER2_CHAIN_SUBMODIFIED);
        }

        /*
         * Recurse and collect deferral data.
         */
        diddeferral = info->diddeferral;
        ++info->depth;
        hammer2_chain_flush_core(info, child);
#if FLUSH_DEBUG
        kprintf("flush_core_done parent=%p flags=%08x child=%p.%d %08x\n",
                parent, parent->flags, child, child->bref.type, child->flags);
#endif
        --info->depth;
        info->diddeferral += diddeferral;

        if (child->flags & HAMMER2_CHAIN_SUBMODIFIED)
                atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);

        hammer2_chain_unlock(child);
        hammer2_chain_drop(child);

        hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);

        spin_lock(&parent->core->cst.spin);

        return (0);
}

/*
 * Flush helper scan2 (non-recursive)
 *
 * This pass on a chain's children propagates any MOVED or DELETED
 * elements back up the chain towards the root after those elements have
 * been fully flushed.  Unlike scan1, this function is NOT recursive and
 * the parent remains locked across the entire scan.
 *
 * SCAN2 is called twice, once with pass set to 1 and once with it set to 2.
 * We have to do this so base[] elements can be deleted in pass 1 to make
 * room for adding new elements in pass 2.
 *
 * This function also rolls up storage statistics.
 *
 * NOTE! We must re-set SUBMODIFIED on the parent(s) as appropriate, and
 *       due to the above conditions it is possible to do this and still
 *       have some children flagged MOVED depending on the synchronization.
 *
 * NOTE! A deletion is a visibility issue, there can still be references to
 *       deleted elements (for example, to an unlinked file which is still
 *       open), and there can also be multiple chains pointing to the same
 *       bref where some are deleted and some are not (for example due to
 *       a rename).  So a chain marked for deletion is basically considered
 *       to be live until it is explicitly destroyed or until its ref-count
 *       reaches zero (also implying that MOVED and MODIFIED are clear).
 */
static int
hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data)
{
        hammer2_flush_info_t *info = data;
        hammer2_chain_t *parent = info->parent;
        hammer2_chain_core_t *above = child->above;
        hammer2_mount_t *hmp = child->hmp;
        hammer2_trans_t *trans = info->trans;
        hammer2_blockref_t *base;
        int count;
        int ok;

        /*
         * Inodes with stale children that have been converted to DIRECTDATA
         * mode (file extension or hardlink conversion typically) need to be
         * skipped right now before we start messing with a non-existent
         * block table.
         */
#if 0
        if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
            (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) {
                goto finalize;
        }
#endif

        /*
         * Ignore children created after our flush point, treating them as
         * if they did not exist.  These children will not cause the parent
         * to be updated.
         *
         * When we encounter such children and the parent chain has not been
         * deleted, delete/duplicated, or delete/duplicated-for-move, then
         * the parent may be used to funnel through several flush points.
         * We must re-set the SUBMODIFIED flag in the parent to ensure that
         * those flushes have visibility.  A simple test of delete_tid
         * suffices to determine if the parent spans beyond our current flush.
         */
        if (child->modify_tid > trans->sync_tid) {
                goto finalize;
        }

        /*
         * Ignore children which have not changed.  The parent's block table
         * is already correct.
         *
         * XXX The MOVED bit is only cleared when all multi-homed parents
         *     have flushed, creating a situation where a re-flush can occur
         *     via a parent which has already flushed.  The hammer2_base_*()
         *     functions currently have a hack to deal with this case but
         *     we need something better.
         */
        if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) {
                goto finalize;
        }

        /*
         * Make sure child is referenced before we unlock.
         */
        hammer2_chain_ref(child);
        spin_unlock(&above->cst.spin);

        /*
         * Parents reflushed after the child has passed them by should be
         * skipped due to the modify_tid test. XXX
         */
        hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
        KKASSERT(child->above == above);
        KKASSERT(parent->core == above);

        /*
         * The parent's blockref to the child must be deleted or updated.
         *
         * This point is not reached on successful DESTROYED optimizations
         * but can be reached on recursive deletions and restricted flushes.
         *
         * Because flushes are ordered we do not have to make a
         * modify/duplicate of indirect blocks.  That is, the flush
         * code does not have to kmalloc or duplicate anything.  We
         * can adjust the indirect block table in-place and reuse the
         * chain.  It IS possible that the chain has already been duplicated
         * or may wind up being duplicated on-the-fly by modifying code
         * on the frontend.  We simply use the original and ignore such
         * chains.  However, it does mean we can't clear the MOVED bit.
         *
         * XXX recursive deletions not optimized.
         */
        hammer2_chain_modify(trans, &parent,
                             HAMMER2_MODIFY_NO_MODIFY_TID |
                             HAMMER2_MODIFY_ASSERTNOCOPY);

        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
                /*
                 * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we
                 * properly duplicate the inode headers and do proper flush
                 * range checks (all the children should be beyond the flush
                 * point).  For now just don't sync the non-applicable
                 * children.
                 *
                 * XXX Can also occur due to hardlink consolidation.  We
                 * set OPFLAG_DIRECTDATA to prevent the indirect and data
                 * blocks from syncing to the hardlink pointer.
                 */
#if 0
                KKASSERT((parent->data->ipdata.op_flags &
                          HAMMER2_OPFLAG_DIRECTDATA) == 0);
#endif
#if 0
                if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
                        base = NULL;
                } else
#endif
                {
                        base = &parent->data->ipdata.u.blockset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                }
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
                if (parent->data) {
                        base = &parent->data->npdata[0];
                } else {
                        base = NULL;
                        KKASSERT(child->flags & HAMMER2_CHAIN_DELETED);
                }
                count = parent->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                base = &hmp->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
                base = &parent->data->npdata[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
                base = NULL;
                count = 0;
                panic("hammer2_chain_flush_scan2: "
                      "unrecognized blockref type: %d",
                      parent->bref.type);
        }

        /*
         * Don't bother updating a deleted parent's blockrefs (caller will
         * optimize-out the disk write).  Note that this is not optional,
         * a deleted parent's blockref array might not be synchronized at
         * all so calling hammer2_base*() functions could result in a panic.
         *
         * Otherwise, we need to be COUNTEDBREFS synchronized for the
         * hammer2_base_*() functions.
         */
        if (parent->delete_tid <= trans->sync_tid)
                base = NULL;
        else if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
                hammer2_chain_countbrefs(parent, base, count);

        /*
         * Update the parent's blockref table and propagate mirror_tid.
         *
         * NOTE! Children with modify_tid's beyond our flush point are
         *       considered to not exist for the purposes of updating the
         *       parent's blockref array.
         *
         * NOTE! Updates to a parent's blockref table do not adjust the
         *       parent's bref.modify_tid, only its bref.mirror_tid.
         */
        if (info->pass == 1 && child->delete_tid <= trans->sync_tid) {
                /*
                 * Deleting.  Only adjust the block array if it contains
                 * the child's entry (child's REPLACE flag is set).  Clear
                 * the child's REPLACE flag only once all possible parents
                 * have been updated.
                 */
                ok = 1;
                if (base && (child->flags & HAMMER2_CHAIN_REPLACE)) {
                        hammer2_rollup_stats(parent, child, -1);
                        spin_lock(&above->cst.spin);
                        hammer2_base_delete(parent, base, count,
                                            &info->cache_index, &child->bref);
                        if (TAILQ_NEXT(parent, core_entry) == NULL) {
                                atomic_clear_int(&child->flags,
                                                 HAMMER2_CHAIN_REPLACE);
                        }
                        spin_unlock(&above->cst.spin);
                }
                if (info->mirror_tid < child->delete_tid)
                        info->mirror_tid = child->delete_tid;
        } else if (info->pass == 2 && child->delete_tid > trans->sync_tid) {
                /*
                 * Inserting.  Only set the child's REPLACE flag indicating
                 * that the parent's blockref array entry is valid once all
                 * possible parents have been updated.
                 */
                ok = 1;
                if (base) {
                        if (child->flags & HAMMER2_CHAIN_REPLACE)
                                hammer2_rollup_stats(parent, child, 0);
                        else
                                hammer2_rollup_stats(parent, child, 1);
                        spin_lock(&above->cst.spin);
                        hammer2_base_insert(parent, base, count,
                                            &info->cache_index, &child->bref,
                                            child->flags);
                        if (TAILQ_NEXT(parent, core_entry) == NULL) {
                                atomic_set_int(&child->flags,
                                               HAMMER2_CHAIN_REPLACE);
                        }
                        spin_unlock(&above->cst.spin);
                }
                if (info->mirror_tid < child->modify_tid)
                        info->mirror_tid = child->modify_tid;
        } else {
                ok = 0;
        }

        if (info->mirror_tid < child->bref.mirror_tid) {
                info->mirror_tid = child->bref.mirror_tid;
        }
        if ((parent->bref.type == HAMMER2_BREF_TYPE_VOLUME ||
             parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP) &&
            hmp->voldata.mirror_tid < child->bref.mirror_tid) {
                hmp->voldata.mirror_tid = child->bref.mirror_tid;
        }

        /*
         * Only clear MOVED once all possible parents have been flushed.
         *
         * When can we safely clear the MOVED flag?  Flushes down duplicate
         * paths can occur out of order, for example if an inode is moved
         * as part of a hardlink consolidation or if an inode is moved into
         * an indirect block indexed before the inode.
         */
        if (ok && (child->flags & HAMMER2_CHAIN_MOVED)) {
                hammer2_chain_t *scan;
                int ok = 1;

                spin_lock(&above->cst.spin);
                TAILQ_FOREACH(scan, &above->ownerq, core_entry) {
                        /*
                         * XXX weird code also checked at the top of scan2,
                         * I would like to fix this by detaching the core
                         * on initial hardlink consolidation (1->2 nlinks).
                         */
#if 0
                        if (scan->bref.type == HAMMER2_BREF_TYPE_INODE &&
                            (scan->data->ipdata.op_flags &
                             HAMMER2_OPFLAG_DIRECTDATA)) {
                                continue;
                        }
#endif
                        if (scan->flags & HAMMER2_CHAIN_SUBMODIFIED) {
                                ok = 0;
                                break;
                        }
                }
                spin_unlock(&above->cst.spin);
                if (ok) {
                        atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED);
                        hammer2_chain_drop(child);      /* flag */
                }
        }

        /*
         * Unlock the child.  This can wind up dropping the child's
         * last ref, removing it from the parent's RB tree, and deallocating
         * the structure.  The RB_SCAN() our caller is doing handles the
         * situation.
         */
        hammer2_chain_unlock(child);
        hammer2_chain_drop(child);
        spin_lock(&above->cst.spin);

        /*
         * The parent cleared SUBMODIFIED prior to the scan.  If the child
         * still requires a flush (possibly due to being outside the current
         * synchronization zone), we must re-set SUBMODIFIED on the way back
         * up.
         */
finalize:
        return (0);
}

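/*
 * Roll child statistics up into the parent (informal description derived
 * from the callers in scan2 above): 'how' is -1 when the child's blockref
 * is being deleted from the parent, +1 when it is being inserted for the
 * first time, and 0 when an existing entry is being replaced.  Inode
 * parents fold the accumulated counts into their ipdata immediately.
 */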
static
void
hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how)
{
#if 0
        hammer2_chain_t *grandp;
#endif

        parent->data_count += child->data_count;
        parent->inode_count += child->inode_count;
        child->data_count = 0;
        child->inode_count = 0;
        if (how < 0) {
                parent->data_count -= child->bytes;
                if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        parent->inode_count -= 1;
#if 0
                        /* XXX child->data may be NULL atm */
                        parent->data_count -= child->data->ipdata.data_count;
                        parent->inode_count -= child->data->ipdata.inode_count;
#endif
                }
        } else if (how > 0) {
                parent->data_count += child->bytes;
                if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        parent->inode_count += 1;
#if 0
                        /* XXX child->data may be NULL atm */
                        parent->data_count += child->data->ipdata.data_count;
                        parent->inode_count += child->data->ipdata.inode_count;
#endif
                }
        }
        if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
                parent->data->ipdata.data_count += parent->data_count;
                parent->data->ipdata.inode_count += parent->inode_count;
#if 0
                for (grandp = parent->above->first_parent;
                     grandp;
                     grandp = grandp->next_parent) {
                        grandp->data_count += parent->data_count;
                        grandp->inode_count += parent->inode_count;
                }
#endif
                parent->data_count = 0;
                parent->inode_count = 0;
        }
}