/*
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

/*
 * Recursively flush the specified chain.  The chain is locked and
 * referenced by the caller and will remain so on return.  The chain
 * will remain referenced throughout but can temporarily lose its
 * lock during the recursion to avoid unnecessarily stalling user
 * processes.
 */
struct hammer2_flush_info {
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_trans_t *trans;
        int             depth;
        int             diddeferral;
        struct flush_deferral_list flush_list;
        hammer2_tid_t   sync_tid;       /* flush synchronization point */
        hammer2_tid_t   mirror_tid;     /* collect mirror TID updates */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

static void hammer2_chain_flush_core(hammer2_flush_info_t *info,
                        hammer2_chain_t *chain);
static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data);
static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data);

/*
 * Transaction support functions for writing to the filesystem.
 *
 * Initializing a new transaction allocates a transaction ID.  We
 * don't bother marking the volume header MODIFIED.  Instead, the volume
 * header will be updated only if the operation actually makes modifications
 * (which then propagate to the root).
 *
 * WARNING! Modifications to the root volume cannot dup the root volume
 *          header to handle synchronization points, so alloc_tid can
 *          wind up (harmlessly) more advanced on flush.
 */
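/*
 * Example usage (illustrative sketch only, not compiled code; error
 * handling is elided and the middle step stands in for any real chain
 * modification performed under the transaction's sync_tid):
 *
 *      hammer2_trans_t trans;
 *
 *      hammer2_trans_init(&trans, hmp);
 *      ... modify chains; their modify_tid is governed by sync_tid ...
 *      hammer2_trans_done(&trans);
 */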
void
hammer2_trans_init(hammer2_trans_t *trans, hammer2_mount_t *hmp)
{
        bzero(trans, sizeof(*trans));
        trans->hmp = hmp;
        hammer2_voldata_lock(hmp);
        trans->sync_tid = hmp->voldata.alloc_tid++;
        hammer2_voldata_unlock(hmp, 0); /* don't immediately mark modified */
}

void
hammer2_trans_done(hammer2_trans_t *trans)
{
        trans->hmp = NULL;
}

/*
 * Flush the chain and all modified sub-chains through the specified
 * synchronization point (sync_tid), propagating parent chain modifications
 * and mirror_tid updates back up as needed.  Since we are recursing downward
 * we do not have to deal with the complexities of multi-homed chains (chains
 * with multiple parents).
 *
 * Caller must have interlocked against any non-flush-related modifying
 * operations in progress whose modify_tid values are less than or equal
 * to the passed sync_tid.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * SUBMODIFIED is not cleared if modified elements with higher modify_tid
 * values (thus not flushed) are still present after the flush.
 *
 * If a chain is unable to completely flush we have to be sure that
 * SUBMODIFIED remains set up the parent chain, and that MOVED is not
 * cleared or our desynchronized bref will not properly update in the
 * parent.  The parent's indirect block is copied-on-write and adjusted
 * as needed so it no longer needs to be placemarked by the subchains,
 * allowing the sub-chains to be cleaned out.
 *
 * This routine can be called from several places but the most important
 * is from the hammer2_vop_reclaim() function.  We want to try to completely
 * clean out the inode structure to prevent disconnected inodes from
 * building up and blowing out the kmalloc pool.  However, it is not actually
 * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery
 * capability.
 *
 * chain is locked on call and will remain locked on return.  If a flush
 * occurred, the chain's MOVED bit will be set indicating that its parent
 * (which is not part of the flush) should be updated.
 */
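/*
 * Example caller (illustrative sketch only, not compiled code; loosely
 * modeled on the reclaim path described above, where "chain" stands for
 * a chain already referenced by the caller and "hmp" for its mount):
 *
 *      hammer2_trans_t trans;
 *
 *      hammer2_trans_init(&trans, hmp);
 *      hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE);
 *      hammer2_chain_flush(&trans, chain);
 *      hammer2_chain_unlock(chain);
 *      hammer2_trans_done(&trans);
 */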
void
hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain)
{
        hammer2_chain_t *scan;
        hammer2_flush_info_t info;

        /*
         * Execute the recursive flush and handle deferrals.
         *
         * Chains can be ridiculously long (thousands deep), so to
         * avoid blowing out the kernel stack the recursive flush has a
         * depth limit.  Elements at the limit are placed on a list
         * for re-execution after the stack has been popped.
         */
        bzero(&info, sizeof(info));
        TAILQ_INIT(&info.flush_list);
        info.hmp = trans->hmp;
        info.trans = trans;
        info.sync_tid = trans->sync_tid;
        info.mirror_tid = 0;

        for (;;) {
                /*
                 * Unwind deep recursions which had been deferred.  This
                 * can leave MOVED set for these chains, which will be
                 * handled when we [re]flush chain after the unwind.
                 */
                while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
                        KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
                        TAILQ_REMOVE(&info.flush_list, scan, flush_node);
                        atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);

                        /*
                         * Now that we've popped back up we can do a secondary
                         * recursion on the deferred elements.
                         */
                        if (hammer2_debug & 0x0040)
                                kprintf("deferred flush %p\n", scan);
                        hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
                        hammer2_chain_flush(trans, scan);
                        hammer2_chain_unlock(scan);
                        hammer2_chain_drop(scan);       /* ref from deferral */
                }

                /*
                 * Flush pass1 on root.  SUBMODIFIED can remain set after
                 * this call for numerous reasons, including write failures,
                 * but most likely due to only a partial flush being
                 * requested.
                 */
                info.diddeferral = 0;
                hammer2_chain_flush_core(&info, chain);

                /*
                 * Only loop if deep recursions have been deferred.
                 */
                if (TAILQ_EMPTY(&info.flush_list))
                        break;
        }

        /*
         * SUBMODIFIED can be temporarily cleared and then re-set, which
         * can prevent concurrent setsubmods from reaching all the way to
         * the root.  If after the flush we find the node is still in need
         * of flushing (though possibly due to modifications made outside
         * the requested synchronization zone), we must call setsubmod again
         * to cover the race.
         */
        if (chain->flags & (HAMMER2_CHAIN_MOVED |
                            HAMMER2_CHAIN_DELETED |
                            HAMMER2_CHAIN_MODIFIED |
                            HAMMER2_CHAIN_SUBMODIFIED)) {
                hammer2_chain_parent_setsubmod(chain);
        }
}
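/*
 * Recursion structure of the core flush (illustrative summary only;
 * the functions below are authoritative):
 *
 *      hammer2_chain_flush_core(parent)
 *          RB_SCAN -> hammer2_chain_flush_scan1(child)   (recursive)
 *                         hammer2_chain_flush_core(child)
 *                             ...
 *          RB_SCAN -> hammer2_chain_flush_scan2(child)   (non-recursive;
 *                         updates parent's blockref on the way back up)
 */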
/*
 * (chain) is locked by the caller and remains locked on return.
 * This function is keyed off of SUBMODIFIED but must make fine-grained
 * choices based on the synchronization point we are flushing to.
 *
 * If the flush accomplished any work chain will be flagged MOVED
 * indicating a copy-on-write propagation back up is required.
 * Deep sub-nodes may also have been entered onto the deferral list.
 * MOVED is never set on the volume root.
 *
 * NOTE: modify_tid is different from MODIFIED.  modify_tid is updated
 *       only when a chain is specifically modified, and not updated
 *       for copy-on-write propagations.  MODIFIED is set on any
 *       modification including copy-on-write propagations.
 */
static void
hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain)
{
        hammer2_mount_t *hmp;
        hammer2_blockref_t *bref;
        hammer2_off_t pbase;
        size_t bbytes;
        size_t boff;
        char *bdata;
        struct buf *bp;
        int error;
        int wasmodified;
        int diddeferral = 0;

        hmp = info->hmp;

        /*
         * If SUBMODIFIED is set we recurse the flush and adjust the
         * blockrefs accordingly.
         *
         * NOTE: Looping on SUBMODIFIED can prevent a flush from ever
         *       finishing in the face of filesystem activity.
         */
        if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
                hammer2_chain_t *saved_parent;

                /*
                 * Clear SUBMODIFIED to catch races.  Note that any child
                 * with MODIFIED, DELETED, or MOVED set during Scan2, after
                 * it processes the child, will cause SUBMODIFIED to be
                 * re-set.  If a child still has to be flushed, SUBMODIFIED
                 * will wind up being set again (for next time), but this
                 * does not stop us from synchronizing the block updates
                 * which did occur.
                 *
                 * We don't want to set our chain to MODIFIED gratuitously.
                 *
                 * We need an extra ref on chain because we are going to
                 * release its lock temporarily in our child loop.
                 */
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
                hammer2_chain_ref(chain);

                /*
                 * Run two passes.  The first pass handles MODIFIED and
                 * SUBMODIFIED chains and recurses while the second pass
                 * handles MOVED chains on the way back up.
                 *
                 * If the stack gets too deep we defer scan1, but must
                 * be sure to still run scan2 if on the next loop the
                 * deferred chain has been flushed and now needs MOVED
                 * handling on the way back up.
                 *
                 * Scan1 is recursive.
                 *
                 * NOTE: The act of handling a modified/submodified chain can
                 *       cause the MOVED flag to be set.  It can also be set
                 *       via hammer2_chain_delete() and in other situations.
                 *
                 * NOTE: RB_SCAN() must be used instead of RB_FOREACH()
                 *       because children can be physically removed during
                 *       the scan.
                 */
                saved_parent = info->parent;
                info->parent = chain;

                if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
                        if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
                                hammer2_chain_ref(chain);
                                TAILQ_INSERT_TAIL(&info->flush_list,
                                                  chain, flush_node);
                                atomic_set_int(&chain->flags,
                                               HAMMER2_CHAIN_DEFERRED);
                        }
                        diddeferral = 1;
                } else {
                        info->diddeferral = 0;
                        spin_lock(&chain->core->cst.spin);
                        RB_SCAN(hammer2_chain_tree, &chain->core->rbtree,
                                NULL, hammer2_chain_flush_scan1, info);
                        spin_unlock(&chain->core->cst.spin);
                        diddeferral += info->diddeferral;
                }

                /*
                 * Handle successfully flushed children who are in the MOVED
                 * state on the way back up the recursion.  This can have
                 * the side-effect of clearing MOVED.
                 *
                 * We execute this even if there were deferrals to try to
                 * keep the chain topology cleaner.
                 *
                 * Scan2 is non-recursive.
                 */
                spin_lock(&chain->core->cst.spin);
                RB_SCAN(hammer2_chain_tree, &chain->core->rbtree,
                        NULL, hammer2_chain_flush_scan2, info);
                spin_unlock(&chain->core->cst.spin);
                info->parent = saved_parent;
                hammer2_chain_drop(chain);
        }

        /*
         * Rollup diddeferral for caller.  Note direct assignment, not +=.
         */
        info->diddeferral = diddeferral;

        /*
         * Do not flush chain if there were any deferrals.  It will be
         * retried later after the deferrals are independently handled.
         */
        if (diddeferral) {
                if (hammer2_debug & 0x0008) {
                        kprintf("%*.*s} %p/%d %04x (deferred)",
                                info->depth, info->depth, "",
                                chain, chain->refs, chain->flags);
                }
                return;
        }
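        /*
         * Debug note: the kprintf("%*.*s} ...", info->depth, info->depth,
         * "", ...) idiom used above and below applies both the field width
         * and the precision to the empty string, printing info->depth
         * spaces followed by '}'.  The trace output is thus indented by
         * recursion depth.
         */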
        /*
         * Chain objects flagged for complete destruction recurse down from
         * their inode.  The inode will have already been removed from
         * its parent.  We have no need to disconnect the children from
         * their parents or the inode in this situation (it would just
         * waste time and storage with copy-on-write operations), so
         * we can clear both the MODIFIED bit and the MOVED bit.
         *
         * However, delete_tid must be within the synchronization zone
         * for us to act on this bit.  Open-but-deleted files have to
         * be managed by the cluster such that they are not subjected to
         * reclamation.
         *
         * DESTROYED chains stop processing here.
         */
        if ((chain->flags & HAMMER2_CHAIN_DESTROYED) &&
            (chain->delete_tid <= info->sync_tid)) {
                if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
                        if (chain->bp)
                                chain->bp->b_flags |= B_INVAL|B_RELBUF;
                        atomic_clear_int(&chain->flags,
                                         HAMMER2_CHAIN_MODIFIED);
                        hammer2_chain_drop(chain);
                }
                if (chain->flags & HAMMER2_CHAIN_MOVED) {
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MOVED);
                        hammer2_chain_drop(chain);
                }
                if (hammer2_debug & 0x0008) {
                        kprintf("%*.*s} %p/%d %04x (destroyed)",
                                info->depth, info->depth, "",
                                chain, chain->refs, chain->flags);
                }
                return;
        }

        /*
         * If MODIFIED is not set or modify_tid is > sync_tid we have
         * nothing to do.
         *
         * Note that MOVED can be set without MODIFIED being set due to
         * a deletion, in which case it is handled by Scan2 later on.
         *
         * Both bits can be set along with DELETED if data was modified
         * within the synchronization zone and the chain was then deleted
         * beyond the zone, in which case we still have to flush for
         * synchronization point consistency.
         */
        if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
                return;
        if (chain->bref.modify_tid > info->sync_tid) {
                if (hammer2_debug & 0x0008) {
                        kprintf("%*.*s} %p/%d %04x (skip - beyond sync_tid)",
                                info->depth, info->depth, "",
                                chain, chain->refs, chain->flags);
                }
                return;
        }

        /*
         * Issue flush.
         *
         * A DESTROYED node that reaches this point must be flushed for
         * synchronization point consistency.
         */

        /*
         * Update mirror_tid, clear MODIFIED, and set MOVED.
         *
         * The caller will update the parent's reference to this chain
         * by testing MOVED as long as the modification was in-bounds.
         *
         * MOVED is never set on the volume root as there is no parent
         * to adjust.
         */
        if (chain->bref.mirror_tid < info->sync_tid)
                chain->bref.mirror_tid = info->sync_tid;
        wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0;
        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
        if (chain == &hmp->vchain)
                kprintf("(FLUSHED VOLUME HEADER)\n");

        if ((chain->flags & HAMMER2_CHAIN_MOVED) ||
            chain == &hmp->vchain) {
                /*
                 * Drop the ref from the MODIFIED bit we cleared.
                 */
                if (wasmodified)
                        hammer2_chain_drop(chain);
        } else {
                /*
                 * If we were MODIFIED we inherit the ref from clearing
                 * that bit, otherwise we need another ref.
                 */
                if (wasmodified == 0)
                        hammer2_chain_ref(chain);
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
        }
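        /*
         * Ref accounting summary for the block above (descriptive only):
         * the MODIFIED and MOVED flags each hold one ref on the chain.
         * When MOVED is newly set it inherits the ref freed by clearing
         * MODIFIED (or takes a fresh ref if MODIFIED was not set); if
         * MOVED was already set, the MODIFIED ref is simply dropped.
         */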
        /*
         * If this is part of a recursive flush we can go ahead and write
         * out the buffer cache buffer and pass a new bref back up the chain.
         *
         * This will never be a volume header.
         */
        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_VOLUME:
                /*
                 * The volume header is flushed manually by the syncer, not
                 * here.
                 */
                KKASSERT(chain->data != NULL);
                KKASSERT(chain->bp == NULL);
                kprintf("volume header mirror_tid %jd\n",
                        hmp->voldata.mirror_tid);

                hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1] =
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRC1_OFF,
                                HAMMER2_VOLUME_ICRC1_SIZE);
                hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0] =
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRC0_OFF,
                                HAMMER2_VOLUME_ICRC0_SIZE);
                hmp->voldata.icrc_volheader =
                        hammer2_icrc32(
                                (char *)&hmp->voldata +
                                 HAMMER2_VOLUME_ICRCVH_OFF,
                                HAMMER2_VOLUME_ICRCVH_SIZE);
                hmp->volsync = hmp->voldata;
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
                break;
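        /*
         * Check-code layout for the volume header CRCs computed above
         * (descriptive summary; the offsets and sizes come from the
         * HAMMER2_VOLUME_ICRC* constants):
         *
         *      icrc_sects[SECT1]  covers the ICRC1 region
         *      icrc_sects[SECT0]  covers the ICRC0 region
         *      icrc_volheader     covers the ICRCVH region and is
         *                         computed last, after the section
         *                         CRCs are in place
         */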
        case HAMMER2_BREF_TYPE_DATA:
                /*
                 * Data elements have already been flushed via the logical
                 * file buffer cache.  Their hash was set in the bref by
                 * the vop_write code.
                 *
                 * Make sure the buffer(s) have been flushed out here.
                 */
                bbytes = chain->bytes;
                pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
                boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);

                bp = getblk(hmp->devvp, pbase, bbytes, GETBLK_NOWAIT, 0);
                if (bp) {
                        if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
                            (B_CACHE | B_DIRTY)) {
                                cluster_awrite(bp);
                        } else {
                                bp->b_flags |= B_RELBUF;
                                brelse(bp);
                        }
                }
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                /*
                 * Indirect blocks may be in an INITIAL state.  Use the
                 * chain_lock() call to ensure that the buffer has been
                 * instantiated (even though the chain is already locked,
                 * the buffer might not have been instantiated).
                 *
                 * Only write the buffer out if it is dirty; it is possible
                 * the operating system had already written out the buffer.
                 */
                hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
                KKASSERT(chain->bp != NULL);

                bp = chain->bp;
                if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) ||
                    (bp->b_flags & B_DIRTY)) {
                        bdwrite(chain->bp);
                } else {
                        brelse(chain->bp);
                }
                chain->bp = NULL;
                chain->data = NULL;
                hammer2_chain_unlock(chain);
                break;
        default:
                /*
                 * Embedded elements have to be flushed out.
                 */
                KKASSERT(chain->data != NULL);
                KKASSERT(chain->bp == NULL);
                bref = &chain->bref;

                KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);
                KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) ==
                         HAMMER2_CHECK_ISCSI32);

                if (chain->bp == NULL) {
                        /*
                         * The data is embedded, we have to acquire the
                         * buffer cache buffer and copy the data into it.
                         */
                        if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
                                bbytes = HAMMER2_MINIOSIZE;
                        pbase = bref->data_off &
                                ~(hammer2_off_t)(bbytes - 1);
                        boff = bref->data_off &
                               HAMMER2_OFF_MASK & (bbytes - 1);

                        /*
                         * The getblk() optimization can only be used if the
                         * physical block size matches the request.
                         */
                        if (chain->bytes == bbytes) {
                                bp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
                                error = 0;
                        } else {
                                error = bread(hmp->devvp, pbase, bbytes, &bp);
                                KKASSERT(error == 0);
                        }
                        bdata = (char *)bp->b_data + boff;

                        /*
                         * Copy the data to the buffer, mark the buffer
                         * dirty, and convert the chain to unmodified.
                         */
                        bcopy(chain->data, bdata, chain->bytes);
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                        bp = NULL;
                        chain->bref.check.iscsi32.value =
                                hammer2_icrc32(chain->data, chain->bytes);
                        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
                                ++hammer2_iod_meta_write;
                        else
                                ++hammer2_iod_indr_write;
                } else {
                        chain->bref.check.iscsi32.value =
                                hammer2_icrc32(chain->data, chain->bytes);
                }
        }
        if (hammer2_debug & 0x0008) {
                kprintf("%*.*s} %p/%d %04x (flushed)",
                        info->depth, info->depth, "",
                        chain, chain->refs, chain->flags);
        }
}

/*
 * Flush helper scan1 (recursive)
 *
 * Flushes the children of the caller's chain (parent) and updates
 * the blockref.
 *
 * Ripouts during the loop should not cause any problems.  Because we are
 * flushing to a synchronization point, modification races will occur after
 * sync_tid and do not have to be flushed anyway.
 */
static int
hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data)
{
        hammer2_flush_info_t *info = data;
        hammer2_chain_t *parent = info->parent;
        /*hammer2_mount_t *hmp = info->hmp;*/
        int diddeferral;

        /*
         * We should only need to recurse if SUBMODIFIED is set, but as
         * a safety we also recurse if MODIFIED is set.  Return early
         * if neither bit is set.
         */
        if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
                             HAMMER2_CHAIN_MODIFIED)) == 0) {
                return (0);
        }
        spin_unlock(&parent->core->cst.spin);

        /*
         * The caller has added a ref to the parent so we can temporarily
         * unlock it in order to lock the child.  Re-check the flags before
         * continuing.
         */
        hammer2_chain_unlock(parent);
        hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);

        if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
                             HAMMER2_CHAIN_MODIFIED)) == 0) {
                hammer2_chain_unlock(child);
                hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
                spin_lock(&parent->core->cst.spin);
                return (0);
        }

        /*
         * Propagate the DESTROYED flag if found set as well as deal with
         * delete_tid.  This also causes SUBMODIFIED to be propagated
         * downward to keep the recursion going.
         *
         * In the case of delete_tid, nothing need be done.  Destruction
         * occurs after any deletions and destruction of internal chains
         * where delete_tid may be 0 (since we don't bother to copy-on-write
         * the propagation of a deletion) will pass the conditional just
         * fine.
         *
         * This optimization allows the inode reclaim (destroy unlinked file
         * on vnode reclamation after last close) to be flagged by just
         * setting HAMMER2_CHAIN_DESTROYED at the top level.
         */
        if ((parent->flags & HAMMER2_CHAIN_DESTROYED) &&
            (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
                atomic_set_int(&child->flags,
                               HAMMER2_CHAIN_DESTROYED |
                               HAMMER2_CHAIN_SUBMODIFIED);
        }

        /*
         * Recurse and collect deferral data.
         */
        diddeferral = info->diddeferral;
        ++info->depth;
        hammer2_chain_flush_core(info, child);
        --info->depth;
        info->diddeferral += diddeferral;

        hammer2_chain_unlock(child);

        /*
         * Always resolve when relocking the parent meta-data so Scan2
         * has the indirect block data in-hand to handle the MOVED bit.
         */
        hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);

        spin_lock(&parent->core->cst.spin);
        return (0);
}
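/*
 * Note on the RB_SCAN callback contract shared by scan1 and scan2
 * (descriptive summary; sys/tree.h is authoritative): each callback is
 * entered with the parent's core spinlock held, may drop it to lock the
 * child, must re-acquire it before returning, and returns 0 to continue
 * the scan (both helpers always return 0).
 */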
/*
 * Flush helper scan2 (non-recursive)
 *
 * This pass on a chain's children propagates any MOVED or DELETED
 * elements back up the chain towards the root.  The bref's modify_tid
 * must be within the synchronization zone for MOVED to be recognized
 * and delete_tid must be within the synchronization zone for DELETED
 * to be recognized.
 *
 * We must re-set SUBMODIFIED if appropriate.
 */
static int
hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data)
{
        enum { HC_NONE, HC_DELETE, HC_UPDATE } action = HC_NONE;
        hammer2_flush_info_t *info = data;
        hammer2_chain_t *parent = info->parent;
        hammer2_mount_t *hmp = info->hmp;
        hammer2_blockref_t *base;
        int count;

        /*
         * Check update conditions prior to locking child.
         * We may not be able to safely test the 64-bit TIDs
         * but we can certainly test the flags.
         */
        if ((child->flags & (HAMMER2_CHAIN_DELETED |
                             HAMMER2_CHAIN_MOVED)) == 0) {
                goto finalize;
        }
        spin_unlock(&parent->core->cst.spin);

        /*
         * The MOVED bit implies an additional reference which prevents
         * the child from being destroyed out from under our operation
         * so we can lock the child safely without worrying about it
         * getting ripped up (?).
         */
        hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);

        /*
         * Full condition check.  We can only update and clear MOVED
         * if the child is deleted or updated within our synchronization
         * zone.
         */
        if ((child->flags & HAMMER2_CHAIN_DELETED) &&
            child->delete_tid <= info->sync_tid) {
                action = HC_DELETE;
        } else if ((child->flags & HAMMER2_CHAIN_MOVED) &&
                   child->bref.modify_tid <= info->sync_tid) {
                action = HC_UPDATE;
        } else {
                hammer2_chain_unlock(child);
                spin_lock(&parent->core->cst.spin);
                goto finalize;
        }

        /*
         * If the parent is to be deleted then we can clear MOVED
         * in the child without updating the parent.  That is, it
         * doesn't matter that the parent->child blockref is left intact
         * because the parent is going to be deleted too.  This little
         * bit of code will result in major optimizations of recursive
         * file tree deletions and truncations.
         */
        if ((parent->flags & HAMMER2_CHAIN_DELETED) &&
            parent->delete_tid <= info->sync_tid) {
                goto cleanup;
        }

        /*
         * The parent's blockref to the child must be deleted or updated.
         *
         * This point is not reached on successful DESTROYED optimizations
         * but can be reached on recursive deletions.
         */
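        /*
         * Note: HAMMER2_MODIFY_NO_MODIFY_TID is passed because blockref
         * table updates are propagation-only and must not bump the
         * parent's modify_tid (see the comment above the table update
         * below).
         */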
        hammer2_chain_modify(info->trans, parent,
                             HAMMER2_MODIFY_NO_MODIFY_TID);

        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
                KKASSERT((parent->data->ipdata.op_flags &
                          HAMMER2_OPFLAG_DIRECTDATA) == 0);
                base = &parent->data->ipdata.u.blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                if (parent->data) {
                        base = &parent->data->npdata.blockref[0];
                } else {
                        base = NULL;
                        KKASSERT(child->flags & HAMMER2_CHAIN_DELETED);
                }
                count = parent->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                base = &hmp->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
                base = NULL;
                count = 0;
                panic("hammer2_chain_flush_scan2: "
                      "unrecognized blockref type: %d",
                      parent->bref.type);
        }

        /*
         * Update the parent's blockref table and propagate mirror_tid.
         * blockref updates do not touch modify_tid.  Instead, mirroring
         * operations always reconcile the entire array during their
         * mirror_tid based recursion.
         *
         * WARNING! Deleted chains may still be used by the filesystem
         *          in a later duplication, for example in a rename()
         *          operation, as can any topological movement of the
         *          related blocks.
         *
         *          We adjust the parent's bref pointer to the child but
         *          we do not modify the contents of the child.
         */
        if (action == HC_DELETE) {
                if (base) {
                        KKASSERT(child->index < count);
                        bzero(&base[child->index], sizeof(child->bref));
                }
        } else {
                if (base) {
                        KKASSERT(child->index < count);
                        base[child->index] = child->bref;
                }
        }
        KKASSERT(child->index >= 0);

        if (parent->bref.mirror_tid < child->bref.mirror_tid) {
                parent->bref.mirror_tid = child->bref.mirror_tid;
        }
        if (parent->bref.type == HAMMER2_BREF_TYPE_VOLUME &&
            hmp->voldata.mirror_tid < child->bref.mirror_tid) {
                hmp->voldata.mirror_tid = child->bref.mirror_tid;
        }

cleanup:
        /*
         * Cleanup the child's MOVED flag and unlock the child.
         */
        if (child->flags & HAMMER2_CHAIN_MOVED) {
                atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED);
                hammer2_chain_drop(child);      /* flag */
        }

        /*
         * Unlock the child.  This can wind up dropping the child's
         * last ref, removing it from the parent's RB tree, and deallocating
         * the structure.  The RB_SCAN() our caller is doing handles the
         * situation.
         */
        hammer2_chain_unlock(child);
        spin_lock(&parent->core->cst.spin);

        /*
         * The parent cleared SUBMODIFIED prior to the scan.  If the child
         * still requires a flush (possibly due to being outside the current
         * synchronization zone), we must re-set SUBMODIFIED on the way back
         * up.
         */
finalize:
        if (child->flags & (HAMMER2_CHAIN_MOVED |
                            HAMMER2_CHAIN_DELETED |
                            HAMMER2_CHAIN_MODIFIED |
                            HAMMER2_CHAIN_SUBMODIFIED)) {
                atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
        }

        return (0);
}