/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * TRANSACTION AND FLUSH HANDLING
 *
 * Deceptively simple but actually fairly difficult to implement properly is
 * how I would describe it.
 *
 * Flushing generally occurs bottom-up but requires a top-down scan to
 * locate chains with MODIFIED and/or UPDATE bits set.  The ONFLUSH flag
 * tells the scan how to recurse downward to find these chains.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

#define FLUSH_DEBUG 0

#define HAMMER2_FLUSH_DEPTH_LIMIT	10	/* stack recursion limit */


/*
 * Recursively flush the specified chain.  The chain is locked and
 * referenced by the caller and will remain so on return.  The chain
 * will remain referenced throughout but can temporarily lose its
 * lock during the recursion to avoid unnecessarily stalling user
 * processes.
 */
struct hammer2_flush_info {
	hammer2_chain_t *parent;
	int		depth;
	int		diddeferral;
	int		error;		/* cumulative error */
	int		flags;
	struct h2_flush_list flushq;
	hammer2_chain_t	*debug;
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

static void hammer2_flush_core(hammer2_flush_info_t *info,
				hammer2_chain_t *chain, int flags);
static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);

/*
 * Any per-pfs transaction initialization goes here.
 */
void
hammer2_trans_manage_init(hammer2_pfs_t *pmp)
{
}

/*
 * Transaction support for any modifying operation.  Transactions are used
 * in the pmp layer by the frontend and in the spmp layer by the backend.
 *
 * 0			- Normal transaction, interlocked against flush
 *			  transaction.
 *
 * TRANS_ISFLUSH	- Flush transaction, interlocked against normal
 *			  transaction.
 *
 * TRANS_BUFCACHE	- Buffer cache transaction, no interlock.
 *
 * Initializing a new transaction allocates a transaction ID.  Typically
 * passed a pmp (hmp passed as NULL), indicating a cluster transaction.  Can
 * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
 * media target.  The latter mode is used by the recovery code.
 *
 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the
 * other is a set of any number of concurrent filesystem operations.  We
 * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops>
 * or we can have <running_flush> + <concurrent_fs_ops>.
 *
 * During a flush, new fs_ops are only blocked until the fs_ops prior to
 * the flush complete.  The new fs_ops can then run concurrent with the flush.
 *
 * Buffer-cache transactions operate as fs_ops but never block.  A
 * buffer-cache flush will run either before or after the current pending
 * flush depending on its state.
 */
void
hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags)
{
	uint32_t oflags;
	uint32_t nflags;
	int dowait;

	for (;;) {
		oflags = pmp->trans.flags;
		cpu_ccfence();
		dowait = 0;

		if (flags & HAMMER2_TRANS_ISFLUSH) {
			/*
			 * Requesting flush transaction.  Wait for all
			 * currently running transactions to finish.
			 * Afterwards, normal transactions will be
			 * interlocked.
			 */
			if (oflags & HAMMER2_TRANS_MASK) {
				nflags = oflags | HAMMER2_TRANS_FPENDING |
						  HAMMER2_TRANS_WAITING;
				dowait = 1;
			} else {
				nflags = (oflags | flags) + 1;
			}
		} else if (flags & HAMMER2_TRANS_BUFCACHE) {
			/*
			 * Requesting strategy transaction from buffer-cache,
			 * or a VM getpages/putpages through the buffer cache.
			 * We must allow such transactions in all situations
			 * to avoid deadlocks.
			 */
			nflags = (oflags | flags) + 1;
#if 0
			/*
			 * (old) previous code interlocked against the main
			 *	 flush pass.
			 */
			if ((oflags & (HAMMER2_TRANS_ISFLUSH |
				       HAMMER2_TRANS_PREFLUSH)) ==
			    HAMMER2_TRANS_ISFLUSH) {
				nflags = oflags | HAMMER2_TRANS_WAITING;
				dowait = 1;
			} else {
				nflags = (oflags | flags) + 1;
			}
#endif
		} else {
			/*
			 * Requesting normal modifying transaction (read-only
			 * operations do not use transactions).  Waits for
			 * any flush to finish before allowing.  Multiple
			 * modifying transactions can run concurrently.
			 */
			if (oflags & HAMMER2_TRANS_ISFLUSH) {
				nflags = oflags | HAMMER2_TRANS_WAITING;
				dowait = 1;
			} else {
				nflags = (oflags | flags) + 1;
			}
		}
		if (dowait)
			tsleep_interlock(&pmp->trans.sync_wait, 0);
		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
			if (dowait == 0)
				break;
			tsleep(&pmp->trans.sync_wait, PINTERLOCKED,
			       "h2trans", hz);
		} else {
			cpu_pause();
		}
		/* retry */
	}
}
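
/*
 * Illustrative sketch only, not part of the original source: a typical
 * frontend modifying operation brackets its work in a normal transaction
 * and pulls a CLC mtid per XOP (see hammer2_trans_sub() below).  The
 * function name and the elided XOP issue are hypothetical.
 */
#if 0
static void
example_frontend_op(hammer2_pfs_t *pmp)
{
	hammer2_tid_t mtid;

	hammer2_trans_init(pmp, 0);		/* normal fs_op transaction */
	mtid = hammer2_trans_sub(pmp);		/* CLC id for this XOP */
	/* ... issue XOP(s) sequenced by mtid ... */
	hammer2_trans_done(pmp);		/* may wake a pending flush */
}
#endif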
94 * 95 * 0 - Normal transaction, interlocked against flush 96 * transaction. 97 * 98 * TRANS_ISFLUSH - Flush transaction, interlocked against normal 99 * transaction. 100 * 101 * TRANS_BUFCACHE - Buffer cache transaction, no interlock. 102 * 103 * Initializing a new transaction allocates a transaction ID. Typically 104 * passed a pmp (hmp passed as NULL), indicating a cluster transaction. Can 105 * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single 106 * media target. The latter mode is used by the recovery code. 107 * 108 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the 109 * other is a set of any number of concurrent filesystem operations. We 110 * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops> 111 * or we can have <running_flush> + <concurrent_fs_ops>. 112 * 113 * During a flush, new fs_ops are only blocked until the fs_ops prior to 114 * the flush complete. The new fs_ops can then run concurrent with the flush. 115 * 116 * Buffer-cache transactions operate as fs_ops but never block. A 117 * buffer-cache flush will run either before or after the current pending 118 * flush depending on its state. 119 */ 120 void 121 hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags) 122 { 123 uint32_t oflags; 124 uint32_t nflags; 125 int dowait; 126 127 for (;;) { 128 oflags = pmp->trans.flags; 129 cpu_ccfence(); 130 dowait = 0; 131 132 if (flags & HAMMER2_TRANS_ISFLUSH) { 133 /* 134 * Requesting flush transaction. Wait for all 135 * currently running transactions to finish. 136 * Afterwords, normal transactions will be 137 * interlocked. 138 */ 139 if (oflags & HAMMER2_TRANS_MASK) { 140 nflags = oflags | HAMMER2_TRANS_FPENDING | 141 HAMMER2_TRANS_WAITING; 142 dowait = 1; 143 } else { 144 nflags = (oflags | flags) + 1; 145 } 146 } else if (flags & HAMMER2_TRANS_BUFCACHE) { 147 /* 148 * Requesting strategy transaction from buffer-cache, 149 * or a VM getpages/putpages through the buffer cache. 150 * We must allow such transactions in all situations 151 * to avoid deadlocks. 152 */ 153 nflags = (oflags | flags) + 1; 154 #if 0 155 /* 156 * (old) previous code interlocked against the main 157 * flush pass. 158 */ 159 if ((oflags & (HAMMER2_TRANS_ISFLUSH | 160 HAMMER2_TRANS_PREFLUSH)) == 161 HAMMER2_TRANS_ISFLUSH) { 162 nflags = oflags | HAMMER2_TRANS_WAITING; 163 dowait = 1; 164 } else { 165 nflags = (oflags | flags) + 1; 166 } 167 #endif 168 } else { 169 /* 170 * Requesting normal modifying transaction (read-only 171 * operations do not use transactions). Waits for 172 * any flush to finish before allowing. Multiple 173 * modifying transactions can run concurrently. 174 */ 175 if (oflags & HAMMER2_TRANS_ISFLUSH) { 176 nflags = oflags | HAMMER2_TRANS_WAITING; 177 dowait = 1; 178 } else { 179 nflags = (oflags | flags) + 1; 180 } 181 } 182 if (dowait) 183 tsleep_interlock(&pmp->trans.sync_wait, 0); 184 if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 185 if (dowait == 0) 186 break; 187 tsleep(&pmp->trans.sync_wait, PINTERLOCKED, 188 "h2trans", hz); 189 } else { 190 cpu_pause(); 191 } 192 /* retry */ 193 } 194 } 195 196 /* 197 * Start a sub-transaction, there is no 'subdone' function. This will 198 * issue a new modify_tid (mtid) for the current transaction, which is a 199 * CLC (cluster level change) id and not a per-node id. 200 * 201 * This function must be called for each XOP when multiple XOPs are run in 202 * sequence within a transaction. 
203 * 204 * Callers typically update the inode with the transaction mtid manually 205 * to enforce sequencing. 206 */ 207 hammer2_tid_t 208 hammer2_trans_sub(hammer2_pfs_t *pmp) 209 { 210 hammer2_tid_t mtid; 211 212 mtid = atomic_fetchadd_64(&pmp->modify_tid, 1); 213 214 return (mtid); 215 } 216 217 void 218 hammer2_trans_done(hammer2_pfs_t *pmp) 219 { 220 uint32_t oflags; 221 uint32_t nflags; 222 223 for (;;) { 224 oflags = pmp->trans.flags; 225 cpu_ccfence(); 226 KKASSERT(oflags & HAMMER2_TRANS_MASK); 227 if ((oflags & HAMMER2_TRANS_MASK) == 1) { 228 /* 229 * This was the last transaction 230 */ 231 nflags = (oflags - 1) & ~(HAMMER2_TRANS_ISFLUSH | 232 HAMMER2_TRANS_BUFCACHE | 233 HAMMER2_TRANS_FPENDING | 234 HAMMER2_TRANS_WAITING); 235 } else { 236 /* 237 * Still transactions pending 238 */ 239 nflags = oflags - 1; 240 } 241 if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 242 if ((nflags & HAMMER2_TRANS_MASK) == 0 && 243 (oflags & HAMMER2_TRANS_WAITING)) { 244 wakeup(&pmp->trans.sync_wait); 245 } 246 break; 247 } else { 248 cpu_pause(); 249 } 250 /* retry */ 251 } 252 } 253 254 /* 255 * Obtain new, unique inode number (not serialized by caller). 256 */ 257 hammer2_tid_t 258 hammer2_trans_newinum(hammer2_pfs_t *pmp) 259 { 260 hammer2_tid_t tid; 261 262 tid = atomic_fetchadd_64(&pmp->inode_tid, 1); 263 264 return tid; 265 } 266 267 /* 268 * Assert that a strategy call is ok here. Currently we allow strategy 269 * calls in all situations, including during flushes. Previously: 270 * (old) (1) In a normal transaction. 271 * (old) (2) In a flush transaction only if PREFLUSH is also set. 272 */ 273 void 274 hammer2_trans_assert_strategy(hammer2_pfs_t *pmp) 275 { 276 #if 0 277 KKASSERT((pmp->trans.flags & HAMMER2_TRANS_ISFLUSH) == 0 || 278 (pmp->trans.flags & HAMMER2_TRANS_PREFLUSH)); 279 #endif 280 } 281 282 283 /* 284 * Chains undergoing destruction are removed from the in-memory topology. 285 * To avoid getting lost these chains are placed on the delayed flush 286 * queue which will properly dispose of them. 287 * 288 * We do this instead of issuing an immediate flush in order to give 289 * recursive deletions (rm -rf, etc) a chance to remove more of the 290 * hierarchy, potentially allowing an enormous amount of write I/O to 291 * be avoided. 292 */ 293 void 294 hammer2_delayed_flush(hammer2_chain_t *chain) 295 { 296 if ((chain->flags & HAMMER2_CHAIN_DELAYED) == 0) { 297 hammer2_spin_ex(&chain->hmp->list_spin); 298 if ((chain->flags & (HAMMER2_CHAIN_DELAYED | 299 HAMMER2_CHAIN_DEFERRED)) == 0) { 300 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELAYED | 301 HAMMER2_CHAIN_DEFERRED); 302 TAILQ_INSERT_TAIL(&chain->hmp->flushq, 303 chain, flush_node); 304 hammer2_chain_ref(chain); 305 } 306 hammer2_spin_unex(&chain->hmp->list_spin); 307 hammer2_voldata_modify(chain->hmp); 308 } 309 } 310 311 /* 312 * Flush the chain and all modified sub-chains through the specified 313 * synchronization point, propagating blockref updates back up. As 314 * part of this propagation, mirror_tid and inode/data usage statistics 315 * propagates back upward. 316 * 317 * Returns a HAMMER2 error code, 0 if no error. Note that I/O errors from 318 * buffers dirtied during the flush operation can occur later. 319 * 320 * modify_tid (clc - cluster level change) is not propagated. 321 * 322 * update_tid (clc) is used for validation and is not propagated by this 323 * function. 
324 * 325 * This routine can be called from several places but the most important 326 * is from VFS_SYNC (frontend) via hammer2_inode_xop_flush (backend). 327 * 328 * chain is locked on call and will remain locked on return. The chain's 329 * UPDATE flag indicates that its parent's block table (which is not yet 330 * part of the flush) should be updated. 331 */ 332 int 333 hammer2_flush(hammer2_chain_t *chain, int flags) 334 { 335 hammer2_chain_t *scan; 336 hammer2_flush_info_t info; 337 hammer2_dev_t *hmp; 338 int loops; 339 340 /* 341 * Execute the recursive flush and handle deferrals. 342 * 343 * Chains can be ridiculously long (thousands deep), so to 344 * avoid blowing out the kernel stack the recursive flush has a 345 * depth limit. Elements at the limit are placed on a list 346 * for re-execution after the stack has been popped. 347 */ 348 bzero(&info, sizeof(info)); 349 TAILQ_INIT(&info.flushq); 350 info.flags = flags & ~HAMMER2_FLUSH_TOP; 351 352 /* 353 * Calculate parent (can be NULL), if not NULL the flush core 354 * expects the parent to be referenced so it can easily lock/unlock 355 * it without it getting ripped up. 356 */ 357 if ((info.parent = chain->parent) != NULL) 358 hammer2_chain_ref(info.parent); 359 360 /* 361 * Extra ref needed because flush_core expects it when replacing 362 * chain. 363 */ 364 hammer2_chain_ref(chain); 365 hmp = chain->hmp; 366 loops = 0; 367 368 for (;;) { 369 /* 370 * Move hmp->flushq to info.flushq if non-empty so it can 371 * be processed. 372 */ 373 if (TAILQ_FIRST(&hmp->flushq) != NULL) { 374 hammer2_spin_ex(&chain->hmp->list_spin); 375 TAILQ_CONCAT(&info.flushq, &hmp->flushq, flush_node); 376 hammer2_spin_unex(&chain->hmp->list_spin); 377 } 378 379 /* 380 * Unwind deep recursions which had been deferred. This 381 * can leave the FLUSH_* bits set for these chains, which 382 * will be handled when we [re]flush chain after the unwind. 383 */ 384 while ((scan = TAILQ_FIRST(&info.flushq)) != NULL) { 385 KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 386 TAILQ_REMOVE(&info.flushq, scan, flush_node); 387 atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED | 388 HAMMER2_CHAIN_DELAYED); 389 390 /* 391 * Now that we've popped back up we can do a secondary 392 * recursion on the deferred elements. 393 * 394 * NOTE: hammer2_flush() may replace scan. 395 */ 396 if (hammer2_debug & 0x0040) 397 kprintf("deferred flush %p\n", scan); 398 hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 399 if (scan->error == 0) { 400 hammer2_flush(scan, flags & ~HAMMER2_FLUSH_TOP); 401 hammer2_chain_unlock(scan); 402 hammer2_chain_drop(scan);/* ref from defer */ 403 } else { 404 info.error |= scan->error; 405 } 406 } 407 408 /* 409 * [re]flush chain. 410 */ 411 info.diddeferral = 0; 412 hammer2_flush_core(&info, chain, flags); 413 414 /* 415 * Only loop if deep recursions have been deferred. 416 */ 417 if (TAILQ_EMPTY(&info.flushq)) 418 break; 419 420 if (++loops % 1000 == 0) { 421 kprintf("hammer2_flush: excessive loops on %p\n", 422 chain); 423 if (hammer2_debug & 0x100000) 424 Debugger("hell4"); 425 } 426 } 427 hammer2_chain_drop(chain); 428 if (info.parent) 429 hammer2_chain_drop(info.parent); 430 return (info.error); 431 } 432 433 /* 434 * This is the core of the chain flushing code. The chain is locked by the 435 * caller and must also have an extra ref on it by the caller, and remains 436 * locked and will have an extra ref on return. info.parent is referenced 437 * but not locked. 
438 * 439 * Upon return, the caller can test the UPDATE bit on the chain to determine 440 * if the parent needs updating. 441 * 442 * (1) Determine if this node is a candidate for the flush, return if it is 443 * not. fchain and vchain are always candidates for the flush. 444 * 445 * (2) If we recurse too deep the chain is entered onto the deferral list and 446 * the current flush stack is aborted until after the deferral list is 447 * run. 448 * 449 * (3) Recursively flush live children (rbtree). This can create deferrals. 450 * A successful flush clears the MODIFIED and UPDATE bits on the children 451 * and typically causes the parent to be marked MODIFIED as the children 452 * update the parent's block table. A parent might already be marked 453 * MODIFIED due to a deletion (whos blocktable update in the parent is 454 * handled by the frontend), or if the parent itself is modified by the 455 * frontend for other reasons. 456 * 457 * (4) Permanently disconnected sub-trees are cleaned up by the front-end. 458 * Deleted-but-open inodes can still be individually flushed via the 459 * filesystem syncer. 460 * 461 * (5) Delete parents on the way back up if they are normal indirect blocks 462 * and have no children. 463 * 464 * (6) Note that an unmodified child may still need the block table in its 465 * parent updated (e.g. rename/move). The child will have UPDATE set 466 * in this case. 467 * 468 * WARNING ON BREF MODIFY_TID/MIRROR_TID 469 * 470 * blockref.modify_tid is consistent only within a PFS, and will not be 471 * consistent during synchronization. mirror_tid is consistent across the 472 * block device regardless of the PFS. 473 */ 474 static void 475 hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain, 476 int flags) 477 { 478 hammer2_chain_t *parent; 479 hammer2_dev_t *hmp; 480 int diddeferral; 481 int save_error; 482 483 /* 484 * (1) Optimize downward recursion to locate nodes needing action. 485 * Nothing to do if none of these flags are set. 486 */ 487 if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0) { 488 if (hammer2_debug & 0x200) { 489 if (info->debug == NULL) 490 info->debug = chain; 491 } else { 492 return; 493 } 494 } 495 496 hmp = chain->hmp; 497 diddeferral = info->diddeferral; 498 parent = info->parent; /* can be NULL */ 499 KKASSERT(chain->parent == parent); 500 501 /* 502 * Downward search recursion 503 */ 504 if (chain->flags & (HAMMER2_CHAIN_DEFERRED | HAMMER2_CHAIN_DELAYED)) { 505 /* 506 * Already deferred. 507 */ 508 ++info->diddeferral; 509 } else if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) && 510 (flags & HAMMER2_FLUSH_ALL) == 0 && 511 (flags & HAMMER2_FLUSH_TOP) == 0) { 512 /* 513 * If FLUSH_ALL is not specified the caller does not want 514 * to recurse through PFS roots. The typical sequence is 515 * to flush dirty PFS's starting at their root downward, 516 * then flush the device root (vchain). It is this second 517 * flush that typically leaves out the ALL flag. 518 * 519 * However we must still process the PFSROOT chains for block 520 * table updates in their parent (which IS part of our flush). 521 * 522 * NOTE: The volume root, vchain, does not set PFSBOUNDARY. 523 * 524 * NOTE: This test must be done before the depth-limit test, 525 * else it might become the top on a flushq iteration. 526 * 527 * NOTE: We must re-set ONFLUSH in the parent to retain if 528 * this chain (that we are skipping) requires work. 
529 */ 530 if (chain->flags & (HAMMER2_CHAIN_ONFLUSH | 531 HAMMER2_CHAIN_DESTROY | 532 HAMMER2_CHAIN_MODIFIED)) { 533 hammer2_chain_setflush(parent); 534 } 535 } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 536 /* 537 * Recursion depth reached. 538 */ 539 KKASSERT((chain->flags & HAMMER2_CHAIN_DELAYED) == 0); 540 hammer2_chain_ref(chain); 541 TAILQ_INSERT_TAIL(&info->flushq, chain, flush_node); 542 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED); 543 ++info->diddeferral; 544 } else if (chain->flags & (HAMMER2_CHAIN_ONFLUSH | 545 HAMMER2_CHAIN_DESTROY)) { 546 /* 547 * Downward recursion search (actual flush occurs bottom-up). 548 * pre-clear ONFLUSH. It can get set again due to races or 549 * flush errors, which we want so the scan finds us again in 550 * the next flush. 551 * 552 * We must also recurse if DESTROY is set so we can finally 553 * get rid of the related children, otherwise the node will 554 * just get re-flushed on lastdrop. 555 * 556 * WARNING! The recursion will unlock/relock info->parent 557 * (which is 'chain'), potentially allowing it 558 * to be ripped up. 559 */ 560 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 561 save_error = info->error; 562 info->error = 0; 563 info->parent = chain; 564 hammer2_spin_ex(&chain->core.spin); 565 RB_SCAN(hammer2_chain_tree, &chain->core.rbtree, 566 NULL, hammer2_flush_recurse, info); 567 hammer2_spin_unex(&chain->core.spin); 568 info->parent = parent; 569 570 /* 571 * Re-set the flush bits if the flush was incomplete or 572 * an error occurred. If an error occurs it is typically 573 * an allocation error. Errors do not cause deferrals. 574 */ 575 if (info->error) 576 hammer2_chain_setflush(chain); 577 info->error |= save_error; 578 if (info->diddeferral) 579 hammer2_chain_setflush(chain); 580 581 /* 582 * If we lost the parent->chain association we have to 583 * stop processing this chain because it is no longer 584 * in this recursion. If it moved, it will be handled 585 * by the ONFLUSH flag elsewhere. 586 */ 587 if (chain->parent != parent) { 588 kprintf("LOST CHILD2 %p->%p (actual parent %p)\n", 589 parent, chain, chain->parent); 590 goto done; 591 } 592 } 593 594 /* 595 * Now we are in the bottom-up part of the recursion. 596 * 597 * Do not update chain if lower layers were deferred. We continue 598 * to try to update the chain on lower-level errors, but the flush 599 * code may decide not to flush the volume root. 600 * 601 * XXX should we continue to try to update the chain if an error 602 * occurred? 603 */ 604 if (info->diddeferral) 605 goto done; 606 607 /* 608 * Both parent and chain must be locked in order to flush chain, 609 * in order to properly update the parent under certain conditions. 610 * 611 * In addition, we can't safely unlock/relock the chain once we 612 * start flushing the chain itself, which we would have to do later 613 * on in order to lock the parent if we didn't do that now. 614 */ 615 hammer2_chain_unlock(chain); 616 if (parent) 617 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 618 hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE); 619 620 /* 621 * Can't process if we can't access their content. 
622 */ 623 if ((parent && parent->error) || chain->error) { 624 kprintf("hammer2: chain error during flush\n"); 625 info->error |= chain->error; 626 if (parent) { 627 info->error |= parent->error; 628 hammer2_chain_unlock(parent); 629 } 630 goto done; 631 } 632 633 if (chain->parent != parent) { 634 kprintf("LOST CHILD3 %p->%p (actual parent %p)\n", 635 parent, chain, chain->parent); 636 KKASSERT(parent != NULL); 637 hammer2_chain_unlock(parent); 638 if ((chain->flags & HAMMER2_CHAIN_DELAYED) == 0) { 639 hammer2_chain_ref(chain); 640 TAILQ_INSERT_TAIL(&info->flushq, chain, flush_node); 641 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED); 642 ++info->diddeferral; 643 } 644 goto done; 645 } 646 647 /* 648 * Propagate the DESTROY flag downwards. This dummies up the flush 649 * code and tries to invalidate related buffer cache buffers to 650 * avoid the disk write. 651 */ 652 if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 653 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 654 655 /* 656 * Dispose of the modified bit. 657 * 658 * If parent is present, the UPDATE bit should already be set. 659 * UPDATE should already be set. 660 * bref.mirror_tid should already be set. 661 */ 662 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 663 KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) || 664 chain->parent == NULL); 665 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 666 atomic_add_long(&hammer2_count_modified_chains, -1); 667 668 /* 669 * Manage threads waiting for excessive dirty memory to 670 * be retired. 671 */ 672 if (chain->pmp) 673 hammer2_pfs_memory_wakeup(chain->pmp); 674 675 #if 0 676 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0 && 677 chain != &hmp->vchain && 678 chain != &hmp->fchain) { 679 /* 680 * Set UPDATE bit indicating that the parent block 681 * table requires updating. 682 */ 683 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 684 } 685 #endif 686 687 /* 688 * Issue the flush. This is indirect via the DIO. 689 * 690 * NOTE: A DELETED node that reaches this point must be 691 * flushed for synchronization point consistency. 692 * 693 * NOTE: Even though MODIFIED was already set, the related DIO 694 * might not be dirty due to a system buffer cache 695 * flush and must be set dirty if we are going to make 696 * further modifications to the buffer. Chains with 697 * embedded data don't need this. 698 */ 699 if (hammer2_debug & 0x1000) { 700 kprintf("Flush %p.%d %016jx/%d data=%016jx\n", 701 chain, chain->bref.type, 702 (uintmax_t)chain->bref.key, 703 chain->bref.keybits, 704 (uintmax_t)chain->bref.data_off); 705 } 706 if (hammer2_debug & 0x2000) { 707 Debugger("Flush hell"); 708 } 709 710 /* 711 * Update chain CRCs for flush. 712 * 713 * NOTE: Volume headers are NOT flushed here as they require 714 * special processing. 715 */ 716 switch(chain->bref.type) { 717 case HAMMER2_BREF_TYPE_FREEMAP: 718 /* 719 * Update the volume header's freemap_tid to the 720 * freemap's flushing mirror_tid. 
721 * 722 * (note: embedded data, do not call setdirty) 723 */ 724 KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED); 725 KKASSERT(chain == &hmp->fchain); 726 hmp->voldata.freemap_tid = chain->bref.mirror_tid; 727 if (hammer2_debug & 0x8000) { 728 /* debug only, avoid syslogd loop */ 729 kprintf("sync freemap mirror_tid %08jx\n", 730 (intmax_t)chain->bref.mirror_tid); 731 } 732 733 /* 734 * The freemap can be flushed independently of the 735 * main topology, but for the case where it is 736 * flushed in the same transaction, and flushed 737 * before vchain (a case we want to allow for 738 * performance reasons), make sure modifications 739 * made during the flush under vchain use a new 740 * transaction id. 741 * 742 * Otherwise the mount recovery code will get confused. 743 */ 744 ++hmp->voldata.mirror_tid; 745 break; 746 case HAMMER2_BREF_TYPE_VOLUME: 747 /* 748 * The free block table is flushed by 749 * hammer2_vfs_sync() before it flushes vchain. 750 * We must still hold fchain locked while copying 751 * voldata to volsync, however. 752 * 753 * These do not error per-say since their data does 754 * not need to be re-read from media on lock. 755 * 756 * (note: embedded data, do not call setdirty) 757 */ 758 hammer2_chain_lock(&hmp->fchain, 759 HAMMER2_RESOLVE_ALWAYS); 760 hammer2_voldata_lock(hmp); 761 if (hammer2_debug & 0x8000) { 762 /* debug only, avoid syslogd loop */ 763 kprintf("sync volume mirror_tid %08jx\n", 764 (intmax_t)chain->bref.mirror_tid); 765 } 766 767 /* 768 * Update the volume header's mirror_tid to the 769 * main topology's flushing mirror_tid. It is 770 * possible that voldata.mirror_tid is already 771 * beyond bref.mirror_tid due to the bump we made 772 * above in BREF_TYPE_FREEMAP. 773 */ 774 if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) { 775 hmp->voldata.mirror_tid = 776 chain->bref.mirror_tid; 777 } 778 779 /* 780 * The volume header is flushed manually by the 781 * syncer, not here. All we do here is adjust the 782 * crc's. 783 */ 784 KKASSERT(chain->data != NULL); 785 KKASSERT(chain->dio == NULL); 786 787 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 788 hammer2_icrc32( 789 (char *)&hmp->voldata + 790 HAMMER2_VOLUME_ICRC1_OFF, 791 HAMMER2_VOLUME_ICRC1_SIZE); 792 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 793 hammer2_icrc32( 794 (char *)&hmp->voldata + 795 HAMMER2_VOLUME_ICRC0_OFF, 796 HAMMER2_VOLUME_ICRC0_SIZE); 797 hmp->voldata.icrc_volheader = 798 hammer2_icrc32( 799 (char *)&hmp->voldata + 800 HAMMER2_VOLUME_ICRCVH_OFF, 801 HAMMER2_VOLUME_ICRCVH_SIZE); 802 803 if (hammer2_debug & 0x8000) { 804 /* debug only, avoid syslogd loop */ 805 kprintf("syncvolhdr %016jx %016jx\n", 806 hmp->voldata.mirror_tid, 807 hmp->vchain.bref.mirror_tid); 808 } 809 hmp->volsync = hmp->voldata; 810 atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 811 hammer2_voldata_unlock(hmp); 812 hammer2_chain_unlock(&hmp->fchain); 813 break; 814 case HAMMER2_BREF_TYPE_DATA: 815 /* 816 * Data elements have already been flushed via the 817 * logical file buffer cache. Their hash was set in 818 * the bref by the vop_write code. Do not re-dirty. 819 * 820 * Make sure any device buffer(s) have been flushed 821 * out here (there aren't usually any to flush) XXX. 822 */ 823 break; 824 case HAMMER2_BREF_TYPE_INDIRECT: 825 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 826 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 827 /* 828 * Buffer I/O will be cleaned up when the volume is 829 * flushed (but the kernel is free to flush it before 830 * then, as well). 
831 */ 832 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 833 hammer2_chain_setcheck(chain, chain->data); 834 break; 835 case HAMMER2_BREF_TYPE_DIRENT: 836 /* 837 * A directory entry can use the check area to store 838 * the filename for filenames <= 64 bytes, don't blow 839 * it up! 840 */ 841 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 842 if (chain->bytes) 843 hammer2_chain_setcheck(chain, chain->data); 844 break; 845 case HAMMER2_BREF_TYPE_INODE: 846 /* 847 * NOTE: We must call io_setdirty() to make any late 848 * changes to the inode data, the system might 849 * have already flushed the buffer. 850 */ 851 if (chain->data->ipdata.meta.op_flags & 852 HAMMER2_OPFLAG_PFSROOT) { 853 /* 854 * non-NULL pmp if mounted as a PFS. We must 855 * sync fields cached in the pmp? XXX 856 */ 857 hammer2_inode_data_t *ipdata; 858 859 hammer2_io_setdirty(chain->dio); 860 ipdata = &chain->data->ipdata; 861 if (chain->pmp) { 862 ipdata->meta.pfs_inum = 863 chain->pmp->inode_tid; 864 } 865 } else { 866 /* can't be mounted as a PFS */ 867 } 868 869 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 870 hammer2_chain_setcheck(chain, chain->data); 871 break; 872 default: 873 KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 874 panic("hammer2_flush_core: unsupported " 875 "embedded bref %d", 876 chain->bref.type); 877 /* NOT REACHED */ 878 } 879 880 /* 881 * If the chain was destroyed try to avoid unnecessary I/O 882 * that might not have yet occurred. Remove the data range 883 * from dedup candidacy and attempt to invalidation that 884 * potentially dirty portion of the I/O buffer. 885 */ 886 if (chain->flags & HAMMER2_CHAIN_DESTROY) { 887 hammer2_io_dedup_delete(hmp, 888 chain->bref.type, 889 chain->bref.data_off, 890 chain->bytes); 891 #if 0 892 hammer2_io_t *dio; 893 if (chain->dio) { 894 hammer2_io_inval(chain->dio, 895 chain->bref.data_off, 896 chain->bytes); 897 } else if ((dio = hammer2_io_getquick(hmp, 898 chain->bref.data_off, 899 chain->bytes, 900 1)) != NULL) { 901 hammer2_io_inval(dio, 902 chain->bref.data_off, 903 chain->bytes); 904 hammer2_io_putblk(&dio); 905 } 906 #endif 907 } 908 } 909 910 /* 911 * If UPDATE is set the parent block table may need to be updated. 912 * This can fail if the hammer2_chain_modify() fails. 913 * 914 * NOTE: UPDATE may be set on vchain or fchain in which case 915 * parent could be NULL. It's easiest to allow the case 916 * and test for NULL. parent can also wind up being NULL 917 * due to a deletion so we need to handle the case anyway. 918 * 919 * If no parent exists we can just clear the UPDATE bit. If the 920 * chain gets reattached later on the bit will simply get set 921 * again. 922 */ 923 if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL) 924 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 925 926 /* 927 * The chain may need its blockrefs updated in the parent. 928 */ 929 if (chain->flags & HAMMER2_CHAIN_UPDATE) { 930 hammer2_blockref_t *base; 931 int count; 932 933 /* 934 * Clear UPDATE flag, mark parent modified, update its 935 * modify_tid if necessary, and adjust the parent blockmap. 936 */ 937 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 938 939 /* 940 * (optional code) 941 * 942 * Avoid actually modifying and updating the parent if it 943 * was flagged for destruction. This can greatly reduce 944 * disk I/O in large tree removals because the 945 * hammer2_io_setinval() call in the upward recursion 946 * (see MODIFIED code above) can only handle a few cases. 
947 */ 948 if (parent->flags & HAMMER2_CHAIN_DESTROY) { 949 if (parent->bref.modify_tid < chain->bref.modify_tid) { 950 parent->bref.modify_tid = 951 chain->bref.modify_tid; 952 } 953 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 954 HAMMER2_CHAIN_BMAPUPD); 955 goto skipupdate; 956 } 957 958 /* 959 * The flusher is responsible for deleting empty indirect 960 * blocks at this point. If we don't do this, no major harm 961 * will be done but the empty indirect blocks will stay in 962 * the topology and make it a messy and inefficient. 963 * 964 * The flusher is also responsible for collapsing the 965 * content of an indirect block into its parent whenever 966 * possible (with some hysteresis). Not doing this will also 967 * not harm the topology, but would make it messy and 968 * inefficient. 969 */ 970 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { 971 if (hammer2_chain_indirect_maintenance(parent, chain)) 972 goto skipupdate; 973 } 974 975 /* 976 * We are updating the parent's blockmap, the parent must 977 * be set modified. If this fails we re-set the UPDATE flag 978 * in the child. 979 * 980 * NOTE! A modification error can be ENOSPC. We still want 981 * to flush modified chains recursively, not break out, 982 * so we just skip the update in this situation and 983 * continue. That is, we still need to try to clean 984 * out dirty chains and buffers. 985 * 986 * This may not help bulkfree though. XXX 987 */ 988 save_error = hammer2_chain_modify(parent, 0, 0, 0); 989 if (save_error) { 990 info->error |= save_error; 991 kprintf("hammer2_flush: %016jx.%02x error=%08x\n", 992 parent->bref.data_off, parent->bref.type, 993 save_error); 994 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 995 goto skipupdate; 996 } 997 if (parent->bref.modify_tid < chain->bref.modify_tid) 998 parent->bref.modify_tid = chain->bref.modify_tid; 999 1000 /* 1001 * Calculate blockmap pointer 1002 */ 1003 switch(parent->bref.type) { 1004 case HAMMER2_BREF_TYPE_INODE: 1005 /* 1006 * Access the inode's block array. However, there is 1007 * no block array if the inode is flagged DIRECTDATA. 1008 */ 1009 if (parent->data && 1010 (parent->data->ipdata.meta.op_flags & 1011 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 1012 base = &parent->data-> 1013 ipdata.u.blockset.blockref[0]; 1014 } else { 1015 base = NULL; 1016 } 1017 count = HAMMER2_SET_COUNT; 1018 break; 1019 case HAMMER2_BREF_TYPE_INDIRECT: 1020 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1021 if (parent->data) 1022 base = &parent->data->npdata[0]; 1023 else 1024 base = NULL; 1025 count = parent->bytes / sizeof(hammer2_blockref_t); 1026 break; 1027 case HAMMER2_BREF_TYPE_VOLUME: 1028 base = &chain->hmp->voldata.sroot_blockset.blockref[0]; 1029 count = HAMMER2_SET_COUNT; 1030 break; 1031 case HAMMER2_BREF_TYPE_FREEMAP: 1032 base = &parent->data->npdata[0]; 1033 count = HAMMER2_SET_COUNT; 1034 break; 1035 default: 1036 base = NULL; 1037 count = 0; 1038 panic("hammer2_flush_core: " 1039 "unrecognized blockref type: %d", 1040 parent->bref.type); 1041 } 1042 1043 /* 1044 * Blocktable updates 1045 * 1046 * We synchronize pending statistics at this time. Delta 1047 * adjustments designated for the current and upper level 1048 * are synchronized. 
1049 */ 1050 if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) { 1051 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 1052 hammer2_spin_ex(&parent->core.spin); 1053 hammer2_base_delete(parent, base, count, chain); 1054 hammer2_spin_unex(&parent->core.spin); 1055 /* base_delete clears both bits */ 1056 } else { 1057 atomic_clear_int(&chain->flags, 1058 HAMMER2_CHAIN_BMAPUPD); 1059 } 1060 } 1061 if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 1062 hammer2_spin_ex(&parent->core.spin); 1063 hammer2_base_insert(parent, base, count, 1064 chain, &chain->bref); 1065 hammer2_spin_unex(&parent->core.spin); 1066 /* base_insert sets BMAPPED */ 1067 } 1068 } 1069 skipupdate: 1070 if (parent) 1071 hammer2_chain_unlock(parent); 1072 1073 /* 1074 * Final cleanup after flush 1075 */ 1076 done: 1077 KKASSERT(chain->refs > 0); 1078 if (hammer2_debug & 0x200) { 1079 if (info->debug == chain) 1080 info->debug = NULL; 1081 } 1082 } 1083 1084 /* 1085 * Flush recursion helper, called from flush_core, calls flush_core. 1086 * 1087 * Flushes the children of the caller's chain (info->parent), restricted 1088 * by sync_tid. Set info->domodify if the child's blockref must propagate 1089 * back up to the parent. 1090 * 1091 * This function may set info->error as a side effect. 1092 * 1093 * Ripouts can move child from rbtree to dbtree or dbq but the caller's 1094 * flush scan order prevents any chains from being lost. A child can be 1095 * executes more than once. 1096 * 1097 * WARNING! If we do not call hammer2_flush_core() we must update 1098 * bref.mirror_tid ourselves to indicate that the flush has 1099 * processed the child. 1100 * 1101 * WARNING! parent->core spinlock is held on entry and return. 1102 */ 1103 static int 1104 hammer2_flush_recurse(hammer2_chain_t *child, void *data) 1105 { 1106 hammer2_flush_info_t *info = data; 1107 hammer2_chain_t *parent = info->parent; 1108 1109 /* 1110 * (child can never be fchain or vchain so a special check isn't 1111 * needed). 1112 * 1113 * We must ref the child before unlocking the spinlock. 1114 * 1115 * The caller has added a ref to the parent so we can temporarily 1116 * unlock it in order to lock the child. However, if it no longer 1117 * winds up being the child of the parent we must skip this child. 1118 * 1119 * NOTE! chain locking errors are fatal. They are never out-of-space 1120 * errors. 1121 */ 1122 hammer2_chain_ref(child); 1123 hammer2_spin_unex(&parent->core.spin); 1124 1125 hammer2_chain_unlock(parent); 1126 hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 1127 if (child->parent != parent) { 1128 kprintf("LOST CHILD1 %p->%p (actual parent %p)\n", 1129 parent, child, child->parent); 1130 goto done; 1131 } 1132 if (child->error) { 1133 kprintf("CHILD ERROR DURING FLUSH LOCK %p->%p\n", 1134 parent, child); 1135 info->error |= child->error; 1136 goto done; 1137 } 1138 1139 /* 1140 * Must propagate the DESTROY flag downwards, otherwise the 1141 * parent could end up never being removed because it will 1142 * be requeued to the flusher if it survives this run due to 1143 * the flag. 1144 */ 1145 if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 1146 atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROY); 1147 1148 /* 1149 * Recurse and collect deferral data. We're in the media flush, 1150 * this can cross PFS boundaries. 
1151 */ 1152 if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) { 1153 ++info->depth; 1154 hammer2_flush_core(info, child, info->flags); 1155 --info->depth; 1156 } else if (hammer2_debug & 0x200) { 1157 if (info->debug == NULL) 1158 info->debug = child; 1159 ++info->depth; 1160 hammer2_flush_core(info, child, info->flags); 1161 --info->depth; 1162 if (info->debug == child) 1163 info->debug = NULL; 1164 } 1165 1166 done: 1167 /* 1168 * Relock to continue the loop. 1169 */ 1170 hammer2_chain_unlock(child); 1171 hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 1172 if (parent->error) { 1173 kprintf("PARENT ERROR DURING FLUSH LOCK %p->%p\n", 1174 parent, child); 1175 info->error |= parent->error; 1176 } 1177 hammer2_chain_drop(child); 1178 KKASSERT(info->parent == parent); 1179 hammer2_spin_ex(&parent->core.spin); 1180 1181 return (0); 1182 } 1183 1184 /* 1185 * flush helper (backend threaded) 1186 * 1187 * Flushes core chains, issues disk sync, flushes volume roots. 1188 * 1189 * Primarily called from vfs_sync(). 1190 */ 1191 void 1192 hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg) 1193 { 1194 hammer2_xop_flush_t *xop = &arg->xop_flush; 1195 hammer2_chain_t *chain; 1196 hammer2_chain_t *parent; 1197 hammer2_dev_t *hmp; 1198 int flush_error = 0; 1199 int fsync_error = 0; 1200 int total_error = 0; 1201 int j; 1202 1203 /* 1204 * Flush core chains 1205 */ 1206 chain = hammer2_inode_chain(xop->head.ip1, thr->clindex, 1207 HAMMER2_RESOLVE_ALWAYS); 1208 if (chain) { 1209 hmp = chain->hmp; 1210 if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) || 1211 TAILQ_FIRST(&hmp->flushq) != NULL) { 1212 hammer2_flush(chain, HAMMER2_FLUSH_TOP); 1213 parent = chain->parent; 1214 KKASSERT(chain->pmp != parent->pmp); 1215 hammer2_chain_setflush(parent); 1216 } 1217 hammer2_chain_unlock(chain); 1218 hammer2_chain_drop(chain); 1219 chain = NULL; 1220 } else { 1221 hmp = NULL; 1222 } 1223 1224 /* 1225 * Flush volume roots. Avoid replication, we only want to 1226 * flush each hammer2_dev (hmp) once. 1227 */ 1228 for (j = thr->clindex - 1; j >= 0; --j) { 1229 if ((chain = xop->head.ip1->cluster.array[j].chain) != NULL) { 1230 if (chain->hmp == hmp) { 1231 chain = NULL; /* safety */ 1232 goto skip; 1233 } 1234 } 1235 } 1236 chain = NULL; /* safety */ 1237 1238 /* 1239 * spmp transaction. The super-root is never directly mounted so 1240 * there shouldn't be any vnodes, let alone any dirty vnodes 1241 * associated with it, so we shouldn't have to mess around with any 1242 * vnode flushes here. 1243 */ 1244 hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH); 1245 1246 /* 1247 * Media mounts have two 'roots', vchain for the topology 1248 * and fchain for the free block table. Flush both. 1249 * 1250 * Note that the topology and free block table are handled 1251 * independently, so the free block table can wind up being 1252 * ahead of the topology. We depend on the bulk free scan 1253 * code to deal with any loose ends. 1254 * 1255 * vchain and fchain do not error on-lock since their data does 1256 * not have to be re-read from media. 1257 */ 1258 hammer2_chain_ref(&hmp->vchain); 1259 hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 1260 hammer2_chain_ref(&hmp->fchain); 1261 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 1262 if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 1263 /* 1264 * This will also modify vchain as a side effect, 1265 * mark vchain as modified now. 
1266 */ 1267 hammer2_voldata_modify(hmp); 1268 chain = &hmp->fchain; 1269 flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP); 1270 KKASSERT(chain == &hmp->fchain); 1271 } 1272 hammer2_chain_unlock(&hmp->fchain); 1273 hammer2_chain_unlock(&hmp->vchain); 1274 hammer2_chain_drop(&hmp->fchain); 1275 /* vchain dropped down below */ 1276 1277 hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 1278 if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 1279 chain = &hmp->vchain; 1280 flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP); 1281 KKASSERT(chain == &hmp->vchain); 1282 } 1283 hammer2_chain_unlock(&hmp->vchain); 1284 hammer2_chain_drop(&hmp->vchain); 1285 1286 /* 1287 * We can't safely flush the volume header until we have 1288 * flushed any device buffers which have built up. 1289 * 1290 * XXX this isn't being incremental 1291 */ 1292 vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 1293 fsync_error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 1294 vn_unlock(hmp->devvp); 1295 if (fsync_error || flush_error) { 1296 kprintf("hammer2: sync error fsync=%d h2flush=0x%04x dev=%s\n", 1297 fsync_error, flush_error, hmp->devrepname); 1298 } 1299 1300 /* 1301 * The flush code sets CHAIN_VOLUMESYNC to indicate that the 1302 * volume header needs synchronization via hmp->volsync. 1303 * 1304 * XXX synchronize the flag & data with only this flush XXX 1305 */ 1306 if (fsync_error == 0 && flush_error == 0 && 1307 (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) { 1308 struct buf *bp; 1309 int vol_error = 0; 1310 1311 /* 1312 * Synchronize the disk before flushing the volume 1313 * header. 1314 */ 1315 bp = getpbuf(NULL); 1316 bp->b_bio1.bio_offset = 0; 1317 bp->b_bufsize = 0; 1318 bp->b_bcount = 0; 1319 bp->b_cmd = BUF_CMD_FLUSH; 1320 bp->b_bio1.bio_done = biodone_sync; 1321 bp->b_bio1.bio_flags |= BIO_SYNC; 1322 vn_strategy(hmp->devvp, &bp->b_bio1); 1323 fsync_error = biowait(&bp->b_bio1, "h2vol"); 1324 relpbuf(bp, NULL); 1325 1326 /* 1327 * Then we can safely flush the version of the 1328 * volume header synchronized by the flush code. 1329 */ 1330 j = hmp->volhdrno + 1; 1331 if (j >= HAMMER2_NUM_VOLHDRS) 1332 j = 0; 1333 if (j * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 1334 hmp->volsync.volu_size) { 1335 j = 0; 1336 } 1337 if (hammer2_debug & 0x8000) { 1338 /* debug only, avoid syslogd loop */ 1339 kprintf("sync volhdr %d %jd\n", 1340 j, (intmax_t)hmp->volsync.volu_size); 1341 } 1342 bp = getblk(hmp->devvp, j * HAMMER2_ZONE_BYTES64, 1343 HAMMER2_PBUFSIZE, 0, 0); 1344 atomic_clear_int(&hmp->vchain.flags, 1345 HAMMER2_CHAIN_VOLUMESYNC); 1346 bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 1347 vol_error = bwrite(bp); 1348 hmp->volhdrno = j; 1349 if (vol_error) 1350 fsync_error = vol_error; 1351 } 1352 if (flush_error) 1353 total_error = flush_error; 1354 if (fsync_error) 1355 total_error = hammer2_errno_to_error(fsync_error); 1356 1357 hammer2_trans_done(hmp->spmp); /* spmp trans */ 1358 skip: 1359 hammer2_xop_feed(&xop->head, NULL, thr->clindex, total_error); 1360 } 1361