/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to destroy a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 *
 * The io_token is required for anything which might race bioops and bio_done
 * callbacks, with one exception: a successful hammer_try_interlock_norefs().
 * The fs_token will be held in all other cases.
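 *
 * As a rough illustration of the rule above (a sketch only; the bioops
 * callbacks later in this file are the authoritative examples), callback
 * code brackets its work with the io_token:
 *
 *	lwkt_gettoken(&hmp->io_token);
 *	... touch io->running, io->waiting, the mod trees, iorun_list ...
 *	lwkt_reltoken(&hmp->io_token);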
49 */ 50 51 #include "hammer.h" 52 #include <sys/fcntl.h> 53 #include <sys/nlookup.h> 54 #include <sys/buf.h> 55 #include <sys/buf2.h> 56 57 static void hammer_io_modify(hammer_io_t io, int count); 58 static void hammer_io_deallocate(struct buf *bp); 59 static void hammer_indirect_callback(struct bio *bio); 60 #if 0 61 static void hammer_io_direct_read_complete(struct bio *nbio); 62 #endif 63 static void hammer_io_direct_write_complete(struct bio *nbio); 64 static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data); 65 static void hammer_io_set_modlist(struct hammer_io *io); 66 static void hammer_io_flush_mark(hammer_volume_t volume); 67 68 static int 69 hammer_mod_rb_compare(hammer_io_t io1, hammer_io_t io2) 70 { 71 hammer_off_t io1_offset; 72 hammer_off_t io2_offset; 73 74 io1_offset = ((io1->offset & HAMMER_OFF_SHORT_MASK) << 8) | 75 HAMMER_VOL_DECODE(io1->offset); 76 io2_offset = ((io2->offset & HAMMER_OFF_SHORT_MASK) << 8) | 77 HAMMER_VOL_DECODE(io2->offset); 78 79 if (io1_offset < io2_offset) 80 return(-1); 81 if (io1_offset > io2_offset) 82 return(1); 83 return(0); 84 } 85 86 RB_GENERATE(hammer_mod_rb_tree, hammer_io, rb_node, hammer_mod_rb_compare); 87 88 /* 89 * Initialize a new, already-zero'd hammer_io structure, or reinitialize 90 * an existing hammer_io structure which may have switched to another type. 91 */ 92 void 93 hammer_io_init(hammer_io_t io, hammer_volume_t volume, enum hammer_io_type type) 94 { 95 io->volume = volume; 96 io->hmp = volume->io.hmp; 97 io->type = type; 98 } 99 100 /* 101 * Helper routine to disassociate a buffer cache buffer from an I/O 102 * structure. The io must be interlocked and marked appropriately for 103 * reclamation. 104 * 105 * The io must be in a released state with the io->bp owned and 106 * locked by the caller of this function. When not called from an 107 * io_deallocate() this cannot race an io_deallocate() since the 108 * kernel would be unable to get the buffer lock in that case. 109 * (The released state in this case means we own the bp, not the 110 * hammer_io structure). 111 * 112 * The io may have 0 or 1 references depending on who called us. The 113 * caller is responsible for dealing with the refs. 114 * 115 * This call can only be made when no action is required on the buffer. 116 * 117 * This function is guaranteed not to race against anything because we 118 * own both the io lock and the bp lock and are interlocked with no 119 * references. 120 */ 121 static void 122 hammer_io_disassociate(hammer_io_structure_t iou) 123 { 124 struct buf *bp = iou->io.bp; 125 126 KKASSERT(iou->io.released); 127 KKASSERT(iou->io.modified == 0); 128 KKASSERT(LIST_FIRST(&bp->b_dep) == (void *)iou); 129 buf_dep_init(bp); 130 iou->io.bp = NULL; 131 132 /* 133 * If the buffer was locked someone wanted to get rid of it. 
134 */ 135 if (bp->b_flags & B_LOCKED) { 136 atomic_add_int(&hammer_count_io_locked, -1); 137 bp->b_flags &= ~B_LOCKED; 138 } 139 if (iou->io.reclaim) { 140 bp->b_flags |= B_NOCACHE|B_RELBUF; 141 iou->io.reclaim = 0; 142 } 143 144 switch(iou->io.type) { 145 case HAMMER_STRUCTURE_VOLUME: 146 iou->volume.ondisk = NULL; 147 break; 148 case HAMMER_STRUCTURE_DATA_BUFFER: 149 case HAMMER_STRUCTURE_META_BUFFER: 150 case HAMMER_STRUCTURE_UNDO_BUFFER: 151 iou->buffer.ondisk = NULL; 152 break; 153 case HAMMER_STRUCTURE_DUMMY: 154 panic("hammer_io_disassociate: bad io type"); 155 break; 156 } 157 } 158 159 /* 160 * Wait for any physical IO to complete 161 * 162 * XXX we aren't interlocked against a spinlock or anything so there 163 * is a small window in the interlock / io->running == 0 test. 164 */ 165 void 166 hammer_io_wait(hammer_io_t io) 167 { 168 if (io->running) { 169 hammer_mount_t hmp = io->hmp; 170 171 lwkt_gettoken(&hmp->io_token); 172 while (io->running) { 173 io->waiting = 1; 174 tsleep_interlock(io, 0); 175 if (io->running) 176 tsleep(io, PINTERLOCKED, "hmrflw", hz); 177 } 178 lwkt_reltoken(&hmp->io_token); 179 } 180 } 181 182 /* 183 * Wait for all currently queued HAMMER-initiated I/Os to complete. 184 * 185 * This is not supposed to count direct I/O's but some can leak 186 * through (for non-full-sized direct I/Os). 187 */ 188 void 189 hammer_io_wait_all(hammer_mount_t hmp, const char *ident, int doflush) 190 { 191 struct hammer_io iodummy; 192 hammer_io_t io; 193 194 /* 195 * Degenerate case, no I/O is running 196 */ 197 lwkt_gettoken(&hmp->io_token); 198 if (TAILQ_EMPTY(&hmp->iorun_list)) { 199 lwkt_reltoken(&hmp->io_token); 200 if (doflush) 201 hammer_io_flush_sync(hmp); 202 return; 203 } 204 bzero(&iodummy, sizeof(iodummy)); 205 iodummy.type = HAMMER_STRUCTURE_DUMMY; 206 207 /* 208 * Add placemarker and then wait until it becomes the head of 209 * the list. 210 */ 211 TAILQ_INSERT_TAIL(&hmp->iorun_list, &iodummy, iorun_entry); 212 while (TAILQ_FIRST(&hmp->iorun_list) != &iodummy) { 213 tsleep(&iodummy, 0, ident, 0); 214 } 215 216 /* 217 * Chain in case several placemarkers are present. 218 */ 219 TAILQ_REMOVE(&hmp->iorun_list, &iodummy, iorun_entry); 220 io = TAILQ_FIRST(&hmp->iorun_list); 221 if (io && io->type == HAMMER_STRUCTURE_DUMMY) 222 wakeup(io); 223 lwkt_reltoken(&hmp->io_token); 224 225 if (doflush) 226 hammer_io_flush_sync(hmp); 227 } 228 229 /* 230 * Clear a flagged error condition on a I/O buffer. The caller must hold 231 * its own ref on the buffer. 232 */ 233 void 234 hammer_io_clear_error(struct hammer_io *io) 235 { 236 hammer_mount_t hmp = io->hmp; 237 238 lwkt_gettoken(&hmp->io_token); 239 if (io->ioerror) { 240 io->ioerror = 0; 241 hammer_rel(&io->lock); 242 KKASSERT(hammer_isactive(&io->lock)); 243 } 244 lwkt_reltoken(&hmp->io_token); 245 } 246 247 void 248 hammer_io_clear_error_noassert(struct hammer_io *io) 249 { 250 hammer_mount_t hmp = io->hmp; 251 252 lwkt_gettoken(&hmp->io_token); 253 if (io->ioerror) { 254 io->ioerror = 0; 255 hammer_rel(&io->lock); 256 } 257 lwkt_reltoken(&hmp->io_token); 258 } 259 260 /* 261 * This is an advisory function only which tells the buffer cache 262 * the bp is not a meta-data buffer, even though it is backed by 263 * a block device. 264 * 265 * This is used by HAMMER's reblocking code to avoid trying to 266 * swapcache the filesystem's data when it is read or written 267 * by the reblocking code. 268 * 269 * The caller has a ref on the buffer preventing the bp from 270 * being disassociated from it. 
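 *
 * A minimal sketch of the intended use (illustrative only; the real call
 * sites live in the reblocking / data-buffer paths, not in this file):
 *
 *	error = hammer_io_read(volume->devvp, &buffer->io, limit);
 *	if (error == 0)
 *		hammer_io_notmeta(buffer);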
271 */ 272 void 273 hammer_io_notmeta(hammer_buffer_t buffer) 274 { 275 if ((buffer->io.bp->b_flags & B_NOTMETA) == 0) { 276 hammer_mount_t hmp = buffer->io.hmp; 277 278 lwkt_gettoken(&hmp->io_token); 279 buffer->io.bp->b_flags |= B_NOTMETA; 280 lwkt_reltoken(&hmp->io_token); 281 } 282 } 283 284 /* 285 * Load bp for a HAMMER structure. The io must be exclusively locked by 286 * the caller. 287 * 288 * This routine is mostly used on meta-data and small-data blocks. Generally 289 * speaking HAMMER assumes some locality of reference and will cluster. 290 * 291 * Note that the caller (hammer_ondisk.c) may place further restrictions 292 * on clusterability via the limit (in bytes). Typically large-data 293 * zones cannot be clustered due to their mixed buffer sizes. This is 294 * not an issue since such clustering occurs in hammer_vnops at the 295 * regular file layer, whereas this is the buffered block device layer. 296 * 297 * No I/O callbacks can occur while we hold the buffer locked. 298 */ 299 int 300 hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit) 301 { 302 struct buf *bp; 303 int error; 304 305 if ((bp = io->bp) == NULL) { 306 atomic_add_int(&hammer_count_io_running_read, io->bytes); 307 if (hammer_cluster_enable && limit > io->bytes) { 308 error = cluster_read(devvp, io->offset + limit, 309 io->offset, io->bytes, 310 HAMMER_CLUSTER_SIZE, 311 HAMMER_CLUSTER_SIZE, 312 &io->bp); 313 } else { 314 error = bread(devvp, io->offset, io->bytes, &io->bp); 315 } 316 hammer_stats_disk_read += io->bytes; 317 atomic_add_int(&hammer_count_io_running_read, -io->bytes); 318 319 /* 320 * The code generally assumes b_ops/b_dep has been set-up, 321 * even if we error out here. 322 */ 323 bp = io->bp; 324 if ((hammer_debug_io & 0x0001) && (bp->b_flags & B_IODEBUG)) { 325 const char *metatype; 326 327 switch(io->type) { 328 case HAMMER_STRUCTURE_VOLUME: 329 metatype = "volume"; 330 break; 331 case HAMMER_STRUCTURE_META_BUFFER: 332 switch(((struct hammer_buffer *)io)-> 333 zoneX_offset & HAMMER_OFF_ZONE_MASK) { 334 case HAMMER_ZONE_BTREE: 335 metatype = "btree"; 336 break; 337 case HAMMER_ZONE_META: 338 metatype = "meta"; 339 break; 340 case HAMMER_ZONE_FREEMAP: 341 metatype = "freemap"; 342 break; 343 default: 344 metatype = "meta?"; 345 break; 346 } 347 break; 348 case HAMMER_STRUCTURE_DATA_BUFFER: 349 metatype = "data"; 350 break; 351 case HAMMER_STRUCTURE_UNDO_BUFFER: 352 metatype = "undo"; 353 break; 354 default: 355 metatype = "unknown"; 356 break; 357 } 358 kprintf("doff %016jx %s\n", 359 (intmax_t)bp->b_bio2.bio_offset, 360 metatype); 361 } 362 bp->b_flags &= ~B_IODEBUG; 363 bp->b_ops = &hammer_bioops; 364 KKASSERT(LIST_FIRST(&bp->b_dep) == NULL); 365 366 /* io->worklist is locked by the io lock */ 367 LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node); 368 BUF_KERNPROC(bp); 369 KKASSERT(io->modified == 0); 370 KKASSERT(io->running == 0); 371 KKASSERT(io->waiting == 0); 372 io->released = 0; /* we hold an active lock on bp */ 373 } else { 374 error = 0; 375 } 376 return(error); 377 } 378 379 /* 380 * Similar to hammer_io_read() but returns a zero'd out buffer instead. 381 * Must be called with the IO exclusively locked. 382 * 383 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background 384 * I/O by forcing the buffer to not be in a released state before calling 385 * it. 386 * 387 * This function will also mark the IO as modified but it will not 388 * increment the modify_refs count. 389 * 390 * No I/O callbacks can occur while we hold the buffer locked. 
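 *
 * Sketch of the expected call pattern (illustrative; the on-disk buffer
 * management code is the real caller):
 *
 *	error = hammer_io_new(volume->devvp, &buffer->io);
 *	... caller fills in the now-zero'd, modified buffer ...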
391 */ 392 int 393 hammer_io_new(struct vnode *devvp, struct hammer_io *io) 394 { 395 struct buf *bp; 396 397 if ((bp = io->bp) == NULL) { 398 io->bp = getblk(devvp, io->offset, io->bytes, 0, 0); 399 bp = io->bp; 400 bp->b_ops = &hammer_bioops; 401 KKASSERT(LIST_FIRST(&bp->b_dep) == NULL); 402 403 /* io->worklist is locked by the io lock */ 404 LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node); 405 io->released = 0; 406 KKASSERT(io->running == 0); 407 io->waiting = 0; 408 BUF_KERNPROC(bp); 409 } else { 410 if (io->released) { 411 regetblk(bp); 412 BUF_KERNPROC(bp); 413 io->released = 0; 414 } 415 } 416 hammer_io_modify(io, 0); 417 vfs_bio_clrbuf(bp); 418 return(0); 419 } 420 421 /* 422 * Advance the activity count on the underlying buffer because 423 * HAMMER does not getblk/brelse on every access. 424 * 425 * The io->bp cannot go away while the buffer is referenced. 426 */ 427 void 428 hammer_io_advance(struct hammer_io *io) 429 { 430 if (io->bp) 431 buf_act_advance(io->bp); 432 } 433 434 /* 435 * Remove potential device level aliases against buffers managed by high level 436 * vnodes. Aliases can also be created due to mixed buffer sizes or via 437 * direct access to the backing store device. 438 * 439 * This is nasty because the buffers are also VMIO-backed. Even if a buffer 440 * does not exist its backing VM pages might, and we have to invalidate 441 * those as well or a getblk() will reinstate them. 442 * 443 * Buffer cache buffers associated with hammer_buffers cannot be 444 * invalidated. 445 */ 446 int 447 hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) 448 { 449 hammer_io_structure_t iou; 450 hammer_mount_t hmp; 451 hammer_off_t phys_offset; 452 struct buf *bp; 453 int error; 454 455 hmp = volume->io.hmp; 456 lwkt_gettoken(&hmp->io_token); 457 458 /* 459 * If a device buffer already exists for the specified physical 460 * offset use that, otherwise instantiate a buffer to cover any 461 * related VM pages, set BNOCACHE, and brelse(). 462 */ 463 phys_offset = volume->ondisk->vol_buf_beg + 464 (zone2_offset & HAMMER_OFF_SHORT_MASK); 465 if ((bp = findblk(volume->devvp, phys_offset, 0)) != NULL) 466 bremfree(bp); 467 else 468 bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0); 469 470 if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) { 471 #if 0 472 hammer_ref(&iou->io.lock); 473 hammer_io_clear_modify(&iou->io, 1); 474 bundirty(bp); 475 iou->io.released = 0; 476 BUF_KERNPROC(bp); 477 iou->io.reclaim = 1; 478 iou->io.waitdep = 1; /* XXX this is a fs_token field */ 479 KKASSERT(hammer_isactive(&iou->io.lock) == 1); 480 hammer_rel_buffer(&iou->buffer, 0); 481 /*hammer_io_deallocate(bp);*/ 482 #endif 483 bqrelse(bp); 484 error = EAGAIN; 485 } else { 486 KKASSERT((bp->b_flags & B_LOCKED) == 0); 487 bundirty(bp); 488 bp->b_flags |= B_NOCACHE|B_RELBUF; 489 brelse(bp); 490 error = 0; 491 } 492 lwkt_reltoken(&hmp->io_token); 493 return(error); 494 } 495 496 /* 497 * This routine is called on the last reference to a hammer structure. 498 * The io must be interlocked with a refcount of zero. The hammer structure 499 * will remain interlocked on return. 500 * 501 * This routine may return a non-NULL bp to the caller for dispoal. 502 * The caller typically brelse()'s the bp. 503 * 504 * The bp may or may not still be passively associated with the IO. It 505 * will remain passively associated if it is unreleasable (e.g. a modified 506 * meta-data buffer). 
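 *
 * Typical caller-side handling of the returned bp (a sketch of the
 * disposal described above):
 *
 *	bp = hammer_io_release(&buffer->io, flush);
 *	if (bp)
 *		brelse(bp);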
507 * 508 * The only requirement here is that modified meta-data and volume-header 509 * buffer may NOT be disassociated from the IO structure, and consequently 510 * we also leave such buffers actively associated with the IO if they already 511 * are (since the kernel can't do anything with them anyway). Only the 512 * flusher is allowed to write such buffers out. Modified pure-data and 513 * undo buffers are returned to the kernel but left passively associated 514 * so we can track when the kernel writes the bp out. 515 */ 516 struct buf * 517 hammer_io_release(struct hammer_io *io, int flush) 518 { 519 union hammer_io_structure *iou = (void *)io; 520 struct buf *bp; 521 522 if ((bp = io->bp) == NULL) 523 return(NULL); 524 525 /* 526 * Try to flush a dirty IO to disk if asked to by the 527 * caller or if the kernel tried to flush the buffer in the past. 528 * 529 * Kernel-initiated flushes are only allowed for pure-data buffers. 530 * meta-data and volume buffers can only be flushed explicitly 531 * by HAMMER. 532 */ 533 if (io->modified) { 534 if (flush) { 535 hammer_io_flush(io, 0); 536 } else if (bp->b_flags & B_LOCKED) { 537 switch(io->type) { 538 case HAMMER_STRUCTURE_DATA_BUFFER: 539 hammer_io_flush(io, 0); 540 break; 541 case HAMMER_STRUCTURE_UNDO_BUFFER: 542 hammer_io_flush(io, hammer_undo_reclaim(io)); 543 break; 544 default: 545 break; 546 } 547 } /* else no explicit request to flush the buffer */ 548 } 549 550 /* 551 * Wait for the IO to complete if asked to. This occurs when 552 * the buffer must be disposed of definitively during an umount 553 * or buffer invalidation. 554 */ 555 if (io->waitdep && io->running) { 556 hammer_io_wait(io); 557 } 558 559 /* 560 * Return control of the buffer to the kernel (with the provisio 561 * that our bioops can override kernel decisions with regards to 562 * the buffer). 563 */ 564 if ((flush || io->reclaim) && io->modified == 0 && io->running == 0) { 565 /* 566 * Always disassociate the bp if an explicit flush 567 * was requested and the IO completed with no error 568 * (so unmount can really clean up the structure). 569 */ 570 if (io->released) { 571 regetblk(bp); 572 BUF_KERNPROC(bp); 573 } else { 574 io->released = 1; 575 } 576 hammer_io_disassociate((hammer_io_structure_t)io); 577 /* return the bp */ 578 } else if (io->modified) { 579 /* 580 * Only certain IO types can be released to the kernel if 581 * the buffer has been modified. 582 * 583 * volume and meta-data IO types may only be explicitly 584 * flushed by HAMMER. 585 */ 586 switch(io->type) { 587 case HAMMER_STRUCTURE_DATA_BUFFER: 588 case HAMMER_STRUCTURE_UNDO_BUFFER: 589 if (io->released == 0) { 590 io->released = 1; 591 bdwrite(bp); 592 } 593 break; 594 default: 595 break; 596 } 597 bp = NULL; /* bp left associated */ 598 } else if (io->released == 0) { 599 /* 600 * Clean buffers can be generally released to the kernel. 601 * We leave the bp passively associated with the HAMMER 602 * structure and use bioops to disconnect it later on 603 * if the kernel wants to discard the buffer. 604 * 605 * We can steal the structure's ownership of the bp. 606 */ 607 io->released = 1; 608 if (bp->b_flags & B_LOCKED) { 609 hammer_io_disassociate(iou); 610 /* return the bp */ 611 } else { 612 if (io->reclaim) { 613 hammer_io_disassociate(iou); 614 /* return the bp */ 615 } else { 616 /* return the bp (bp passively associated) */ 617 } 618 } 619 } else { 620 /* 621 * A released buffer is passively associate with our 622 * hammer_io structure. 
The kernel cannot destroy it
		 * without making a bioops call.  If the kernel (B_LOCKED)
		 * or we (reclaim) requested that the buffer be destroyed
		 * we destroy it, otherwise we do a quick get/release to
		 * reset its position in the kernel's LRU list.
		 *
		 * Leaving the buffer passively associated allows us to
		 * use the kernel's LRU buffer flushing mechanisms rather
		 * than rolling our own.
		 *
		 * XXX there are two ways of doing this.  We can re-acquire
		 * and passively release to reset the LRU, or not.
		 */
		if (io->running == 0) {
			regetblk(bp);
			if ((bp->b_flags & B_LOCKED) || io->reclaim) {
				hammer_io_disassociate(iou);
				/* return the bp */
			} else {
				/* return the bp (bp passively associated) */
			}
		} else {
			/*
			 * bp is left passively associated but we do not
			 * try to reacquire it.  Interactions with the io
			 * structure will occur on completion of the bp's
			 * I/O.
			 */
			bp = NULL;
		}
	}
	return(bp);
}

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 *
 * The locked io or io reference prevents a flush from being initiated
 * by the kernel.
 */
void
hammer_io_flush(struct hammer_io *io, int reclaim)
{
	struct buf *bp;
	hammer_mount_t hmp;

	/*
	 * Degenerate case - nothing to flush if nothing is dirty.
	 */
	if (io->modified == 0)
		return;

	KKASSERT(io->bp);
	KKASSERT(io->modify_refs <= 0);

	/*
	 * Acquire ownership of the bp, particularly before we clear our
	 * modified flag.
	 *
	 * We are going to bawrite() this bp.  Don't leave a window where
	 * io->released is set, we actually own the bp rather than our
	 * buffer.
	 *
	 * The io_token should not be required here as only
	 */
	hmp = io->hmp;
	bp = io->bp;
	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		/* io->released = 0; */
		KKASSERT(io->released);
		KKASSERT(io->bp == bp);
	} else {
		io->released = 1;
	}

	if (reclaim) {
		io->reclaim = 1;
		if ((bp->b_flags & B_LOCKED) == 0) {
			bp->b_flags |= B_LOCKED;
			atomic_add_int(&hammer_count_io_locked, 1);
		}
	}

	/*
	 * Acquire exclusive access to the bp and then clear the modified
	 * state of the buffer prior to issuing I/O to interlock any
	 * modifications made while the I/O is in progress.  This shouldn't
	 * happen anyway but losing data would be worse.  The modified bit
	 * will be rechecked after the IO completes.
	 *
	 * NOTE: This call also finalizes the buffer's content (inval == 0).
	 *
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	hammer_ref(&io->lock);
	hammer_io_clear_modify(io, 0);
	hammer_rel(&io->lock);

	if (hammer_debug_io & 0x0002)
		kprintf("hammer io_write %016jx\n", bp->b_bio1.bio_offset);

	/*
	 * Transfer ownership to the kernel and initiate I/O.
	 *
	 * NOTE: We do not hold io_token so an atomic op is required to
	 *	 update io_running_space.
738 */ 739 io->running = 1; 740 atomic_add_int(&hmp->io_running_space, io->bytes); 741 atomic_add_int(&hammer_count_io_running_write, io->bytes); 742 lwkt_gettoken(&hmp->io_token); 743 TAILQ_INSERT_TAIL(&hmp->iorun_list, io, iorun_entry); 744 lwkt_reltoken(&hmp->io_token); 745 bawrite(bp); 746 hammer_io_flush_mark(io->volume); 747 } 748 749 /************************************************************************ 750 * BUFFER DIRTYING * 751 ************************************************************************ 752 * 753 * These routines deal with dependancies created when IO buffers get 754 * modified. The caller must call hammer_modify_*() on a referenced 755 * HAMMER structure prior to modifying its on-disk data. 756 * 757 * Any intent to modify an IO buffer acquires the related bp and imposes 758 * various write ordering dependancies. 759 */ 760 761 /* 762 * Mark a HAMMER structure as undergoing modification. Meta-data buffers 763 * are locked until the flusher can deal with them, pure data buffers 764 * can be written out. 765 * 766 * The referenced io prevents races. 767 */ 768 static 769 void 770 hammer_io_modify(hammer_io_t io, int count) 771 { 772 /* 773 * io->modify_refs must be >= 0 774 */ 775 while (io->modify_refs < 0) { 776 io->waitmod = 1; 777 tsleep(io, 0, "hmrmod", 0); 778 } 779 780 /* 781 * Shortcut if nothing to do. 782 */ 783 KKASSERT(hammer_isactive(&io->lock) && io->bp != NULL); 784 io->modify_refs += count; 785 if (io->modified && io->released == 0) 786 return; 787 788 /* 789 * NOTE: It is important not to set the modified bit 790 * until after we have acquired the bp or we risk 791 * racing against checkwrite. 792 */ 793 hammer_lock_ex(&io->lock); 794 if (io->released) { 795 regetblk(io->bp); 796 BUF_KERNPROC(io->bp); 797 io->released = 0; 798 } 799 if (io->modified == 0) { 800 hammer_io_set_modlist(io); 801 io->modified = 1; 802 } 803 hammer_unlock(&io->lock); 804 } 805 806 static __inline 807 void 808 hammer_io_modify_done(hammer_io_t io) 809 { 810 KKASSERT(io->modify_refs > 0); 811 --io->modify_refs; 812 if (io->modify_refs == 0 && io->waitmod) { 813 io->waitmod = 0; 814 wakeup(io); 815 } 816 } 817 818 /* 819 * The write interlock blocks other threads trying to modify a buffer 820 * (they block in hammer_io_modify()) after us, or blocks us while other 821 * threads are in the middle of modifying a buffer. 822 * 823 * The caller also has a ref on the io, however if we are not careful 824 * we will race bioops callbacks (checkwrite). To deal with this 825 * we must at least acquire and release the io_token, and it is probably 826 * better to hold it through the setting of modify_refs. 827 */ 828 void 829 hammer_io_write_interlock(hammer_io_t io) 830 { 831 hammer_mount_t hmp = io->hmp; 832 833 lwkt_gettoken(&hmp->io_token); 834 while (io->modify_refs != 0) { 835 io->waitmod = 1; 836 tsleep(io, 0, "hmrmod", 0); 837 } 838 io->modify_refs = -1; 839 lwkt_reltoken(&hmp->io_token); 840 } 841 842 void 843 hammer_io_done_interlock(hammer_io_t io) 844 { 845 KKASSERT(io->modify_refs == -1); 846 io->modify_refs = 0; 847 if (io->waitmod) { 848 io->waitmod = 0; 849 wakeup(io); 850 } 851 } 852 853 /* 854 * Caller intends to modify a volume's ondisk structure. 855 * 856 * This is only allowed if we are the flusher or we have a ref on the 857 * sync_lock. 
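 *
 * Sketch of the usual modify/undo bracket (the field chosen here is
 * illustrative only):
 *
 *	hammer_modify_volume(trans, volume,
 *			     &volume->ondisk->vol0_stat_bigblocks,
 *			     sizeof(volume->ondisk->vol0_stat_bigblocks));
 *	--volume->ondisk->vol0_stat_bigblocks;
 *	hammer_modify_volume_done(volume);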
858 */ 859 void 860 hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume, 861 void *base, int len) 862 { 863 KKASSERT (trans == NULL || trans->sync_lock_refs > 0); 864 865 hammer_io_modify(&volume->io, 1); 866 if (len) { 867 intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk; 868 KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0); 869 hammer_generate_undo(trans, 870 HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset), 871 base, len); 872 } 873 } 874 875 /* 876 * Caller intends to modify a buffer's ondisk structure. 877 * 878 * This is only allowed if we are the flusher or we have a ref on the 879 * sync_lock. 880 */ 881 void 882 hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer, 883 void *base, int len) 884 { 885 KKASSERT (trans == NULL || trans->sync_lock_refs > 0); 886 887 hammer_io_modify(&buffer->io, 1); 888 if (len) { 889 intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk; 890 KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0); 891 hammer_generate_undo(trans, 892 buffer->zone2_offset + rel_offset, 893 base, len); 894 } 895 } 896 897 void 898 hammer_modify_volume_done(hammer_volume_t volume) 899 { 900 hammer_io_modify_done(&volume->io); 901 } 902 903 void 904 hammer_modify_buffer_done(hammer_buffer_t buffer) 905 { 906 hammer_io_modify_done(&buffer->io); 907 } 908 909 /* 910 * Mark an entity as not being dirty any more and finalize any 911 * delayed adjustments to the buffer. 912 * 913 * Delayed adjustments are an important performance enhancement, allowing 914 * us to avoid recalculating B-Tree node CRCs over and over again when 915 * making bulk-modifications to the B-Tree. 916 * 917 * If inval is non-zero delayed adjustments are ignored. 918 * 919 * This routine may dereference related btree nodes and cause the 920 * buffer to be dereferenced. The caller must own a reference on io. 921 */ 922 void 923 hammer_io_clear_modify(struct hammer_io *io, int inval) 924 { 925 hammer_mount_t hmp; 926 927 /* 928 * io_token is needed to avoid races on mod_root 929 */ 930 if (io->modified == 0) 931 return; 932 hmp = io->hmp; 933 lwkt_gettoken(&hmp->io_token); 934 if (io->modified == 0) { 935 lwkt_reltoken(&hmp->io_token); 936 return; 937 } 938 939 /* 940 * Take us off the mod-list and clear the modified bit. 941 */ 942 KKASSERT(io->mod_root != NULL); 943 if (io->mod_root == &io->hmp->volu_root || 944 io->mod_root == &io->hmp->meta_root) { 945 io->hmp->locked_dirty_space -= io->bytes; 946 atomic_add_int(&hammer_count_dirtybufspace, -io->bytes); 947 } 948 RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); 949 io->mod_root = NULL; 950 io->modified = 0; 951 952 lwkt_reltoken(&hmp->io_token); 953 954 /* 955 * If this bit is not set there are no delayed adjustments. 956 */ 957 if (io->gencrc == 0) 958 return; 959 io->gencrc = 0; 960 961 /* 962 * Finalize requested CRCs. The NEEDSCRC flag also holds a reference 963 * on the node (& underlying buffer). Release the node after clearing 964 * the flag. 
965 */ 966 if (io->type == HAMMER_STRUCTURE_META_BUFFER) { 967 hammer_buffer_t buffer = (void *)io; 968 hammer_node_t node; 969 970 restart: 971 TAILQ_FOREACH(node, &buffer->clist, entry) { 972 if ((node->flags & HAMMER_NODE_NEEDSCRC) == 0) 973 continue; 974 node->flags &= ~HAMMER_NODE_NEEDSCRC; 975 KKASSERT(node->ondisk); 976 if (inval == 0) 977 node->ondisk->crc = crc32(&node->ondisk->crc + 1, HAMMER_BTREE_CRCSIZE); 978 hammer_rel_node(node); 979 goto restart; 980 } 981 } 982 /* caller must still have ref on io */ 983 KKASSERT(hammer_isactive(&io->lock)); 984 } 985 986 /* 987 * Clear the IO's modify list. Even though the IO is no longer modified 988 * it may still be on the lose_root. This routine is called just before 989 * the governing hammer_buffer is destroyed. 990 * 991 * mod_root requires io_token protection. 992 */ 993 void 994 hammer_io_clear_modlist(struct hammer_io *io) 995 { 996 hammer_mount_t hmp = io->hmp; 997 998 KKASSERT(io->modified == 0); 999 if (io->mod_root) { 1000 lwkt_gettoken(&hmp->io_token); 1001 if (io->mod_root) { 1002 KKASSERT(io->mod_root == &io->hmp->lose_root); 1003 RB_REMOVE(hammer_mod_rb_tree, io->mod_root, io); 1004 io->mod_root = NULL; 1005 } 1006 lwkt_reltoken(&hmp->io_token); 1007 } 1008 } 1009 1010 static void 1011 hammer_io_set_modlist(struct hammer_io *io) 1012 { 1013 struct hammer_mount *hmp = io->hmp; 1014 1015 lwkt_gettoken(&hmp->io_token); 1016 KKASSERT(io->mod_root == NULL); 1017 1018 switch(io->type) { 1019 case HAMMER_STRUCTURE_VOLUME: 1020 io->mod_root = &hmp->volu_root; 1021 hmp->locked_dirty_space += io->bytes; 1022 atomic_add_int(&hammer_count_dirtybufspace, io->bytes); 1023 break; 1024 case HAMMER_STRUCTURE_META_BUFFER: 1025 io->mod_root = &hmp->meta_root; 1026 hmp->locked_dirty_space += io->bytes; 1027 atomic_add_int(&hammer_count_dirtybufspace, io->bytes); 1028 break; 1029 case HAMMER_STRUCTURE_UNDO_BUFFER: 1030 io->mod_root = &hmp->undo_root; 1031 break; 1032 case HAMMER_STRUCTURE_DATA_BUFFER: 1033 io->mod_root = &hmp->data_root; 1034 break; 1035 case HAMMER_STRUCTURE_DUMMY: 1036 panic("hammer_io_set_modlist: bad io type"); 1037 break; /* NOT REACHED */ 1038 } 1039 if (RB_INSERT(hammer_mod_rb_tree, io->mod_root, io)) { 1040 panic("hammer_io_set_modlist: duplicate entry"); 1041 /* NOT REACHED */ 1042 } 1043 lwkt_reltoken(&hmp->io_token); 1044 } 1045 1046 /************************************************************************ 1047 * HAMMER_BIOOPS * 1048 ************************************************************************ 1049 * 1050 */ 1051 1052 /* 1053 * Pre-IO initiation kernel callback - cluster build only 1054 * 1055 * bioops callback - hold io_token 1056 */ 1057 static void 1058 hammer_io_start(struct buf *bp) 1059 { 1060 /* nothing to do, so io_token not needed */ 1061 } 1062 1063 /* 1064 * Post-IO completion kernel callback - MAY BE CALLED FROM INTERRUPT! 1065 * 1066 * NOTE: HAMMER may modify a data buffer after we have initiated write 1067 * I/O. 1068 * 1069 * NOTE: MPSAFE callback 1070 * 1071 * bioops callback - hold io_token 1072 */ 1073 static void 1074 hammer_io_complete(struct buf *bp) 1075 { 1076 union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep); 1077 struct hammer_mount *hmp = iou->io.hmp; 1078 struct hammer_io *ionext; 1079 1080 lwkt_gettoken(&hmp->io_token); 1081 1082 KKASSERT(iou->io.released == 1); 1083 1084 /* 1085 * Deal with people waiting for I/O to drain 1086 */ 1087 if (iou->io.running) { 1088 /* 1089 * Deal with critical write errors. 
Once a critical error 1090 * has been flagged in hmp the UNDO FIFO will not be updated. 1091 * That way crash recover will give us a consistent 1092 * filesystem. 1093 * 1094 * Because of this we can throw away failed UNDO buffers. If 1095 * we throw away META or DATA buffers we risk corrupting 1096 * the now read-only version of the filesystem visible to 1097 * the user. Clear B_ERROR so the buffer is not re-dirtied 1098 * by the kernel and ref the io so it doesn't get thrown 1099 * away. 1100 */ 1101 if (bp->b_flags & B_ERROR) { 1102 lwkt_gettoken(&hmp->fs_token); 1103 hammer_critical_error(hmp, NULL, bp->b_error, 1104 "while flushing meta-data"); 1105 lwkt_reltoken(&hmp->fs_token); 1106 1107 switch(iou->io.type) { 1108 case HAMMER_STRUCTURE_UNDO_BUFFER: 1109 break; 1110 default: 1111 if (iou->io.ioerror == 0) { 1112 iou->io.ioerror = 1; 1113 hammer_ref(&iou->io.lock); 1114 } 1115 break; 1116 } 1117 bp->b_flags &= ~B_ERROR; 1118 bundirty(bp); 1119 #if 0 1120 hammer_io_set_modlist(&iou->io); 1121 iou->io.modified = 1; 1122 #endif 1123 } 1124 hammer_stats_disk_write += iou->io.bytes; 1125 atomic_add_int(&hammer_count_io_running_write, -iou->io.bytes); 1126 atomic_add_int(&hmp->io_running_space, -iou->io.bytes); 1127 if (hmp->io_running_wakeup && 1128 hmp->io_running_space < hammer_limit_running_io / 2) { 1129 hmp->io_running_wakeup = 0; 1130 wakeup(&hmp->io_running_wakeup); 1131 } 1132 KKASSERT(hmp->io_running_space >= 0); 1133 iou->io.running = 0; 1134 1135 /* 1136 * Remove from iorun list and wakeup any multi-io waiter(s). 1137 */ 1138 if (TAILQ_FIRST(&hmp->iorun_list) == &iou->io) { 1139 ionext = TAILQ_NEXT(&iou->io, iorun_entry); 1140 if (ionext && ionext->type == HAMMER_STRUCTURE_DUMMY) 1141 wakeup(ionext); 1142 } 1143 TAILQ_REMOVE(&hmp->iorun_list, &iou->io, iorun_entry); 1144 } else { 1145 hammer_stats_disk_read += iou->io.bytes; 1146 } 1147 1148 if (iou->io.waiting) { 1149 iou->io.waiting = 0; 1150 wakeup(iou); 1151 } 1152 1153 /* 1154 * If B_LOCKED is set someone wanted to deallocate the bp at some 1155 * point, try to do it now. The operation will fail if there are 1156 * refs or if hammer_io_deallocate() is unable to gain the 1157 * interlock. 1158 */ 1159 if (bp->b_flags & B_LOCKED) { 1160 atomic_add_int(&hammer_count_io_locked, -1); 1161 bp->b_flags &= ~B_LOCKED; 1162 hammer_io_deallocate(bp); 1163 /* structure may be dead now */ 1164 } 1165 lwkt_reltoken(&hmp->io_token); 1166 } 1167 1168 /* 1169 * Callback from kernel when it wishes to deallocate a passively 1170 * associated structure. This mostly occurs with clean buffers 1171 * but it may be possible for a holding structure to be marked dirty 1172 * while its buffer is passively associated. The caller owns the bp. 1173 * 1174 * If we cannot disassociate we set B_LOCKED to prevent the buffer 1175 * from getting reused. 1176 * 1177 * WARNING: Because this can be called directly by getnewbuf we cannot 1178 * recurse into the tree. If a bp cannot be immediately disassociated 1179 * our only recourse is to set B_LOCKED. 
1180 * 1181 * WARNING: This may be called from an interrupt via hammer_io_complete() 1182 * 1183 * bioops callback - hold io_token 1184 */ 1185 static void 1186 hammer_io_deallocate(struct buf *bp) 1187 { 1188 hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep); 1189 hammer_mount_t hmp; 1190 1191 hmp = iou->io.hmp; 1192 1193 lwkt_gettoken(&hmp->io_token); 1194 1195 KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0); 1196 if (hammer_try_interlock_norefs(&iou->io.lock) == 0) { 1197 /* 1198 * We cannot safely disassociate a bp from a referenced 1199 * or interlocked HAMMER structure. 1200 */ 1201 bp->b_flags |= B_LOCKED; 1202 atomic_add_int(&hammer_count_io_locked, 1); 1203 } else if (iou->io.modified) { 1204 /* 1205 * It is not legal to disassociate a modified buffer. This 1206 * case really shouldn't ever occur. 1207 */ 1208 bp->b_flags |= B_LOCKED; 1209 atomic_add_int(&hammer_count_io_locked, 1); 1210 hammer_put_interlock(&iou->io.lock, 0); 1211 } else { 1212 /* 1213 * Disassociate the BP. If the io has no refs left we 1214 * have to add it to the loose list. The kernel has 1215 * locked the buffer and therefore our io must be 1216 * in a released state. 1217 */ 1218 hammer_io_disassociate(iou); 1219 if (iou->io.type != HAMMER_STRUCTURE_VOLUME) { 1220 KKASSERT(iou->io.bp == NULL); 1221 KKASSERT(iou->io.mod_root == NULL); 1222 iou->io.mod_root = &hmp->lose_root; 1223 if (RB_INSERT(hammer_mod_rb_tree, iou->io.mod_root, 1224 &iou->io)) { 1225 panic("hammer_io_deallocate: duplicate entry"); 1226 } 1227 } 1228 hammer_put_interlock(&iou->io.lock, 1); 1229 } 1230 lwkt_reltoken(&hmp->io_token); 1231 } 1232 1233 /* 1234 * bioops callback - hold io_token 1235 */ 1236 static int 1237 hammer_io_fsync(struct vnode *vp) 1238 { 1239 /* nothing to do, so io_token not needed */ 1240 return(0); 1241 } 1242 1243 /* 1244 * NOTE: will not be called unless we tell the kernel about the 1245 * bioops. Unused... we use the mount's VFS_SYNC instead. 1246 * 1247 * bioops callback - hold io_token 1248 */ 1249 static int 1250 hammer_io_sync(struct mount *mp) 1251 { 1252 /* nothing to do, so io_token not needed */ 1253 return(0); 1254 } 1255 1256 /* 1257 * bioops callback - hold io_token 1258 */ 1259 static void 1260 hammer_io_movedeps(struct buf *bp1, struct buf *bp2) 1261 { 1262 /* nothing to do, so io_token not needed */ 1263 } 1264 1265 /* 1266 * I/O pre-check for reading and writing. HAMMER only uses this for 1267 * B_CACHE buffers so checkread just shouldn't happen, but if it does 1268 * allow it. 1269 * 1270 * Writing is a different case. We don't want the kernel to try to write 1271 * out a buffer that HAMMER may be modifying passively or which has a 1272 * dependancy. In addition, kernel-demanded writes can only proceed for 1273 * certain types of buffers (i.e. UNDO and DATA types). Other dirty 1274 * buffer types can only be explicitly written by the flusher. 1275 * 1276 * checkwrite will only be called for bdwrite()n buffers. If we return 1277 * success the kernel is guaranteed to initiate the buffer write. 1278 * 1279 * bioops callback - hold io_token 1280 */ 1281 static int 1282 hammer_io_checkread(struct buf *bp) 1283 { 1284 /* nothing to do, so io_token not needed */ 1285 return(0); 1286 } 1287 1288 /* 1289 * The kernel is asking us whether it can write out a dirty buffer or not. 
1290 * 1291 * bioops callback - hold io_token 1292 */ 1293 static int 1294 hammer_io_checkwrite(struct buf *bp) 1295 { 1296 hammer_io_t io = (void *)LIST_FIRST(&bp->b_dep); 1297 hammer_mount_t hmp = io->hmp; 1298 1299 /* 1300 * This shouldn't happen under normal operation. 1301 */ 1302 lwkt_gettoken(&hmp->io_token); 1303 if (io->type == HAMMER_STRUCTURE_VOLUME || 1304 io->type == HAMMER_STRUCTURE_META_BUFFER) { 1305 if (!panicstr) 1306 panic("hammer_io_checkwrite: illegal buffer"); 1307 if ((bp->b_flags & B_LOCKED) == 0) { 1308 bp->b_flags |= B_LOCKED; 1309 atomic_add_int(&hammer_count_io_locked, 1); 1310 } 1311 lwkt_reltoken(&hmp->io_token); 1312 return(1); 1313 } 1314 1315 /* 1316 * We have to be able to interlock the IO to safely modify any 1317 * of its fields without holding the fs_token. If we can't lock 1318 * it then we are racing someone. 1319 * 1320 * Our ownership of the bp lock prevents the io from being ripped 1321 * out from under us. 1322 */ 1323 if (hammer_try_interlock_norefs(&io->lock) == 0) { 1324 bp->b_flags |= B_LOCKED; 1325 atomic_add_int(&hammer_count_io_locked, 1); 1326 lwkt_reltoken(&hmp->io_token); 1327 return(1); 1328 } 1329 1330 /* 1331 * The modified bit must be cleared prior to the initiation of 1332 * any IO (returning 0 initiates the IO). Because this is a 1333 * normal data buffer hammer_io_clear_modify() runs through a 1334 * simple degenerate case. 1335 * 1336 * Return 0 will cause the kernel to initiate the IO, and we 1337 * must normally clear the modified bit before we begin. If 1338 * the io has modify_refs we do not clear the modified bit, 1339 * otherwise we may miss changes. 1340 * 1341 * Only data and undo buffers can reach here. These buffers do 1342 * not have terminal crc functions but we temporarily reference 1343 * the IO anyway, just in case. 1344 */ 1345 if (io->modify_refs == 0 && io->modified) { 1346 hammer_ref(&io->lock); 1347 hammer_io_clear_modify(io, 0); 1348 hammer_rel(&io->lock); 1349 } else if (io->modified) { 1350 KKASSERT(io->type == HAMMER_STRUCTURE_DATA_BUFFER); 1351 } 1352 1353 /* 1354 * The kernel is going to start the IO, set io->running. 1355 */ 1356 KKASSERT(io->running == 0); 1357 io->running = 1; 1358 atomic_add_int(&io->hmp->io_running_space, io->bytes); 1359 atomic_add_int(&hammer_count_io_running_write, io->bytes); 1360 TAILQ_INSERT_TAIL(&io->hmp->iorun_list, io, iorun_entry); 1361 1362 hammer_put_interlock(&io->lock, 1); 1363 lwkt_reltoken(&hmp->io_token); 1364 1365 return(0); 1366 } 1367 1368 /* 1369 * Return non-zero if we wish to delay the kernel's attempt to flush 1370 * this buffer to disk. 1371 * 1372 * bioops callback - hold io_token 1373 */ 1374 static int 1375 hammer_io_countdeps(struct buf *bp, int n) 1376 { 1377 /* nothing to do, so io_token not needed */ 1378 return(0); 1379 } 1380 1381 struct bio_ops hammer_bioops = { 1382 .io_start = hammer_io_start, 1383 .io_complete = hammer_io_complete, 1384 .io_deallocate = hammer_io_deallocate, 1385 .io_fsync = hammer_io_fsync, 1386 .io_sync = hammer_io_sync, 1387 .io_movedeps = hammer_io_movedeps, 1388 .io_countdeps = hammer_io_countdeps, 1389 .io_checkread = hammer_io_checkread, 1390 .io_checkwrite = hammer_io_checkwrite, 1391 }; 1392 1393 /************************************************************************ 1394 * DIRECT IO OPS * 1395 ************************************************************************ 1396 * 1397 * These functions operate directly on the buffer cache buffer associated 1398 * with a front-end vnode rather then a back-end device vnode. 
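 *
 * Rough sketch of the expected write-side usage (illustrative only; the
 * strategy and flusher code own the real call sites):
 *
 *	error = hammer_io_direct_write(hmp, bio, record);
 *	...
 *	hammer_io_direct_wait(record);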
1399 */ 1400 1401 /* 1402 * Read a buffer associated with a front-end vnode directly from the 1403 * disk media. The bio may be issued asynchronously. If leaf is non-NULL 1404 * we validate the CRC. 1405 * 1406 * We must check for the presence of a HAMMER buffer to handle the case 1407 * where the reblocker has rewritten the data (which it does via the HAMMER 1408 * buffer system, not via the high-level vnode buffer cache), but not yet 1409 * committed the buffer to the media. 1410 */ 1411 int 1412 hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio, 1413 hammer_btree_leaf_elm_t leaf) 1414 { 1415 hammer_off_t buf_offset; 1416 hammer_off_t zone2_offset; 1417 hammer_volume_t volume; 1418 struct buf *bp; 1419 struct bio *nbio; 1420 int vol_no; 1421 int error; 1422 1423 buf_offset = bio->bio_offset; 1424 KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == 1425 HAMMER_ZONE_LARGE_DATA); 1426 1427 /* 1428 * The buffer cache may have an aliased buffer (the reblocker can 1429 * write them). If it does we have to sync any dirty data before 1430 * we can build our direct-read. This is a non-critical code path. 1431 */ 1432 bp = bio->bio_buf; 1433 hammer_sync_buffers(hmp, buf_offset, bp->b_bufsize); 1434 1435 /* 1436 * Resolve to a zone-2 offset. The conversion just requires 1437 * munging the top 4 bits but we want to abstract it anyway 1438 * so the blockmap code can verify the zone assignment. 1439 */ 1440 zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error); 1441 if (error) 1442 goto done; 1443 KKASSERT((zone2_offset & HAMMER_OFF_ZONE_MASK) == 1444 HAMMER_ZONE_RAW_BUFFER); 1445 1446 /* 1447 * Resolve volume and raw-offset for 3rd level bio. The 1448 * offset will be specific to the volume. 1449 */ 1450 vol_no = HAMMER_VOL_DECODE(zone2_offset); 1451 volume = hammer_get_volume(hmp, vol_no, &error); 1452 if (error == 0 && zone2_offset >= volume->maxbuf_off) 1453 error = EIO; 1454 1455 if (error == 0) { 1456 /* 1457 * 3rd level bio 1458 */ 1459 nbio = push_bio(bio); 1460 nbio->bio_offset = volume->ondisk->vol_buf_beg + 1461 (zone2_offset & HAMMER_OFF_SHORT_MASK); 1462 #if 0 1463 /* 1464 * XXX disabled - our CRC check doesn't work if the OS 1465 * does bogus_page replacement on the direct-read. 1466 */ 1467 if (leaf && hammer_verify_data) { 1468 nbio->bio_done = hammer_io_direct_read_complete; 1469 nbio->bio_caller_info1.uvalue32 = leaf->data_crc; 1470 } 1471 #endif 1472 hammer_stats_disk_read += bp->b_bufsize; 1473 vn_strategy(volume->devvp, nbio); 1474 } 1475 hammer_rel_volume(volume, 0); 1476 done: 1477 if (error) { 1478 kprintf("hammer_direct_read: failed @ %016llx\n", 1479 (long long)zone2_offset); 1480 bp->b_error = error; 1481 bp->b_flags |= B_ERROR; 1482 biodone(bio); 1483 } 1484 return(error); 1485 } 1486 1487 /* 1488 * This works similarly to hammer_io_direct_read() except instead of 1489 * directly reading from the device into the bio we instead indirectly 1490 * read through the device's buffer cache and then copy the data into 1491 * the bio. 1492 * 1493 * If leaf is non-NULL and validation is enabled, the CRC will be checked. 1494 * 1495 * This routine also executes asynchronously. It allows hammer strategy 1496 * calls to operate asynchronously when in double_buffer mode (in addition 1497 * to operating asynchronously when in normal mode). 
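 *
 * Sketch of the read-side dispatch (illustrative; the strategy-read code
 * makes the real decision and the double-buffer knob is defined elsewhere
 * in HAMMER, not in this file):
 *
 *	if (hammer_double_buffer)
 *		error = hammer_io_indirect_read(hmp, bio, leaf);
 *	else
 *		error = hammer_io_direct_read(hmp, bio, leaf);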
1498 */ 1499 int 1500 hammer_io_indirect_read(hammer_mount_t hmp, struct bio *bio, 1501 hammer_btree_leaf_elm_t leaf) 1502 { 1503 hammer_off_t buf_offset; 1504 hammer_off_t zone2_offset; 1505 hammer_volume_t volume; 1506 struct buf *bp; 1507 int vol_no; 1508 int error; 1509 1510 buf_offset = bio->bio_offset; 1511 KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == 1512 HAMMER_ZONE_LARGE_DATA); 1513 1514 /* 1515 * The buffer cache may have an aliased buffer (the reblocker can 1516 * write them). If it does we have to sync any dirty data before 1517 * we can build our direct-read. This is a non-critical code path. 1518 */ 1519 bp = bio->bio_buf; 1520 hammer_sync_buffers(hmp, buf_offset, bp->b_bufsize); 1521 1522 /* 1523 * Resolve to a zone-2 offset. The conversion just requires 1524 * munging the top 4 bits but we want to abstract it anyway 1525 * so the blockmap code can verify the zone assignment. 1526 */ 1527 zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error); 1528 if (error) 1529 goto done; 1530 KKASSERT((zone2_offset & HAMMER_OFF_ZONE_MASK) == 1531 HAMMER_ZONE_RAW_BUFFER); 1532 1533 /* 1534 * Resolve volume and raw-offset for 3rd level bio. The 1535 * offset will be specific to the volume. 1536 */ 1537 vol_no = HAMMER_VOL_DECODE(zone2_offset); 1538 volume = hammer_get_volume(hmp, vol_no, &error); 1539 if (error == 0 && zone2_offset >= volume->maxbuf_off) 1540 error = EIO; 1541 1542 if (error == 0) { 1543 /* 1544 * Convert to the raw volume->devvp offset and acquire 1545 * the buf, issuing async I/O if necessary. 1546 */ 1547 buf_offset = volume->ondisk->vol_buf_beg + 1548 (zone2_offset & HAMMER_OFF_SHORT_MASK); 1549 1550 if (leaf && hammer_verify_data) { 1551 bio->bio_caller_info1.uvalue32 = leaf->data_crc; 1552 bio->bio_caller_info2.index = 1; 1553 } else { 1554 bio->bio_caller_info2.index = 0; 1555 } 1556 breadcb(volume->devvp, buf_offset, bp->b_bufsize, 1557 hammer_indirect_callback, bio); 1558 } 1559 hammer_rel_volume(volume, 0); 1560 done: 1561 if (error) { 1562 kprintf("hammer_direct_read: failed @ %016llx\n", 1563 (long long)zone2_offset); 1564 bp->b_error = error; 1565 bp->b_flags |= B_ERROR; 1566 biodone(bio); 1567 } 1568 return(error); 1569 } 1570 1571 /* 1572 * Indirect callback on completion. bio/bp specify the device-backed 1573 * buffer. bio->bio_caller_info1.ptr holds obio. 1574 * 1575 * obio/obp is the original regular file buffer. obio->bio_caller_info* 1576 * contains the crc specification. 1577 * 1578 * We are responsible for calling bpdone() and bqrelse() on bio/bp, and 1579 * for calling biodone() on obio. 1580 */ 1581 static void 1582 hammer_indirect_callback(struct bio *bio) 1583 { 1584 struct buf *bp = bio->bio_buf; 1585 struct buf *obp; 1586 struct bio *obio; 1587 1588 /* 1589 * If BIO_DONE is already set the device buffer was already 1590 * fully valid (B_CACHE). If it is not set then I/O was issued 1591 * and we have to run I/O completion as the last bio. 1592 * 1593 * Nobody is waiting for our device I/O to complete, we are 1594 * responsible for bqrelse()ing it which means we also have to do 1595 * the equivalent of biowait() and clear BIO_DONE (which breadcb() 1596 * may have set). 1597 * 1598 * Any preexisting device buffer should match the requested size, 1599 * but due to bigblock recycling and other factors there is some 1600 * fragility there, so we assert that the device buffer covers 1601 * the request. 
1602 */ 1603 if ((bio->bio_flags & BIO_DONE) == 0) 1604 bpdone(bp, 0); 1605 bio->bio_flags &= ~(BIO_DONE | BIO_SYNC); 1606 1607 obio = bio->bio_caller_info1.ptr; 1608 obp = obio->bio_buf; 1609 1610 if (bp->b_flags & B_ERROR) { 1611 obp->b_flags |= B_ERROR; 1612 obp->b_error = bp->b_error; 1613 } else if (obio->bio_caller_info2.index && 1614 obio->bio_caller_info1.uvalue32 != 1615 crc32(bp->b_data, bp->b_bufsize)) { 1616 obp->b_flags |= B_ERROR; 1617 obp->b_error = EIO; 1618 } else { 1619 KKASSERT(bp->b_bufsize >= obp->b_bufsize); 1620 bcopy(bp->b_data, obp->b_data, obp->b_bufsize); 1621 obp->b_resid = 0; 1622 obp->b_flags |= B_AGE; 1623 } 1624 biodone(obio); 1625 bqrelse(bp); 1626 } 1627 1628 #if 0 1629 /* 1630 * On completion of the BIO this callback must check the data CRC 1631 * and chain to the previous bio. 1632 * 1633 * MPSAFE - since we do not modify and hammer_records we do not need 1634 * io_token. 1635 * 1636 * NOTE: MPSAFE callback 1637 */ 1638 static 1639 void 1640 hammer_io_direct_read_complete(struct bio *nbio) 1641 { 1642 struct bio *obio; 1643 struct buf *bp; 1644 u_int32_t rec_crc = nbio->bio_caller_info1.uvalue32; 1645 1646 bp = nbio->bio_buf; 1647 if (crc32(bp->b_data, bp->b_bufsize) != rec_crc) { 1648 kprintf("HAMMER: data_crc error @%016llx/%d\n", 1649 nbio->bio_offset, bp->b_bufsize); 1650 if (hammer_debug_critical) 1651 Debugger("data_crc on read"); 1652 bp->b_flags |= B_ERROR; 1653 bp->b_error = EIO; 1654 } 1655 obio = pop_bio(nbio); 1656 biodone(obio); 1657 } 1658 #endif 1659 1660 /* 1661 * Write a buffer associated with a front-end vnode directly to the 1662 * disk media. The bio may be issued asynchronously. 1663 * 1664 * The BIO is associated with the specified record and RECG_DIRECT_IO 1665 * is set. The recorded is added to its object. 1666 */ 1667 int 1668 hammer_io_direct_write(hammer_mount_t hmp, struct bio *bio, 1669 hammer_record_t record) 1670 { 1671 hammer_btree_leaf_elm_t leaf = &record->leaf; 1672 hammer_off_t buf_offset; 1673 hammer_off_t zone2_offset; 1674 hammer_volume_t volume; 1675 hammer_buffer_t buffer; 1676 struct buf *bp; 1677 struct bio *nbio; 1678 char *ptr; 1679 int vol_no; 1680 int error; 1681 1682 buf_offset = leaf->data_offset; 1683 1684 KKASSERT(buf_offset > HAMMER_ZONE_BTREE); 1685 KKASSERT(bio->bio_buf->b_cmd == BUF_CMD_WRITE); 1686 1687 /* 1688 * Issue or execute the I/O. The new memory record must replace 1689 * the old one before the I/O completes, otherwise a reaquisition of 1690 * the buffer will load the old media data instead of the new. 1691 */ 1692 if ((buf_offset & HAMMER_BUFMASK) == 0 && 1693 leaf->data_len >= HAMMER_BUFSIZE) { 1694 /* 1695 * We are using the vnode's bio to write directly to the 1696 * media, any hammer_buffer at the same zone-X offset will 1697 * now have stale data. 1698 */ 1699 zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error); 1700 vol_no = HAMMER_VOL_DECODE(zone2_offset); 1701 volume = hammer_get_volume(hmp, vol_no, &error); 1702 1703 if (error == 0 && zone2_offset >= volume->maxbuf_off) 1704 error = EIO; 1705 if (error == 0) { 1706 bp = bio->bio_buf; 1707 KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0); 1708 /* 1709 hammer_del_buffers(hmp, buf_offset, 1710 zone2_offset, bp->b_bufsize); 1711 */ 1712 1713 /* 1714 * Second level bio - cached zone2 offset. 1715 * 1716 * (We can put our bio_done function in either the 1717 * 2nd or 3rd level). 
1718 */ 1719 nbio = push_bio(bio); 1720 nbio->bio_offset = zone2_offset; 1721 nbio->bio_done = hammer_io_direct_write_complete; 1722 nbio->bio_caller_info1.ptr = record; 1723 record->zone2_offset = zone2_offset; 1724 record->gflags |= HAMMER_RECG_DIRECT_IO | 1725 HAMMER_RECG_DIRECT_INVAL; 1726 1727 /* 1728 * Third level bio - raw offset specific to the 1729 * correct volume. 1730 */ 1731 zone2_offset &= HAMMER_OFF_SHORT_MASK; 1732 nbio = push_bio(nbio); 1733 nbio->bio_offset = volume->ondisk->vol_buf_beg + 1734 zone2_offset; 1735 hammer_stats_disk_write += bp->b_bufsize; 1736 hammer_ip_replace_bulk(hmp, record); 1737 vn_strategy(volume->devvp, nbio); 1738 hammer_io_flush_mark(volume); 1739 } 1740 hammer_rel_volume(volume, 0); 1741 } else { 1742 /* 1743 * Must fit in a standard HAMMER buffer. In this case all 1744 * consumers use the HAMMER buffer system and RECG_DIRECT_IO 1745 * does not need to be set-up. 1746 */ 1747 KKASSERT(((buf_offset ^ (buf_offset + leaf->data_len - 1)) & ~HAMMER_BUFMASK64) == 0); 1748 buffer = NULL; 1749 ptr = hammer_bread(hmp, buf_offset, &error, &buffer); 1750 if (error == 0) { 1751 bp = bio->bio_buf; 1752 bp->b_flags |= B_AGE; 1753 hammer_io_modify(&buffer->io, 1); 1754 bcopy(bp->b_data, ptr, leaf->data_len); 1755 hammer_io_modify_done(&buffer->io); 1756 hammer_rel_buffer(buffer, 0); 1757 bp->b_resid = 0; 1758 hammer_ip_replace_bulk(hmp, record); 1759 biodone(bio); 1760 } 1761 } 1762 if (error) { 1763 /* 1764 * Major suckage occured. Also note: The record was 1765 * never added to the tree so we do not have to worry 1766 * about the backend. 1767 */ 1768 kprintf("hammer_direct_write: failed @ %016llx\n", 1769 (long long)leaf->data_offset); 1770 bp = bio->bio_buf; 1771 bp->b_resid = 0; 1772 bp->b_error = EIO; 1773 bp->b_flags |= B_ERROR; 1774 biodone(bio); 1775 record->flags |= HAMMER_RECF_DELETED_FE; 1776 hammer_rel_mem_record(record); 1777 } 1778 return(error); 1779 } 1780 1781 /* 1782 * On completion of the BIO this callback must disconnect 1783 * it from the hammer_record and chain to the previous bio. 1784 * 1785 * An I/O error forces the mount to read-only. Data buffers 1786 * are not B_LOCKED like meta-data buffers are, so we have to 1787 * throw the buffer away to prevent the kernel from retrying. 1788 * 1789 * NOTE: MPSAFE callback, only modify fields we have explicit 1790 * access to (the bp and the record->gflags). 
1791 */ 1792 static 1793 void 1794 hammer_io_direct_write_complete(struct bio *nbio) 1795 { 1796 struct bio *obio; 1797 struct buf *bp; 1798 hammer_record_t record; 1799 hammer_mount_t hmp; 1800 1801 record = nbio->bio_caller_info1.ptr; 1802 KKASSERT(record != NULL); 1803 hmp = record->ip->hmp; 1804 1805 lwkt_gettoken(&hmp->io_token); 1806 1807 bp = nbio->bio_buf; 1808 obio = pop_bio(nbio); 1809 if (bp->b_flags & B_ERROR) { 1810 lwkt_gettoken(&hmp->fs_token); 1811 hammer_critical_error(hmp, record->ip, 1812 bp->b_error, 1813 "while writing bulk data"); 1814 lwkt_reltoken(&hmp->fs_token); 1815 bp->b_flags |= B_INVAL; 1816 } 1817 biodone(obio); 1818 1819 KKASSERT(record->gflags & HAMMER_RECG_DIRECT_IO); 1820 if (record->gflags & HAMMER_RECG_DIRECT_WAIT) { 1821 record->gflags &= ~(HAMMER_RECG_DIRECT_IO | 1822 HAMMER_RECG_DIRECT_WAIT); 1823 /* record can disappear once DIRECT_IO flag is cleared */ 1824 wakeup(&record->flags); 1825 } else { 1826 record->gflags &= ~HAMMER_RECG_DIRECT_IO; 1827 /* record can disappear once DIRECT_IO flag is cleared */ 1828 } 1829 lwkt_reltoken(&hmp->io_token); 1830 } 1831 1832 1833 /* 1834 * This is called before a record is either committed to the B-Tree 1835 * or destroyed, to resolve any associated direct-IO. 1836 * 1837 * (1) We must wait for any direct-IO related to the record to complete. 1838 * 1839 * (2) We must remove any buffer cache aliases for data accessed via 1840 * leaf->data_offset or zone2_offset so non-direct-IO consumers 1841 * (the mirroring and reblocking code) do not see stale data. 1842 */ 1843 void 1844 hammer_io_direct_wait(hammer_record_t record) 1845 { 1846 hammer_mount_t hmp = record->ip->hmp; 1847 1848 /* 1849 * Wait for I/O to complete 1850 */ 1851 if (record->gflags & HAMMER_RECG_DIRECT_IO) { 1852 lwkt_gettoken(&hmp->io_token); 1853 while (record->gflags & HAMMER_RECG_DIRECT_IO) { 1854 record->gflags |= HAMMER_RECG_DIRECT_WAIT; 1855 tsleep(&record->flags, 0, "hmdiow", 0); 1856 } 1857 lwkt_reltoken(&hmp->io_token); 1858 } 1859 1860 /* 1861 * Invalidate any related buffer cache aliases associated with the 1862 * backing device. This is needed because the buffer cache buffer 1863 * for file data is associated with the file vnode, not the backing 1864 * device vnode. 1865 * 1866 * XXX I do not think this case can occur any more now that 1867 * reservations ensure that all such buffers are removed before 1868 * an area can be reused. 1869 */ 1870 if (record->gflags & HAMMER_RECG_DIRECT_INVAL) { 1871 KKASSERT(record->leaf.data_offset); 1872 hammer_del_buffers(hmp, record->leaf.data_offset, 1873 record->zone2_offset, record->leaf.data_len, 1874 1); 1875 record->gflags &= ~HAMMER_RECG_DIRECT_INVAL; 1876 } 1877 } 1878 1879 /* 1880 * This is called to remove the second-level cached zone-2 offset from 1881 * frontend buffer cache buffers, now stale due to a data relocation. 1882 * These offsets are generated by cluster_read() via VOP_BMAP, or directly 1883 * by hammer_vop_strategy_read(). 1884 * 1885 * This is rather nasty because here we have something like the reblocker 1886 * scanning the raw B-Tree with no held references on anything, really, 1887 * other then a shared lock on the B-Tree node, and we have to access the 1888 * frontend's buffer cache to check for and clean out the association. 1889 * Specifically, if the reblocker is moving data on the disk, these cached 1890 * offsets will become invalid. 1891 * 1892 * Only data record types associated with the large-data zone are subject 1893 * to direct-io and need to be checked. 
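 *
 * Sketch of the intended call (illustrative; a data-relocating scan such
 * as the reblocker would be the real caller):
 *
 *	hammer_io_direct_uncache(hmp, &elm->leaf);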
1894 * 1895 */ 1896 void 1897 hammer_io_direct_uncache(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf) 1898 { 1899 struct hammer_inode_info iinfo; 1900 int zone; 1901 1902 if (leaf->base.rec_type != HAMMER_RECTYPE_DATA) 1903 return; 1904 zone = HAMMER_ZONE_DECODE(leaf->data_offset); 1905 if (zone != HAMMER_ZONE_LARGE_DATA_INDEX) 1906 return; 1907 iinfo.obj_id = leaf->base.obj_id; 1908 iinfo.obj_asof = 0; /* unused */ 1909 iinfo.obj_localization = leaf->base.localization & 1910 HAMMER_LOCALIZE_PSEUDOFS_MASK; 1911 iinfo.u.leaf = leaf; 1912 hammer_scan_inode_snapshots(hmp, &iinfo, 1913 hammer_io_direct_uncache_callback, 1914 leaf); 1915 } 1916 1917 static int 1918 hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data) 1919 { 1920 hammer_inode_info_t iinfo = data; 1921 hammer_off_t data_offset; 1922 hammer_off_t file_offset; 1923 struct vnode *vp; 1924 struct buf *bp; 1925 int blksize; 1926 1927 if (ip->vp == NULL) 1928 return(0); 1929 data_offset = iinfo->u.leaf->data_offset; 1930 file_offset = iinfo->u.leaf->base.key - iinfo->u.leaf->data_len; 1931 blksize = iinfo->u.leaf->data_len; 1932 KKASSERT((blksize & HAMMER_BUFMASK) == 0); 1933 1934 /* 1935 * Warning: FINDBLK_TEST return stable storage but not stable 1936 * contents. It happens to be ok in this case. 1937 */ 1938 hammer_ref(&ip->lock); 1939 if (hammer_get_vnode(ip, &vp) == 0) { 1940 if ((bp = findblk(ip->vp, file_offset, FINDBLK_TEST)) != NULL && 1941 bp->b_bio2.bio_offset != NOOFFSET) { 1942 bp = getblk(ip->vp, file_offset, blksize, 0, 0); 1943 bp->b_bio2.bio_offset = NOOFFSET; 1944 brelse(bp); 1945 } 1946 vput(vp); 1947 } 1948 hammer_rel_inode(ip, 0); 1949 return(0); 1950 } 1951 1952 1953 /* 1954 * This function is called when writes may have occured on the volume, 1955 * indicating that the device may be holding cached writes. 1956 */ 1957 static void 1958 hammer_io_flush_mark(hammer_volume_t volume) 1959 { 1960 atomic_set_int(&volume->vol_flags, HAMMER_VOLF_NEEDFLUSH); 1961 } 1962 1963 /* 1964 * This function ensures that the device has flushed any cached writes out. 1965 */ 1966 void 1967 hammer_io_flush_sync(hammer_mount_t hmp) 1968 { 1969 hammer_volume_t volume; 1970 struct buf *bp_base = NULL; 1971 struct buf *bp; 1972 1973 RB_FOREACH(volume, hammer_vol_rb_tree, &hmp->rb_vols_root) { 1974 if (volume->vol_flags & HAMMER_VOLF_NEEDFLUSH) { 1975 atomic_clear_int(&volume->vol_flags, 1976 HAMMER_VOLF_NEEDFLUSH); 1977 bp = getpbuf(NULL); 1978 bp->b_bio1.bio_offset = 0; 1979 bp->b_bufsize = 0; 1980 bp->b_bcount = 0; 1981 bp->b_cmd = BUF_CMD_FLUSH; 1982 bp->b_bio1.bio_caller_info1.cluster_head = bp_base; 1983 bp->b_bio1.bio_done = biodone_sync; 1984 bp->b_bio1.bio_flags |= BIO_SYNC; 1985 bp_base = bp; 1986 vn_strategy(volume->devvp, &bp->b_bio1); 1987 } 1988 } 1989 while ((bp = bp_base) != NULL) { 1990 bp_base = bp->b_bio1.bio_caller_info1.cluster_head; 1991 biowait(&bp->b_bio1, "hmrFLS"); 1992 relpbuf(bp, NULL); 1993 } 1994 } 1995 1996 /* 1997 * Limit the amount of backlog which we allow to build up 1998 */ 1999 void 2000 hammer_io_limit_backlog(hammer_mount_t hmp) 2001 { 2002 while (hmp->io_running_space > hammer_limit_running_io) { 2003 hmp->io_running_wakeup = 1; 2004 tsleep(&hmp->io_running_wakeup, 0, "hmiolm", hz / 10); 2005 } 2006 } 2007
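
/*
 * Usage sketch (illustrative only, not a real call site): a write-heavy
 * path throttles itself against the running-I/O backlog before queueing
 * more buffers, and a final sync forces the media to commit its write
 * cache:
 *
 *	hammer_io_limit_backlog(hmp);
 *	... issue more hammer_io_flush() calls ...
 *	hammer_io_flush_sync(hmp);
 */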