/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.22 2008/03/19 20:18:17 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go of, we set B_LOCKED in the buffer and actively release it
 * later, when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_io_deallocate(struct buf *bp);
static int hammer_io_checkwrite(struct buf *bp);

/*
 * Initialize an already-zero'd hammer_io structure.
 */
void
hammer_io_init(hammer_io_t io, enum hammer_io_type type)
{
	io->type = type;
	TAILQ_INIT(&io->deplist);
}
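
/*
 * Illustrative lifecycle sketch (editorial addition; the "buffer" local
 * and the devvp argument are hypothetical stand-ins for a typical
 * caller).  A caller embeds a struct hammer_io in its tracking
 * structure, initializes it, attaches backing store, and releases it
 * on the final dereference:
 *
 *	hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
 *	error = hammer_io_read(devvp, &buffer->io);	(attach and load bp)
 *	... modify via hammer_modify_buffer() ...
 *	hammer_io_release(&buffer->io, flush);		(on the last reference)
 */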
/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  Called with the io structure exclusively locked.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 * HAMMER must own the buffer (released == 0) since we mess around with it.
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
{
	struct buf *bp = iou->io.bp;

	KKASSERT(TAILQ_EMPTY(&iou->io.deplist) && iou->io.modified == 0);
	buf_dep_init(bp);
	iou->io.bp = NULL;
	bp->b_flags &= ~B_LOCKED;
	if (elseit) {
		KKASSERT(iou->io.released == 0);
		iou->io.released = 1;
		bqrelse(bp);
	} else {
		KKASSERT(iou->io.released);
	}

	switch(iou->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		iou->volume.ondisk = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		iou->buffer.ondisk = NULL;
		break;
	}
}

/*
 * Wait for any physical IO to complete.
 */
static void
hammer_io_wait(hammer_io_t io)
{
	if (io->running) {
		crit_enter();
		tsleep_interlock(io);
		io->waiting = 1;
		for (;;) {
			tsleep(io, 0, "hmrflw", 0);
			if (io->running == 0)
				break;
			tsleep_interlock(io);
			io->waiting = 1;
			if (io->running == 0)
				break;
		}
		crit_exit();
	}
}

void
hammer_io_waitdep(hammer_io_t io)
{
	while (TAILQ_FIRST(&io->deplist)) {
		kprintf("waitdep %p\n", io);
		tsleep(io, 0, "hmrdep", hz);
	}
}

/*
 * Load bp for a HAMMER structure.  The io is exclusively locked by the
 * caller.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
		io->running = 0;
		io->waiting = 0;
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty; enforce serialization against
 * background I/O so we can call it.
 *
 * The caller is responsible for calling hammer_modify_*() on the
 * appropriate HAMMER structure.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->modified = 0;
		io->released = 0;
		io->running = 0;
		io->waiting = 0;
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
	}
	vfs_bio_clrbuf(bp);
	return(0);
}
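
/*
 * Sketch of the expected hammer_io_new() calling pattern (editorial
 * addition; "trans" and "buffer" are hypothetical caller state).  The
 * returned buffer is merely zero'd, so the caller must still mark the
 * structure modified before initializing the on-disk fields:
 *
 *	error = hammer_io_new(devvp, &buffer->io);
 *	hammer_modify_buffer(trans, buffer, NULL, 0);	(len 0: no undo)
 *	... initialize buffer->ondisk ...
 */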
/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually locked exclusively (but may not be during unmount).
 *
 * If flush is 1, or B_LOCKED was set indicating that the kernel
 * wanted to recycle the buffer, and there are no dependencies, this
 * function will issue an asynchronous write.
 *
 * If flush is 2 this function waits until all I/O has completed and
 * disassociates the bp from the IO before returning, unless there
 * are still other references.
 */
void
hammer_io_release(struct hammer_io *io, int flush)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL)
		return;

#if 0
	/*
	 * If flush is 2 wait for dependencies
	 */
	while (flush == 2 && TAILQ_FIRST(&io->deplist)) {
		hammer_io_wait(TAILQ_FIRST(&io->deplist));
	}
#endif

	/*
	 * Try to flush a dirty IO to disk if asked to by the caller
	 * or if the kernel tried to flush the buffer in the past.
	 *
	 * The flush will fail if any dependencies are present.
	 */
	if (io->modified && (flush || bp->b_flags & B_LOCKED))
		hammer_io_flush(io);

	/*
	 * If flush is 2 we wait for the IO to complete.
	 */
	if (flush == 2 && io->running) {
		hammer_io_wait(io);
	}

	/*
	 * Actively or passively release the buffer.  Modified IOs with
	 * dependencies cannot be released.
	 */
	if (flush && io->modified == 0 && io->running == 0) {
		KKASSERT(TAILQ_EMPTY(&io->deplist));
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
		hammer_io_disassociate((hammer_io_structure_t)io, 1);
	} else if (io->modified) {
		if (io->released == 0 && TAILQ_EMPTY(&io->deplist)) {
			io->released = 1;
			bdwrite(bp);
		}
	} else if (io->released == 0) {
		io->released = 1;
		bqrelse(bp);
	}
}

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
	struct buf *bp;

	/*
	 * Can't flush if the IO isn't modified or if it has dependencies.
	 */
	if (io->modified == 0)
		return;
	if (TAILQ_FIRST(&io->deplist))
		return;

	KKASSERT(io->bp);

	/*
	 * XXX - umount syncs buffers without referencing them, check for 0
	 * also.
	 */
	KKASSERT(io->lock.refs == 0 || io->lock.refs == 1);

	/*
	 * Reset modified to 0 here and re-check it after the IO completes.
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * NOTE: We have no dependencies so we don't have to worry about
	 * cluster-opens here.
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	io->modified = 0;	/* force interlock */
	bp = io->bp;

	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		io->released = 0;
	}
	io->released = 1;
	io->running = 1;
	bawrite(bp);
}
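
/*
 * Editorial summary of the flush argument to hammer_io_release(),
 * restated from the logic above (no new semantics intended):
 *
 *	flush == 0	passive release; modified data is queued with
 *			bdwrite() and written out later by the kernel
 *	flush == 1	actively flush a modified IO and, once clean and
 *			idle, disassociate the bp from the structure
 *	flush == 2	as for 1, but also wait for any running I/O to
 *			complete before disassociating
 */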
/************************************************************************
 *				BUFFER DIRTYING				*
 ************************************************************************
 *
 * These routines deal with dependencies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependencies.
 */

/*
 * Mark a HAMMER structure as undergoing modification.  Return 1 when
 * applying a non-NULL ordering dependency for the first time, 0 otherwise.
 *
 * list can be NULL, indicating that a structural modification is being
 * made without creating an ordering dependency.
 */
static __inline
int
hammer_io_modify(hammer_io_t io, struct hammer_io_list *list)
{
	int r;

	/*
	 * Shortcut if nothing to do.
	 */
	KKASSERT(io->lock.refs != 0 && io->bp != NULL);
	if (io->modified && io->released == 0 &&
	    (io->entry_list || list == NULL)) {
		return(0);
	}

	hammer_lock_ex(&io->lock);
	io->modified = 1;
	if (io->released) {
		regetblk(io->bp);
		BUF_KERNPROC(io->bp);
		io->released = 0;
		KKASSERT(io->modified != 0);
	}
	if (io->entry_list == NULL) {
		io->entry_list = list;
		if (list) {
			TAILQ_INSERT_TAIL(list, io, entry);
			r = 1;
		} else {
			r = 0;
		}
	} else {
		/* only one dependency is allowed */
		KKASSERT(list == NULL || io->entry_list == list);
		r = 0;
	}
	hammer_unlock(&io->lock);
	return(r);
}

void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
		     void *base, int len)
{
	hammer_io_modify(&volume->io, NULL);

	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
			HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
			base, len);
	}
}

/*
 * Caller intends to modify a buffer's ondisk structure.  The related
 * cluster must be marked open before the modified buffer can be flushed,
 * so get that I/O going now.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
		     void *base, int len)
{
	hammer_io_modify(&buffer->io, NULL);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
				     buffer->zone2_offset + rel_offset,
				     base, len);
	}
}

/*
 * Mark an entity as no longer being dirty -- this usually occurs when
 * the governing a-list has freed the entire entity.
 *
 * XXX
 */
void
hammer_io_clear_modify(struct hammer_io *io)
{
#if 0
	struct buf *bp;

	io->modified = 0;
	if ((bp = io->bp) != NULL) {
		if (io->released) {
			regetblk(bp);
			/* BUF_KERNPROC(io->bp); */
		} else {
			io->released = 1;
		}
		if (io->modified == 0) {
			kprintf("hammer_io_clear_modify: cleared %p\n", io);
			bundirty(bp);
			bqrelse(bp);
		} else {
			bdwrite(bp);
		}
	}
#endif
}
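
/*
 * Example of the required modify-before-write pattern (editorial
 * addition; "some_field" is a hypothetical member of the on-disk
 * structure):
 *
 *	hammer_modify_buffer(trans, buffer, &buffer->ondisk->some_field,
 *			     sizeof(buffer->ondisk->some_field));
 *	buffer->ondisk->some_field = new_value;
 *
 * The modify call reacquires the bp if it was passively released and
 * records an undo for the byte range before the caller overwrites it.
 */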
/************************************************************************
 *				HAMMER_BIOOPS				*
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only.
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback.
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependency if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(iou->io.released == 1);

	/*
	 * If this was a write and the modified bit is still clear we can
	 * remove ourselves from the dependency list.
	 *
	 * If no lock references remain and we can acquire the IO lock and
	 * someone at some point wanted us to flush (B_LOCKED test), then
	 * try to dispose of the IO.
	 */
	if (iou->io.modified == 0 && iou->io.entry_list) {
		TAILQ_REMOVE(iou->io.entry_list, &iou->io, entry);
		iou->io.entry_list = NULL;
	}
	iou->io.running = 0;
	if (iou->io.waiting) {
		iou->io.waiting = 0;
		wakeup(iou);
	}

	/*
	 * Someone wanted us to flush, try to clean out the buffer.
	 */
	if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
		KKASSERT(iou->io.modified == 0);
		bp->b_flags &= ~B_LOCKED;
		hammer_io_deallocate(bp);
		/* structure may be dead now */
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This case can only occur with read-only
 * bp's.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: If the HAMMER structure is passively cached we have to
 * scrap it here.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
	if (iou->io.lock.refs > 0 || iou->io.modified) {
		bp->b_flags |= B_LOCKED;
	} else {
		/* XXX interlock against ref or another disassociate */
		/* XXX this can leave HAMMER structures lying around */
		hammer_io_disassociate(iou, 0);
#if 0
		switch(iou->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&iou->volume, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&iou->buffer, 1);
			break;
		}
#endif
	}
}

/*
 * fsync callback from the kernel; currently a no-op.
 */
static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

/*
 * Callback for moving dependencies from one bp to another; currently
 * a no-op for HAMMER.
 */
static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does,
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}
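
/*
 * Editorial note on the b_dep linkage used by the callbacks in this
 * section: hammer_io_read() and hammer_io_new() insert io->worklist at
 * the head of bp->b_dep, so a callback can recover the owning HAMMER
 * structure directly from the bp:
 *
 *	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);
 */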
static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(TAILQ_EMPTY(&iou->io.deplist));

	/*
	 * We are called from the kernel on delayed-write buffers, and
	 * called from hammer_io_flush() on flush requests.  There should
	 * be no dependencies in either case.
	 *
	 * In the case of delayed-writes, the introduction of a dependency
	 * will block until the bp can be reacquired, and the bp is then
	 * simply not released until the dependency can be satisfied.
	 *
	 * We can only clear the modified bit when entered from the kernel
	 * if io.lock.refs == 0.
	 */
	if (iou->io.lock.refs == 0) {
		iou->io.modified = 0;
	}
	return(0);
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) {
		return(1);
	}
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};
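
/*
 * Editorial note: the bioops table above only takes effect for buffers
 * HAMMER has claimed.  hammer_io_read() and hammer_io_new() wire it up
 * per-bp (restated from those routines):
 *
 *	bp->b_ops = &hammer_bioops;
 *	LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
 */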