/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.7 2007/12/29 09:01:27 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go, we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>
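
#if 0
/*
 * Illustrative sketch only, not compiled: the passive-association life
 * cycle described above, as a hypothetical caller might drive it.
 * hammer_io_read() and hammer_io_release() are defined below;
 * hammer_lock_ex()/hammer_unlock() are used as elsewhere in this file.
 * The wrapper name and the bare io->modified assignment are assumptions
 * made for the example.
 */
static int
example_io_lifecycle(struct vnode *devvp, struct hammer_io *io)
{
	int error;

	error = hammer_io_read(devvp, io);	/* associate and read the bp */
	if (error == 0) {
		io->modified = 1;		/* caller dirtied the data */
		hammer_lock_ex(&io->lock);	/* release expects io->lock */
		hammer_io_release(io, 0);	/* leave bp passively associated */
		hammer_unlock(&io->lock);
	}
	return(error);
}
#endif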

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.
 */
static void
hammer_io_disassociate(union hammer_io_structure *io)
{
	struct buf *bp = io->io.bp;

	LIST_INIT(&bp->b_dep);	/* clear the association */
	bp->b_ops = NULL;
	io->io.bp = NULL;

	switch(io->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		io->volume.ondisk = NULL;
		io->volume.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_SUPERCL:
		io->supercl.ondisk = NULL;
		io->supercl.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_CLUSTER:
		io->cluster.ondisk = NULL;
		io->cluster.alist_master.meta = NULL;
		io->cluster.alist_btree.meta = NULL;
		io->cluster.alist_record.meta = NULL;
		io->cluster.alist_mdata.meta = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		io->buffer.ondisk = NULL;
		io->buffer.alist.meta = NULL;
		break;
	}
	io->io.modified = 0;
	io->io.released = 1;
}

/*
 * Mark a cluster as being closed.  This is done as late as possible,
 * only when we are asked to flush the cluster.
 */
static void
hammer_close_cluster(hammer_cluster_t cluster)
{
	while (cluster->state == HAMMER_CLUSTER_ASYNC)
		tsleep(cluster, 0, "hmrdep", 0);
	if (cluster->state == HAMMER_CLUSTER_OPEN) {
		cluster->state = HAMMER_CLUSTER_IDLE;
		cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN;
		kprintf("CLOSE CLUSTER\n");
		hammer_modify_cluster(cluster);
	}
}


/*
 * Load bp for a HAMMER structure.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty; enforce serialization against background
 * I/O so we can call it.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->released = 0;	/* we hold an active lock on bp */
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			io->released = 0;
			BUF_KERNPROC(bp);
		}
	}
	io->modified = 1;
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * This routine is called when a buffer within a cluster is modified.  We
 * mark the cluster open and immediately initiate asynchronous I/O.  Any
 * related hammer_buffer write I/O blocks until our async write completes.
 * This guarantees (inasmuch as the OS can) that the cluster recovery code
 * will see a cluster marked open if a crash occurred while the filesystem
 * still had dirty buffers associated with that cluster.
 */
void
hammer_io_notify_cluster(hammer_cluster_t cluster)
{
	struct hammer_io *io = &cluster->io;

	if (cluster->state == HAMMER_CLUSTER_IDLE) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->state == HAMMER_CLUSTER_IDLE) {
			if (io->released)
				regetblk(io->bp);
			kprintf("MARK CLUSTER OPEN\n");
			cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
			cluster->state = HAMMER_CLUSTER_ASYNC;
			hammer_modify_cluster(cluster);
			bawrite(io->bp);
			io->released = 1;
			/* leave cluster marked as modified */
		}
		hammer_unlock(&cluster->io.lock);
	}
}
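
#if 0
/*
 * Illustrative sketch only, not compiled: a hypothetical path that
 * dirties a hammer_buffer is expected to notify the owning cluster
 * first, so the open-cluster header write described above is initiated
 * before the buffer itself can go to disk.  The function name is an
 * assumption; the buffer->cluster and io fields follow the usage in
 * hammer_io_release() and hammer_io_checkwrite() below.
 */
static void
example_dirty_buffer(hammer_buffer_t buffer)
{
	hammer_io_notify_cluster(buffer->cluster);
	buffer->io.modified = 1;
}
#endif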

/*
 * This routine is called on the last reference to a hammer structure.  If
 * flush is non-zero we have to completely disassociate the bp from the
 * structure (which may involve blocking).  Otherwise we can leave the bp
 * passively associated with the structure.
 *
 * The caller is holding io->lock exclusively.
 */
void
hammer_io_release(struct hammer_io *io, int flush)
{
	union hammer_io_structure *iou = (void *)io;
	hammer_cluster_t cluster;
	struct buf *bp;

	if ((bp = io->bp) != NULL) {
		/*
		 * If neither we nor the kernel want to flush the bp, we can
		 * stop here.  Make sure the bp is passively released
		 * before returning.  Even though we are still holding it,
		 * we want to be notified when the kernel wishes to flush
		 * it out, so make sure B_DELWRI is properly set if we had
		 * made modifications.
		 */
		if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) {
			if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
				if (io->released)
					regetblk(bp);
				bdwrite(bp);
				io->released = 1;
			} else if (io->released == 0) {
				bqrelse(bp);
				io->released = 1;
			}
			return;
		}

		/*
		 * We've been asked to flush the buffer.
		 *
		 * If this is a hammer_buffer we may have to wait for the
		 * cluster header write to complete.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
		    (io->modified || (bp->b_flags & B_DELWRI))) {
			cluster = iou->buffer.cluster;
			while (cluster->state == HAMMER_CLUSTER_ASYNC)
				tsleep(iou->buffer.cluster, 0, "hmrdep", 0);
		}

		/*
		 * If we have an open cluster header, close it.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) {
			hammer_close_cluster(&iou->cluster);
		}

		/*
		 * Ok, the dependencies are all gone.  Check for the simple
		 * disassociation case.
		 */
		if (io->released && (bp->b_flags & B_LOCKED) == 0 &&
		    (io->modified == 0 || (bp->b_flags & B_DELWRI))) {
			hammer_io_disassociate(iou);
			return;
		}

		/*
		 * Handle the more complex disassociation case.  Acquire the
		 * buffer, clean up B_LOCKED, and deal with the modified
		 * flag.
		 */
		if (io->released)
			regetblk(bp);
		io->released = 1;
		bp->b_flags &= ~B_LOCKED;
		if (io->modified || (bp->b_flags & B_DELWRI))
			bawrite(bp);
		else
			bqrelse(bp);
		hammer_io_disassociate(iou);
	}
}

/*
 * Flush dirty data, if any.
 */
void
hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL)
		return;
	if (bp->b_flags & B_DELWRI)
		io->modified = 1;
	if (io->modified == 0)
		return;
	kprintf("IO FLUSH BP %p TYPE %d REFS %d\n", bp, io->type, io->lock.refs);
	hammer_lock_ex(&io->lock);

	if ((bp = io->bp) != NULL && io->modified) {
		if (io->released)
			regetblk(bp);
		io->released = 1;

		/*
		 * We own the bp now.
		 */
		if (info->waitfor & MNT_WAIT) {
			io->modified = 0;
			error = bwrite(bp);
			if (error)
				info->error = error;
		} else if (io->lock.refs == 1) {
			io->modified = 0;
			bawrite(bp);
		} else {
			/*
			 * Structure is in-use, don't race the write, but
			 * also set B_LOCKED so we know something tried to
			 * flush it.
			 */
			kprintf("can't flush bp %p, %d refs - delaying\n",
				bp, io->lock.refs);
			bp->b_flags |= B_LOCKED;
			bqrelse(bp);
		}
	}
	hammer_unlock(&io->lock);
}
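
#if 0
/*
 * Illustrative sketch only, not compiled: how a hypothetical syncer
 * loop might drive hammer_io_flush().  Only the waitfor and error
 * fields of hammer_sync_info are relied upon above; the wrapper name
 * is an assumption.
 */
static int
example_sync_io(struct hammer_io *io)
{
	struct hammer_sync_info info;

	info.waitfor = MNT_WAIT;	/* take the synchronous bwrite() path */
	info.error = 0;
	hammer_io_flush(io, &info);
	return(info.error);
}
#endif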

/*
 * HAMMER_BIOOPS
 */

/*
 * Pre and post I/O callbacks.
 */
static void hammer_io_deallocate(struct buf *bp);

static void
hammer_io_start(struct buf *bp)
{
#if 0
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_BUFFER) {
		while (io->buffer.cluster->io_in_progress) {
			kprintf("hammer_io_start: wait for cluster\n");
			tsleep(io->buffer.cluster, 0, "hmrdep", 0);
			kprintf("hammer_io_start: wait for cluster done\n");
		}
	}
#endif
}

static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_CLUSTER) {
		if (io->cluster.state == HAMMER_CLUSTER_ASYNC) {
			kprintf("cluster write complete flags %08x\n",
				io->cluster.ondisk->clu_flags);
			io->cluster.state = HAMMER_CLUSTER_OPEN;
			wakeup(&io->cluster);
		}
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This can only occur if the buffer is
 * passively associated with the structure.  The kernel has locked
 * the buffer.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	/* XXX memory interlock, spinlock to sync cpus */

	/*
	 * Since the kernel is passing us a locked buffer, the HAMMER
	 * structure had better not believe it has a lock on the buffer.
	 */
	KKASSERT(io->io.released);
	crit_enter();

	/*
	 * First, ref the structure to prevent either the buffer or the
	 * structure from going away or being unexpectedly flushed.
	 */
	hammer_ref(&io->io.lock);

	/*
	 * Buffers can have active references from cached hammer_node's,
	 * even if those nodes are themselves passively cached.  Attempt
	 * to clean them out.  This may not succeed.
	 */
	if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
	    hammer_lock_ex_try(&io->io.lock) == 0) {
		hammer_flush_buffer_nodes(&io->buffer);
		hammer_unlock(&io->io.lock);
	}

	if (hammer_islastref(&io->io.lock)) {
		/*
		 * If we are the only ref left we can disassociate the I/O.
		 * It had better still be in a released state because the
		 * kernel is holding a lock on the buffer.  Any passive
		 * modifications should have already been synchronized with
		 * the buffer.
		 */
		KKASSERT(io->io.released);
		hammer_io_disassociate(io);
		bp->b_flags &= ~B_LOCKED;
		KKASSERT(io->io.modified == 0 || (bp->b_flags & B_DELWRI));

		/*
		 * Perform final rites on the structure.  This can cause
		 * a chain reaction - e.g. last buffer -> last cluster ->
		 * last supercluster -> last volume.
		 */
		switch(io->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&io->volume, 1);
			break;
		case HAMMER_STRUCTURE_SUPERCL:
			hammer_rel_supercl(&io->supercl, 1);
			break;
		case HAMMER_STRUCTURE_CLUSTER:
			hammer_rel_cluster(&io->cluster, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&io->buffer, 1);
			break;
		}
	} else {
		/*
		 * Otherwise tell the kernel not to destroy the buffer.
		 *
		 * We have to unref the structure without performing any
		 * final rites on it to avoid a deadlock.
		 */
		bp->b_flags |= B_LOCKED;
		hammer_unref(&io->io.lock);
	}

	crit_exit();
}
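
/*
 * bioops fsync hook.  Currently a no-op; there is nothing for HAMMER
 * to do here.
 */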
static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
	    iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) {
		/*
		 * Cannot write out a cluster buffer if the cluster header
		 * I/O opening the cluster has not completed.
		 */
		kprintf("hammer_io_checkwrite: w/ depend - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else if (iou->io.lock.refs) {
		/*
		 * Cannot write out a bp if its associated buffer has active
		 * references.
		 */
		kprintf("hammer_io_checkwrite: w/ refs - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else {
		/*
		 * We're good, but before we can let the kernel proceed we
		 * may have to make some adjustments.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
			hammer_close_cluster(&iou->cluster);
		kprintf("hammer_io_checkwrite: ok\n");
		KKASSERT(iou->io.released);
		hammer_io_disassociate(iou);
		return(0);
	}
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED))
		return(1);
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};
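
#if 0
/*
 * Illustrative sketch only, not compiled: hammer_io_checkflush() tells
 * a caller whether the backing store should be flushed rather than
 * left passively associated.  A hypothetical release path could feed
 * its answer straight into hammer_io_release(); the wrapper name is
 * an assumption.
 */
static void
example_release_io(struct hammer_io *io)
{
	hammer_io_release(io, hammer_io_checkflush(io));
}
#endif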