/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.49 2008/07/14 03:20:49 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to destroy a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_io_modify(hammer_io_t io, int count);
static void hammer_io_deallocate(struct buf *bp);
#if 0
static void hammer_io_direct_read_complete(struct bio *nbio);
#endif
static void hammer_io_direct_write_complete(struct bio *nbio);
static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data);

/*
 * Initialize a new, already-zero'd hammer_io structure, or reinitialize
 * an existing hammer_io structure which may have switched to another type.
 */
void
hammer_io_init(hammer_io_t io, hammer_mount_t hmp, enum hammer_io_type type)
{
        io->hmp = hmp;
        io->type = type;
}

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  The buffer is unlocked and marked appropriately for
 * reclamation.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 *
 * The caller must own the buffer and the IO must indicate that the
 * structure no longer owns it (io.released != 0).
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou)
{
        struct buf *bp = iou->io.bp;

        KKASSERT(iou->io.released);
        KKASSERT(iou->io.modified == 0);
        KKASSERT(LIST_FIRST(&bp->b_dep) == (void *)iou);
        buf_dep_init(bp);
        iou->io.bp = NULL;

        /*
         * If the buffer was locked someone wanted to get rid of it.
         */
        if (bp->b_flags & B_LOCKED) {
                --hammer_count_io_locked;
                bp->b_flags &= ~B_LOCKED;
        }
        if (iou->io.reclaim) {
                bp->b_flags |= B_NOCACHE|B_RELBUF;
                iou->io.reclaim = 0;
        }

        switch(iou->io.type) {
        case HAMMER_STRUCTURE_VOLUME:
                iou->volume.ondisk = NULL;
                break;
        case HAMMER_STRUCTURE_DATA_BUFFER:
        case HAMMER_STRUCTURE_META_BUFFER:
        case HAMMER_STRUCTURE_UNDO_BUFFER:
                iou->buffer.ondisk = NULL;
                break;
        }
}

/*
 * Wait for any physical IO to complete
 */
void
hammer_io_wait(hammer_io_t io)
{
        if (io->running) {
                crit_enter();
                tsleep_interlock(io);
                io->waiting = 1;
                for (;;) {
                        tsleep(io, 0, "hmrflw", 0);
                        if (io->running == 0)
                                break;
                        tsleep_interlock(io);
                        io->waiting = 1;
                        if (io->running == 0)
                                break;
                }
                crit_exit();
        }
}

/*
 * Wait for all hammer_io-initiated write I/O's to complete.  This is not
 * supposed to count direct I/O's but some can leak through (for
 * non-full-sized direct I/Os).
 */
void
hammer_io_wait_all(hammer_mount_t hmp, const char *ident)
{
        crit_enter();
        while (hmp->io_running_space)
                tsleep(&hmp->io_running_space, 0, ident, 0);
        crit_exit();
}

#define HAMMER_MAXRA    4

/*
 * Load bp for a HAMMER structure.  The io must be exclusively locked by
 * the caller.
 *
 * This routine is mostly used on meta-data and small-data blocks.  Generally
 * speaking HAMMER assumes some locality of reference and will cluster
 * a 64K read.
 *
 * Note that clustering occurs at the device layer, not the logical layer.
 * If the buffers do not apply to the current operation they may apply to
 * some other.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
{
        struct buf *bp;
        int error;

        if ((bp = io->bp) == NULL) {
                hammer_count_io_running_read += io->bytes;
#if 1
                error = cluster_read(devvp, limit, io->offset, io->bytes,
                                     HAMMER_CLUSTER_SIZE,
                                     HAMMER_CLUSTER_BUFS, &io->bp);
#else
                error = bread(devvp, io->offset, io->bytes, &io->bp);
#endif
                hammer_count_io_running_read -= io->bytes;
                if (error == 0) {
                        bp = io->bp;
                        bp->b_ops = &hammer_bioops;
                        KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
                        LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
                        BUF_KERNPROC(bp);
                }
                KKASSERT(io->modified == 0);
                KKASSERT(io->running == 0);
                KKASSERT(io->waiting == 0);
                io->released = 0;       /* we hold an active lock on bp */
        } else {
                error = 0;
        }
        return(error);
}
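
/*
 * Illustrative sketch (not compiled): roughly how a caller uses
 * hammer_io_read() to load the backing store for a hammer_buffer,
 * with the io exclusively locked.  The helper name is hypothetical;
 * see the real structure loaders in hammer_ondisk.c for the exact
 * sequence.
 */
#if 0
static int
example_load_buffer(hammer_volume_t volume, hammer_buffer_t buffer)
{
        int error;

        /* clustered device-level read, then map the ondisk image */
        error = hammer_io_read(volume->devvp, &buffer->io,
                               volume->maxbuf_off);
        if (error == 0)
                buffer->ondisk = (void *)buffer->io.bp->b_data;
        return(error);
}
#endif
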
/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * Must be called with the IO exclusively locked.
 *
 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
 * I/O by forcing the buffer to not be in a released state before calling
 * it.
 *
 * This function will also mark the IO as modified but it will not
 * increment the modify_refs count.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
        struct buf *bp;

        if ((bp = io->bp) == NULL) {
                io->bp = getblk(devvp, io->offset, io->bytes, 0, 0);
                bp = io->bp;
                bp->b_ops = &hammer_bioops;
                KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
                LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
                io->released = 0;
                KKASSERT(io->running == 0);
                io->waiting = 0;
                BUF_KERNPROC(bp);
        } else {
                if (io->released) {
                        regetblk(bp);
                        BUF_KERNPROC(bp);
                        io->released = 0;
                }
        }
        hammer_io_modify(io, 0);
        vfs_bio_clrbuf(bp);
        return(0);
}

/*
 * Remove potential device level aliases against buffers managed by high
 * level vnodes.
 */
void
hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
{
        hammer_io_structure_t iou;
        hammer_off_t phys_offset;
        struct buf *bp;

        phys_offset = volume->ondisk->vol_buf_beg +
                      (zone2_offset & HAMMER_OFF_SHORT_MASK);
        crit_enter();
        if ((bp = findblk(volume->devvp, phys_offset)) != NULL) {
                bp = getblk(volume->devvp, phys_offset, bp->b_bufsize, 0, 0);
                if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) {
                        hammer_io_clear_modify(&iou->io, 1);
                        bundirty(bp);
                        iou->io.reclaim = 1;
                        hammer_io_deallocate(bp);
                } else {
                        KKASSERT((bp->b_flags & B_LOCKED) == 0);
                        bundirty(bp);
                        bp->b_flags |= B_NOCACHE|B_RELBUF;
                }
                brelse(bp);
        }
        crit_exit();
}

/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually interlocked with io.loading and io.refs must be 1.
 *
 * This routine may return a non-NULL bp to the caller for disposal.
 * Disposal simply means the caller finishes decrementing the ref-count
 * on the IO structure then brelse()'s the bp.  The bp may or may not
 * still be passively associated with the IO.
 *
 * The only requirement here is that modified meta-data and volume-header
 * buffers may NOT be disassociated from the IO structure, and consequently
 * we also leave such buffers actively associated with the IO if they already
 * are (since the kernel can't do anything with them anyway).  Only the
 * flusher is allowed to write such buffers out.  Modified pure-data and
 * undo buffers are returned to the kernel but left passively associated
 * so we can track when the kernel writes the bp out.
 */
struct buf *
hammer_io_release(struct hammer_io *io, int flush)
{
        union hammer_io_structure *iou = (void *)io;
        struct buf *bp;

        if ((bp = io->bp) == NULL)
                return(NULL);

        /*
         * Try to flush a dirty IO to disk if asked to by the
         * caller or if the kernel tried to flush the buffer in the past.
         *
         * Kernel-initiated flushes are only allowed for pure-data buffers.
         * Meta-data and volume buffers can only be flushed explicitly
         * by HAMMER.
         */
        if (io->modified) {
                if (flush) {
                        hammer_io_flush(io);
                } else if (bp->b_flags & B_LOCKED) {
                        switch(io->type) {
                        case HAMMER_STRUCTURE_DATA_BUFFER:
                        case HAMMER_STRUCTURE_UNDO_BUFFER:
                                hammer_io_flush(io);
                                break;
                        default:
                                break;
                        }
                } /* else no explicit request to flush the buffer */
        }

        /*
         * Wait for the IO to complete if asked to.
         */
        if (io->waitdep && io->running) {
                hammer_io_wait(io);
        }

        /*
         * Return control of the buffer to the kernel (with the proviso
         * that our bioops can override kernel decisions with regards to
         * the buffer).
         */
        if ((flush || io->reclaim) && io->modified == 0 && io->running == 0) {
                /*
                 * Always disassociate the bp if an explicit flush
                 * was requested and the IO completed with no error
                 * (so unmount can really clean up the structure).
                 */
                if (io->released) {
                        regetblk(bp);
                        BUF_KERNPROC(bp);
                } else {
                        io->released = 1;
                }
                hammer_io_disassociate((hammer_io_structure_t)io);
                /* return the bp */
        } else if (io->modified) {
                /*
                 * Only certain IO types can be released to the kernel if
                 * the buffer has been modified.
                 *
                 * volume and meta-data IO types may only be explicitly
                 * flushed by HAMMER.
                 */
                switch(io->type) {
                case HAMMER_STRUCTURE_DATA_BUFFER:
                case HAMMER_STRUCTURE_UNDO_BUFFER:
                        if (io->released == 0) {
                                io->released = 1;
                                bdwrite(bp);
                        }
                        break;
                default:
                        break;
                }
                bp = NULL;      /* bp left associated */
        } else if (io->released == 0) {
                /*
                 * Clean buffers can generally be released to the kernel.
                 * We leave the bp passively associated with the HAMMER
                 * structure and use bioops to disconnect it later on
                 * if the kernel wants to discard the buffer.
                 *
                 * We can steal the structure's ownership of the bp.
                 */
                io->released = 1;
                if (bp->b_flags & B_LOCKED) {
                        hammer_io_disassociate(iou);
                        /* return the bp */
                } else {
                        if (io->reclaim) {
                                hammer_io_disassociate(iou);
                                /* return the bp */
                        } else {
                                /* return the bp (bp passively associated) */
                        }
                }
        } else {
                /*
                 * A released buffer is passively associated with our
                 * hammer_io structure.  The kernel cannot destroy it
                 * without making a bioops call.  If the kernel (B_LOCKED)
                 * or we (reclaim) requested that the buffer be destroyed
                 * we destroy it, otherwise we do a quick get/release to
                 * reset its position in the kernel's LRU list.
                 *
                 * Leaving the buffer passively associated allows us to
                 * use the kernel's LRU buffer flushing mechanisms rather
                 * than rolling our own.
                 *
                 * XXX there are two ways of doing this.  We can re-acquire
                 * and passively release to reset the LRU, or not.
                 */
                if (io->running == 0) {
                        regetblk(bp);
                        if ((bp->b_flags & B_LOCKED) || io->reclaim) {
                                hammer_io_disassociate(iou);
                                /* return the bp */
                        } else {
                                /* return the bp (bp passively associated) */
                        }
                } else {
                        /*
                         * bp is left passively associated but we do not
                         * try to reacquire it.  Interactions with the io
                         * structure will occur on completion of the bp's
                         * I/O.
                         */
                        bp = NULL;
                }
        }
        return(bp);
}
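
/*
 * Illustrative sketch (not compiled): the disposal contract for
 * hammer_io_release().  A caller dropping its last reference releases
 * the io, drops its ref, and brelse()'s any bp handed back.  The
 * helper name is hypothetical and the loading/lock interplay is
 * elided; see hammer_rel_buffer() in hammer_ondisk.c for the real
 * sequence.
 */
#if 0
static void
example_drop_last_ref(hammer_buffer_t buffer)
{
        struct buf *bp;

        bp = hammer_io_release(&buffer->io, 0);
        hammer_unref(&buffer->io.lock);
        if (bp)
                brelse(bp);
}
#endif
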
/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
        struct buf *bp;

        /*
         * Degenerate case - nothing to flush if nothing is dirty.
         */
        if (io->modified == 0) {
                return;
        }

        KKASSERT(io->bp);
        KKASSERT(io->modify_refs <= 0);

        /*
         * Acquire ownership of the bp, particularly before we clear our
         * modified flag.
         *
         * We are going to bawrite() this bp.  Don't leave a window where
         * io->released is set, we actually own the bp rather than our
         * buffer.
         */
        bp = io->bp;
        if (io->released) {
                regetblk(bp);
                /* BUF_KERNPROC(io->bp); */
                /* io->released = 0; */
                KKASSERT(io->released);
                KKASSERT(io->bp == bp);
        }
        io->released = 1;

        /*
         * Acquire exclusive access to the bp and then clear the modified
         * state of the buffer prior to issuing I/O to interlock any
         * modifications made while the I/O is in progress.  This shouldn't
         * happen anyway but losing data would be worse.  The modified bit
         * will be rechecked after the IO completes.
         *
         * NOTE: This call also finalizes the buffer's content (inval == 0).
         *
         * This is only legal when lock.refs == 1 (otherwise we might clear
         * the modified bit while there are still users of the cluster
         * modifying the data).
         *
         * Do this before potentially blocking so any attempt to modify the
         * ondisk while we are blocked blocks waiting for us.
         */
        hammer_io_clear_modify(io, 0);

        /*
         * Transfer ownership to the kernel and initiate I/O.
         */
        io->running = 1;
        io->hmp->io_running_space += io->bytes;
        hammer_count_io_running_write += io->bytes;
        bawrite(bp);
}

/************************************************************************
 *                              BUFFER DIRTYING                         *
 ************************************************************************
 *
 * These routines deal with dependencies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependencies.
 */

/*
 * Mark a HAMMER structure as undergoing modification.  Meta-data buffers
 * are locked until the flusher can deal with them, pure data buffers
 * can be written out.
 */
static
void
hammer_io_modify(hammer_io_t io, int count)
{
        struct hammer_mount *hmp = io->hmp;

        /*
         * io->modify_refs must be >= 0
         */
        while (io->modify_refs < 0) {
                io->waitmod = 1;
                tsleep(io, 0, "hmrmod", 0);
        }

        /*
         * Shortcut if nothing to do.
         */
        KKASSERT(io->lock.refs != 0 && io->bp != NULL);
        io->modify_refs += count;
        if (io->modified && io->released == 0)
                return;

        hammer_lock_ex(&io->lock);
        if (io->modified == 0) {
                KKASSERT(io->mod_list == NULL);
                switch(io->type) {
                case HAMMER_STRUCTURE_VOLUME:
                        io->mod_list = &hmp->volu_list;
                        hmp->locked_dirty_space += io->bytes;
                        hammer_count_dirtybufspace += io->bytes;
                        break;
                case HAMMER_STRUCTURE_META_BUFFER:
                        io->mod_list = &hmp->meta_list;
                        hmp->locked_dirty_space += io->bytes;
                        hammer_count_dirtybufspace += io->bytes;
                        break;
                case HAMMER_STRUCTURE_UNDO_BUFFER:
                        io->mod_list = &hmp->undo_list;
                        break;
                case HAMMER_STRUCTURE_DATA_BUFFER:
                        io->mod_list = &hmp->data_list;
                        break;
                }
                TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry);
                io->modified = 1;
        }
        if (io->released) {
                regetblk(io->bp);
                BUF_KERNPROC(io->bp);
                io->released = 0;
                KKASSERT(io->modified != 0);
        }
        hammer_unlock(&io->lock);
}

static __inline
void
hammer_io_modify_done(hammer_io_t io)
{
        KKASSERT(io->modify_refs > 0);
        --io->modify_refs;
        if (io->modify_refs == 0 && io->waitmod) {
                io->waitmod = 0;
                wakeup(io);
        }
}

void
hammer_io_write_interlock(hammer_io_t io)
{
        while (io->modify_refs != 0) {
                io->waitmod = 1;
                tsleep(io, 0, "hmrmod", 0);
        }
        io->modify_refs = -1;
}

void
hammer_io_done_interlock(hammer_io_t io)
{
        KKASSERT(io->modify_refs == -1);
        io->modify_refs = 0;
        if (io->waitmod) {
                io->waitmod = 0;
                wakeup(io);
        }
}
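
/*
 * Illustrative sketch (not compiled): the write interlock holds
 * modify_refs at -1 for the duration of a write-out, which stalls
 * new modifiers (hammer_io_modify() sleeps while modify_refs < 0).
 * The helper name is hypothetical.
 */
#if 0
static void
example_write_interlocked(hammer_io_t io)
{
        hammer_io_write_interlock(io);
        /* ... snapshot or write out the buffer contents ... */
        hammer_io_done_interlock(io);
}
#endif
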
/*
 * Caller intends to modify a volume's ondisk structure.
 *
 * This is only allowed if we are the flusher or we have a ref on the
 * sync_lock.
 */
void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
                     void *base, int len)
{
        KKASSERT(trans == NULL || trans->sync_lock_refs > 0);

        hammer_io_modify(&volume->io, 1);
        if (len) {
                intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
                KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
                hammer_generate_undo(trans, &volume->io,
                        HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
                        base, len);
        }
}

/*
 * Caller intends to modify a buffer's ondisk structure.
 *
 * This is only allowed if we are the flusher or we have a ref on the
 * sync_lock.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
                     void *base, int len)
{
        KKASSERT(trans == NULL || trans->sync_lock_refs > 0);

        hammer_io_modify(&buffer->io, 1);
        if (len) {
                intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
                KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
                hammer_generate_undo(trans, &buffer->io,
                                     buffer->zone2_offset + rel_offset,
                                     base, len);
        }
}

void
hammer_modify_volume_done(hammer_volume_t volume)
{
        hammer_io_modify_done(&volume->io);
}

void
hammer_modify_buffer_done(hammer_buffer_t buffer)
{
        hammer_io_modify_done(&buffer->io);
}
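
/*
 * Illustrative sketch (not compiled): the canonical modify sequence.
 * Each on-disk mutation is bracketed by hammer_modify_buffer() and
 * hammer_modify_buffer_done() so an UNDO record covering exactly the
 * bytes being changed is generated before the change is made.  The
 * helper and the field chosen are hypothetical.
 */
#if 0
static void
example_bump_count(hammer_transaction_t trans, hammer_buffer_t buffer,
                   hammer_node_ondisk_t ondisk)
{
        hammer_modify_buffer(trans, buffer, &ondisk->count,
                             sizeof(ondisk->count));
        ++ondisk->count;
        hammer_modify_buffer_done(buffer);
}
#endif
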
/*
 * Mark an entity as not being dirty any more and finalize any
 * delayed adjustments to the buffer.
 *
 * Delayed adjustments are an important performance enhancement, allowing
 * us to avoid recalculating B-Tree node CRCs over and over again when
 * making bulk-modifications to the B-Tree.
 *
 * If inval is non-zero delayed adjustments are ignored.
 */
void
hammer_io_clear_modify(struct hammer_io *io, int inval)
{
        if (io->modified == 0)
                return;

        /*
         * Take us off the mod-list and clear the modified bit.
         */
        KKASSERT(io->mod_list != NULL);
        if (io->mod_list == &io->hmp->volu_list ||
            io->mod_list == &io->hmp->meta_list) {
                io->hmp->locked_dirty_space -= io->bytes;
                hammer_count_dirtybufspace -= io->bytes;
        }
        TAILQ_REMOVE(io->mod_list, io, mod_entry);
        io->mod_list = NULL;
        io->modified = 0;

        /*
         * If this bit is not set there are no delayed adjustments.
         */
        if (io->gencrc == 0)
                return;
        io->gencrc = 0;

        /*
         * Finalize requested CRCs.  The NEEDSCRC flag also holds a reference
         * on the node (& underlying buffer).  Release the node after clearing
         * the flag.
         */
        if (io->type == HAMMER_STRUCTURE_META_BUFFER) {
                hammer_buffer_t buffer = (void *)io;
                hammer_node_t node;

restart:
                TAILQ_FOREACH(node, &buffer->clist, entry) {
                        if ((node->flags & HAMMER_NODE_NEEDSCRC) == 0)
                                continue;
                        node->flags &= ~HAMMER_NODE_NEEDSCRC;
                        KKASSERT(node->ondisk);
                        if (inval == 0) {
                                node->ondisk->crc =
                                        crc32(&node->ondisk->crc + 1,
                                              HAMMER_BTREE_CRCSIZE);
                        }
                        hammer_rel_node(node);
                        goto restart;
                }
        }
}

/*
 * Clear the IO's modify list.  Even though the IO is no longer modified
 * it may still be on the lose_list.  This routine is called just before
 * the governing hammer_buffer is destroyed.
 */
void
hammer_io_clear_modlist(struct hammer_io *io)
{
        KKASSERT(io->modified == 0);
        if (io->mod_list) {
                crit_enter();   /* biodone race against list */
                KKASSERT(io->mod_list == &io->hmp->lose_list);
                TAILQ_REMOVE(io->mod_list, io, mod_entry);
                io->mod_list = NULL;
                crit_exit();
        }
}

/************************************************************************
 *                              HAMMER_BIOOPS                           *
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback - MAY BE CALLED FROM INTERRUPT!
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependency if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
        union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

        KKASSERT(iou->io.released == 1);

        /*
         * Deal with people waiting for I/O to drain
         */
        if (iou->io.running) {
                hammer_count_io_running_write -= iou->io.bytes;
                iou->io.hmp->io_running_space -= iou->io.bytes;
                if (iou->io.hmp->io_running_space == 0)
                        wakeup(&iou->io.hmp->io_running_space);
                KKASSERT(iou->io.hmp->io_running_space >= 0);
                iou->io.running = 0;
        }

        if (iou->io.waiting) {
                iou->io.waiting = 0;
                wakeup(iou);
        }

        /*
         * If B_LOCKED is set someone wanted to deallocate the bp at some
         * point, do it now if refs has become zero.
         */
        if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
                KKASSERT(iou->io.modified == 0);
                --hammer_count_io_locked;
                bp->b_flags &= ~B_LOCKED;
                hammer_io_deallocate(bp);
                /* structure may be dead now */
        }
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This mostly occurs with clean buffers
 * but it may be possible for a holding structure to be marked dirty
 * while its buffer is passively associated.  The caller owns the bp.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: This may be called from an interrupt via hammer_io_complete()
 */
static void
hammer_io_deallocate(struct buf *bp)
{
        hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

        KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
        if (iou->io.lock.refs > 0 || iou->io.modified) {
                /*
                 * It is not legal to disassociate a modified buffer.  This
                 * case really shouldn't ever occur.
                 */
                bp->b_flags |= B_LOCKED;
                ++hammer_count_io_locked;
        } else {
                /*
                 * Disassociate the BP.  If the io has no refs left we
                 * have to add it to the loose list.
                 */
                hammer_io_disassociate(iou);
                if (iou->io.type != HAMMER_STRUCTURE_VOLUME) {
                        KKASSERT(iou->io.bp == NULL);
                        KKASSERT(iou->io.mod_list == NULL);
                        crit_enter();   /* biodone race against list */
                        iou->io.mod_list = &iou->io.hmp->lose_list;
                        TAILQ_INSERT_TAIL(iou->io.mod_list, &iou->io, mod_entry);
                        crit_exit();
                }
        }
}

static int
hammer_io_fsync(struct vnode *vp)
{
        return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
        return(0);
}

static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.  In addition, kernel-demanded writes can only proceed for
 * certain types of buffers (i.e. UNDO and DATA types).  Other dirty
 * buffer types can only be explicitly written by the flusher.
 *
 * checkwrite will only be called for bdwrite()n buffers.  If we return
 * success the kernel is guaranteed to initiate the buffer write.
 */
static int
hammer_io_checkread(struct buf *bp)
{
        return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
        hammer_io_t io = (void *)LIST_FIRST(&bp->b_dep);

        /*
         * This shouldn't happen under normal operation.
         */
        if (io->type == HAMMER_STRUCTURE_VOLUME ||
            io->type == HAMMER_STRUCTURE_META_BUFFER) {
                if (!panicstr)
                        panic("hammer_io_checkwrite: illegal buffer");
                if ((bp->b_flags & B_LOCKED) == 0) {
                        bp->b_flags |= B_LOCKED;
                        ++hammer_count_io_locked;
                }
                return(1);
        }

        /*
         * We can only clear the modified bit if the IO is not currently
         * undergoing modification.  Otherwise we may miss changes.
         */
        if (io->modify_refs == 0 && io->modified)
                hammer_io_clear_modify(io, 0);

        /*
         * The kernel is going to start the IO, set io->running.
         */
        KKASSERT(io->running == 0);
        io->running = 1;
        io->hmp->io_running_space += io->bytes;
        hammer_count_io_running_write += io->bytes;
        return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
        return(0);
}

struct bio_ops hammer_bioops = {
        .io_start       = hammer_io_start,
        .io_complete    = hammer_io_complete,
        .io_deallocate  = hammer_io_deallocate,
        .io_fsync       = hammer_io_fsync,
        .io_sync        = hammer_io_sync,
        .io_movedeps    = hammer_io_movedeps,
        .io_countdeps   = hammer_io_countdeps,
        .io_checkread   = hammer_io_checkread,
        .io_checkwrite  = hammer_io_checkwrite,
};

/************************************************************************
 *                              DIRECT IO OPS                           *
 ************************************************************************
 *
 * These functions operate directly on the buffer cache buffer associated
 * with a front-end vnode rather than a back-end device vnode.
 */

/*
 * Read a buffer associated with a front-end vnode directly from the
 * disk media.  The bio may be issued asynchronously.  If leaf is non-NULL
 * we validate the CRC.
 *
 * A second-level bio, already resolved to a zone-2 offset (typically by
 * the BMAP code or by a previous hammer_io_direct_write()), is passed.
 *
 * We must check for the presence of a HAMMER buffer to handle the case
 * where the reblocker has rewritten the data (which it does via the HAMMER
 * buffer system, not via the high-level vnode buffer cache), but not yet
 * committed the buffer to the media.
 */
int
hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio,
                      hammer_btree_leaf_elm_t leaf)
{
        hammer_off_t buf_offset;
        hammer_off_t zone2_offset;
        hammer_volume_t volume;
        struct buf *bp;
        struct bio *nbio;
        int vol_no;
        int error;

        buf_offset = bio->bio_offset;
        KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_LARGE_DATA);

        /*
         * The buffer cache may have an aliased buffer (the reblocker can
         * write them).  If it does we have to sync any dirty data before
         * we can build our direct-read.  This is a non-critical code path.
         */
        bp = bio->bio_buf;
        hammer_sync_buffers(hmp, buf_offset, bp->b_bufsize);

        /*
         * Resolve to a zone-2 offset.  The conversion just requires
         * munging the top 4 bits but we want to abstract it anyway
         * so the blockmap code can verify the zone assignment.
         */
        zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error);
        if (error)
                goto done;
        KKASSERT((zone2_offset & HAMMER_OFF_ZONE_MASK) ==
                 HAMMER_ZONE_RAW_BUFFER);

        /*
         * Resolve volume and raw-offset for 3rd level bio.  The
         * offset will be specific to the volume.
         */
        vol_no = HAMMER_VOL_DECODE(zone2_offset);
        volume = hammer_get_volume(hmp, vol_no, &error);
        if (error == 0 && zone2_offset >= volume->maxbuf_off)
                error = EIO;

        if (error == 0) {
                zone2_offset &= HAMMER_OFF_SHORT_MASK;

                nbio = push_bio(bio);
                nbio->bio_offset = volume->ondisk->vol_buf_beg +
                                   zone2_offset;
#if 0
                /*
                 * XXX disabled - our CRC check doesn't work if the OS
                 * does bogus_page replacement on the direct-read.
                 */
                if (leaf && hammer_verify_data) {
                        nbio->bio_done = hammer_io_direct_read_complete;
                        nbio->bio_caller_info1.uvalue32 = leaf->data_crc;
                }
#endif
                vn_strategy(volume->devvp, nbio);
        }
        hammer_rel_volume(volume, 0);
done:
        if (error) {
                kprintf("hammer_direct_read: failed @ %016llx\n",
                        zone2_offset);
                bp->b_error = error;
                bp->b_flags |= B_ERROR;
                biodone(bio);
        }
        return(error);
}

#if 0
/*
 * On completion of the BIO this callback must check the data CRC
 * and chain to the previous bio.
 */
static
void
hammer_io_direct_read_complete(struct bio *nbio)
{
        struct bio *obio;
        struct buf *bp;
        u_int32_t rec_crc = nbio->bio_caller_info1.uvalue32;

        bp = nbio->bio_buf;
        if (crc32(bp->b_data, bp->b_bufsize) != rec_crc) {
                kprintf("HAMMER: data_crc error @%016llx/%d\n",
                        nbio->bio_offset, bp->b_bufsize);
                if (hammer_debug_debug)
                        Debugger("");
                bp->b_flags |= B_ERROR;
                bp->b_error = EIO;
        }
        obio = pop_bio(nbio);
        biodone(obio);
}
#endif
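
/*
 * Illustrative sketch (not compiled): how a zone-2 offset breaks down
 * for the third-level bio, mirroring the arithmetic used above and in
 * hammer_io_inval().  The volume number lives in the top bits and the
 * in-volume offset in the low bits, which is rebased against
 * vol_buf_beg.  The helper name is hypothetical.
 */
#if 0
static hammer_off_t
example_zone2_to_raw(hammer_volume_t volume, hammer_off_t zone2_offset)
{
        KKASSERT(HAMMER_VOL_DECODE(zone2_offset) == volume->vol_no);
        return(volume->ondisk->vol_buf_beg +
               (zone2_offset & HAMMER_OFF_SHORT_MASK));
}
#endif
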
/*
 * Write a buffer associated with a front-end vnode directly to the
 * disk media.  The bio may be issued asynchronously.
 *
 * The BIO is associated with the specified record and RECF_DIRECT_IO
 * is set.
 */
int
hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
                       struct bio *bio)
{
        hammer_btree_leaf_elm_t leaf = &record->leaf;
        hammer_off_t buf_offset;
        hammer_off_t zone2_offset;
        hammer_volume_t volume;
        hammer_buffer_t buffer;
        struct buf *bp;
        struct bio *nbio;
        char *ptr;
        int vol_no;
        int error;

        buf_offset = leaf->data_offset;

        KKASSERT(buf_offset > HAMMER_ZONE_BTREE);
        KKASSERT(bio->bio_buf->b_cmd == BUF_CMD_WRITE);

        if ((buf_offset & HAMMER_BUFMASK) == 0 &&
            leaf->data_len >= HAMMER_BUFSIZE) {
                /*
                 * We are using the vnode's bio to write directly to the
                 * media, any hammer_buffer at the same zone-X offset will
                 * now have stale data.
                 */
                zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error);
                vol_no = HAMMER_VOL_DECODE(zone2_offset);
                volume = hammer_get_volume(hmp, vol_no, &error);

                if (error == 0 && zone2_offset >= volume->maxbuf_off)
                        error = EIO;
                if (error == 0) {
                        bp = bio->bio_buf;
                        KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0);
                        hammer_del_buffers(hmp, buf_offset,
                                           zone2_offset, bp->b_bufsize);

                        /*
                         * Second level bio - cached zone2 offset.
                         *
                         * (We can put our bio_done function in either the
                         *  2nd or 3rd level).
                         */
                        nbio = push_bio(bio);
                        nbio->bio_offset = zone2_offset;
                        nbio->bio_done = hammer_io_direct_write_complete;
                        nbio->bio_caller_info1.ptr = record;
                        record->flags |= HAMMER_RECF_DIRECT_IO;

                        /*
                         * Third level bio - raw offset specific to the
                         * correct volume.
                         */
                        zone2_offset &= HAMMER_OFF_SHORT_MASK;
                        nbio = push_bio(nbio);
                        nbio->bio_offset = volume->ondisk->vol_buf_beg +
                                           zone2_offset;
                        vn_strategy(volume->devvp, nbio);
                }
                hammer_rel_volume(volume, 0);
        } else {
                /*
                 * Must fit in a standard HAMMER buffer.
/*
 * On completion of the BIO this callback must disconnect
 * it from the hammer_record and chain to the previous bio.
 */
static
void
hammer_io_direct_write_complete(struct bio *nbio)
{
	struct bio *obio;
	hammer_record_t record = nbio->bio_caller_info1.ptr;

	obio = pop_bio(nbio);
	biodone(obio);
	KKASSERT(record != NULL && (record->flags & HAMMER_RECF_DIRECT_IO));
	record->flags &= ~HAMMER_RECF_DIRECT_IO;
	if (record->flags & HAMMER_RECF_DIRECT_WAIT) {
		record->flags &= ~HAMMER_RECF_DIRECT_WAIT;
		wakeup(&record->flags);
	}
}
/*
 * This is called before a record is either committed to the B-Tree
 * or destroyed, to resolve any associated direct-IO.  We must
 * ensure that the data is available on-media to other consumers
 * such as the reblocker or mirroring code.
 *
 * Note that other consumers might access the data via the block
 * device's buffer cache and not the high level vnode's buffer cache.
 */
void
hammer_io_direct_wait(hammer_record_t record)
{
	crit_enter();
	while (record->flags & HAMMER_RECF_DIRECT_IO) {
		record->flags |= HAMMER_RECF_DIRECT_WAIT;
		tsleep(&record->flags, 0, "hmdiow", 0);
	}
	crit_exit();
}
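
/*
 * Aside: a usage sketch (hypothetical, not compiled) of the direct-IO
 * handshake.  The front end issues the write; the completion callback
 * clears HAMMER_RECF_DIRECT_IO and wakes any waiter; the backend calls
 * hammer_io_direct_wait() before committing or destroying the record:
 */
#if 0
	error = hammer_io_direct_write(hmp, record, bio);
	/* ... the bio completes asynchronously ... */
	hammer_io_direct_wait(record);	/* data now visible on-media */
#endif
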
/*
 * This is called to remove the second-level cached zone-2 offset from
 * frontend buffer cache buffers, now stale due to a data relocation.
 * These offsets are generated by cluster_read() via VOP_BMAP, or directly
 * by hammer_vop_strategy_read().
 *
 * This is rather nasty because here we have something like the reblocker
 * scanning the raw B-Tree with no held references on anything, really,
 * other than a shared lock on the B-Tree node, and we have to access the
 * frontend's buffer cache to check for and clean out the association.
 * Specifically, if the reblocker is moving data on the disk, these cached
 * offsets will become invalid.
 *
 * Only data record types associated with the large-data zone are subject
 * to direct-io and need to be checked.
 */
void
hammer_io_direct_uncache(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf)
{
	struct hammer_inode_info iinfo;
	int zone;

	if (leaf->base.rec_type != HAMMER_RECTYPE_DATA)
		return;
	zone = HAMMER_ZONE_DECODE(leaf->data_offset);
	if (zone != HAMMER_ZONE_LARGE_DATA_INDEX)
		return;
	iinfo.obj_id = leaf->base.obj_id;
	iinfo.obj_asof = 0;	/* unused */
	iinfo.obj_localization = leaf->base.localization &
				 HAMMER_LOCALIZE_PSEUDOFS_MASK;
	iinfo.u.leaf = leaf;
	hammer_scan_inode_snapshots(hmp, &iinfo,
				    hammer_io_direct_uncache_callback,
				    leaf);
}

static int
hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data)
{
	hammer_inode_info_t iinfo = data;
	hammer_off_t data_offset;
	hammer_off_t file_offset;
	struct vnode *vp;
	struct buf *bp;
	int blksize;

	if (ip->vp == NULL)
		return(0);
	data_offset = iinfo->u.leaf->data_offset;
	file_offset = iinfo->u.leaf->base.key - iinfo->u.leaf->data_len;
	blksize = iinfo->u.leaf->data_len;
	KKASSERT((blksize & HAMMER_BUFMASK) == 0);

	hammer_ref(&ip->lock);
	if (hammer_get_vnode(ip, &vp) == 0) {
		if ((bp = findblk(ip->vp, file_offset)) != NULL &&
		    bp->b_bio2.bio_offset != NOOFFSET) {
			bp = getblk(ip->vp, file_offset, blksize, 0, 0);
			bp->b_bio2.bio_offset = NOOFFSET;
			brelse(bp);
		}
		vput(vp);
	}
	hammer_rel_inode(ip, 0);
	return(0);
}
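
/*
 * Aside: a usage sketch (hypothetical, not compiled).  A data mover
 * such as the reblocker would invalidate any cached zone-2 offsets for
 * the old location before relocating a large-data record, so frontend
 * buffers cannot direct-read through a stale mapping.  The elm variable
 * below is an assumed B-Tree element in the caller's scope:
 */
#if 0
	hammer_io_direct_uncache(hmp, &elm->leaf);	/* old location */
	/* ... relocate the data and update the B-Tree element ... */
#endif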