1 /* $NetBSD: fdfs.c,v 1.5 2007/10/08 21:41:13 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Buffer cache routines for a file-descriptor backed filesystem. 41 * This is part of lfs_cleanerd so there is also a "segment pointer" that 42 * we can make buffers out of without duplicating memory or reading the data 43 * again. 44 */ 45 46 #include <err.h> 47 #include <fcntl.h> 48 #include <time.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 54 #include <sys/syslog.h> 55 #include <sys/param.h> 56 #include <sys/mount.h> 57 #include <sys/stat.h> 58 59 #include "vnode.h" 60 #include "bufcache.h" 61 #include "fdfs.h" 62 63 /* 64 * Return a "vnode" interface to a given file descriptor. 65 */ 66 struct uvnode * 67 fd_vget(int fd, int bsize, int segsize, int nseg) 68 { 69 struct fdfs *fs; 70 struct uvnode *vp; 71 int i; 72 73 fs = (struct fdfs *)malloc(sizeof(*fs)); 74 if (fs == NULL) 75 return NULL; 76 if (segsize > 0) { 77 fs->fd_bufp = (struct fd_buf *)malloc(nseg * 78 sizeof(struct fd_buf)); 79 if (fs->fd_bufp == NULL) { 80 free(fs); 81 return NULL; 82 } 83 for (i = 0; i < nseg; i++) { 84 fs->fd_bufp[i].start = 0x0; 85 fs->fd_bufp[i].end = 0x0; 86 fs->fd_bufp[i].buf = (char *)malloc(segsize); 87 if (fs->fd_bufp[i].buf == NULL) { 88 while (--i >= 0) 89 free(fs->fd_bufp[i].buf); 90 free(fs->fd_bufp); 91 free(fs); 92 return NULL; 93 } 94 } 95 } else 96 fs->fd_bufp = NULL; 97 98 fs->fd_fd = fd; 99 fs->fd_bufc = nseg; 100 fs->fd_bufi = 0; 101 fs->fd_bsize = bsize; 102 fs->fd_ssize = segsize; 103 104 vp = (struct uvnode *) malloc(sizeof(*vp)); 105 if (vp == NULL) { 106 if (fs->fd_bufp) { 107 for (i = nseg - 1; i >= 0; i--) 108 free(fs->fd_bufp[i].buf); 109 free(fs->fd_bufp); 110 } 111 free(fs); 112 return NULL; 113 } 114 memset(vp, 0, sizeof(*vp)); 115 vp->v_fd = fd; 116 vp->v_fs = fs; 117 vp->v_usecount = 0; 118 vp->v_strategy_op = fd_vop_strategy; 119 vp->v_bwrite_op = fd_vop_bwrite; 120 vp->v_bmap_op = fd_vop_bmap; 121 LIST_INIT(&vp->v_cleanblkhd); 122 LIST_INIT(&vp->v_dirtyblkhd); 123 vp->v_data = NULL; 124 125 return vp; 126 } 127 128 /* 129 * Deallocate a vnode. 130 */ 131 void 132 fd_reclaim(struct uvnode *vp) 133 { 134 int i; 135 struct ubuf *bp; 136 struct fdfs *fs; 137 138 while ((bp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) { 139 bremfree(bp); 140 buf_destroy(bp); 141 } 142 while ((bp = LIST_FIRST(&vp->v_cleanblkhd)) != NULL) { 143 bremfree(bp); 144 buf_destroy(bp); 145 } 146 147 fs = (struct fdfs *)vp->v_fs; 148 for (i = 0; i < fs->fd_bufc; i++) 149 free(fs->fd_bufp[i].buf); 150 free(fs->fd_bufp); 151 free(fs); 152 memset(vp, 0, sizeof(vp)); 153 } 154 155 /* 156 * We won't be using that last segment after all. 157 */ 158 void 159 fd_release(struct uvnode *vp) 160 { 161 --((struct fdfs *)vp->v_fs)->fd_bufi; 162 } 163 164 /* 165 * Reset buffer pointer to first buffer. 166 */ 167 void 168 fd_release_all(struct uvnode *vp) 169 { 170 ((struct fdfs *)vp->v_fs)->fd_bufi = 0; 171 } 172 173 /* 174 * Prepare a segment buffer which we will expect to read from. 175 * We never increment fd_bufi unless we have succeeded to allocate the space, 176 * if necessary, and have read the segment. 177 */ 178 int 179 fd_preload(struct uvnode *vp, daddr_t start) 180 { 181 struct fdfs *fs = (struct fdfs *)vp->v_fs; 182 struct fd_buf *t; 183 int r; 184 185 /* We might need to allocate more buffers. */ 186 if (fs->fd_bufi == fs->fd_bufc) { 187 ++fs->fd_bufc; 188 syslog(LOG_DEBUG, "increasing number of segment buffers to %d", 189 fs->fd_bufc); 190 t = realloc(fs->fd_bufp, fs->fd_bufc * sizeof(struct fd_buf)); 191 if (t == NULL) { 192 syslog(LOG_NOTICE, "failed resizing table to %d\n", 193 fs->fd_bufc); 194 return -1; 195 } 196 fs->fd_bufp = t; 197 fs->fd_bufp[fs->fd_bufi].start = 0x0; 198 fs->fd_bufp[fs->fd_bufi].end = 0x0; 199 fs->fd_bufp[fs->fd_bufi].buf = (char *)malloc(fs->fd_ssize); 200 if (fs->fd_bufp[fs->fd_bufi].buf == NULL) { 201 syslog(LOG_NOTICE, "failed to allocate buffer #%d\n", 202 fs->fd_bufc); 203 --fs->fd_bufc; 204 return -1; 205 } 206 } 207 208 /* Read the current buffer. */ 209 fs->fd_bufp[fs->fd_bufi].start = start; 210 fs->fd_bufp[fs->fd_bufi].end = start + fs->fd_ssize / fs->fd_bsize; 211 212 if ((r = pread(fs->fd_fd, fs->fd_bufp[fs->fd_bufi].buf, 213 (size_t)fs->fd_ssize, start * fs->fd_bsize)) < 0) { 214 syslog(LOG_ERR, "preload to segment buffer %d", fs->fd_bufi); 215 return r; 216 } 217 218 fs->fd_bufi = fs->fd_bufi + 1; 219 return 0; 220 } 221 222 /* 223 * Get a pointer to a block contained in one of the segment buffers, 224 * as if from bread() but avoiding the buffer cache. 225 */ 226 char * 227 fd_ptrget(struct uvnode *vp, daddr_t bn) 228 { 229 int i; 230 struct fdfs *fs; 231 232 fs = (struct fdfs *)vp->v_fs; 233 for (i = 0; i < fs->fd_bufc; i++) { 234 if (bn >= fs->fd_bufp[i].start && bn < fs->fd_bufp[i].end) { 235 return fs->fd_bufp[i].buf + 236 (bn - fs->fd_bufp[i].start) * fs->fd_bsize; 237 } 238 } 239 return NULL; 240 } 241 242 /* 243 * Strategy routine. We can read from the segment buffer if requested. 244 */ 245 int 246 fd_vop_strategy(struct ubuf * bp) 247 { 248 struct fdfs *fs; 249 char *cp; 250 int count; 251 252 fs = (struct fdfs *)bp->b_vp->v_fs; 253 if (bp->b_flags & B_READ) { 254 if ((cp = fd_ptrget(bp->b_vp, bp->b_blkno)) != NULL) { 255 free(bp->b_data); 256 bp->b_data = cp; 257 bp->b_flags |= (B_DONTFREE | B_DONE); 258 return 0; 259 } 260 count = pread(bp->b_vp->v_fd, bp->b_data, bp->b_bcount, 261 bp->b_blkno * fs->fd_bsize); 262 if (count == bp->b_bcount) 263 bp->b_flags |= B_DONE; 264 } else { 265 count = pwrite(bp->b_vp->v_fd, bp->b_data, bp->b_bcount, 266 bp->b_blkno * fs->fd_bsize); 267 if (count == 0) { 268 perror("pwrite"); 269 return -1; 270 } 271 bp->b_flags &= ~B_DELWRI; 272 reassignbuf(bp, bp->b_vp); 273 } 274 return 0; 275 } 276 277 /* 278 * Delayed write. 279 */ 280 int 281 fd_vop_bwrite(struct ubuf * bp) 282 { 283 bp->b_flags |= B_DELWRI; 284 reassignbuf(bp, bp->b_vp); 285 brelse(bp, 0); 286 return 0; 287 } 288 289 /* 290 * Map lbn to disk address. Since we are using the file 291 * descriptor as the "disk", the disk address is meaningless 292 * and we just return the block address. 293 */ 294 int 295 fd_vop_bmap(struct uvnode * vp, daddr_t lbn, daddr_t * daddrp) 296 { 297 *daddrp = lbn; 298 return 0; 299 } 300