/* $NetBSD: bufcache.c,v 1.20 2018/03/30 12:56:46 christos Exp $ */
/*-
 * Copyright (c) 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/buf.h> 35 #include <sys/queue.h> 36 #include <sys/mount.h> 37 38 #include <assert.h> 39 #include <err.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <util.h> 45 46 #include "bufcache.h" 47 #include "vnode.h" 48 49 /* 50 * Definitions for the buffer free lists. 51 */ 52 #define BQUEUES 3 /* number of free buffer queues */ 53 54 #define BQ_LOCKED 0 /* super-blocks &c */ 55 #define BQ_LRU 1 /* lru, useful buffers */ 56 #define BQ_AGE 2 /* rubbish */ 57 58 TAILQ_HEAD(bqueues, ubuf) bufqueues[BQUEUES]; 59 60 struct bufhash_struct *bufhash; 61 62 #define HASH_MAX 1024 63 int hashmax = HASH_MAX; 64 int hashmask = (HASH_MAX - 1); 65 66 int maxbufs = BUF_CACHE_SIZE; 67 int nbufs = 0; 68 int cachehits = 0; 69 int cachemisses = 0; 70 int max_depth = 0; 71 off_t locked_queue_bytes = 0; 72 int locked_queue_count = 0; 73 74 /* Simple buffer hash function */ 75 static int 76 vl_hash(struct uvnode * vp, daddr_t lbn) 77 { 78 return (int)((unsigned long) vp + lbn) & hashmask; 79 } 80 81 /* Initialize buffer cache */ 82 void 83 bufinit(int max) 84 { 85 int i; 86 87 if (max) { 88 for (hashmax = 1; hashmax < max; hashmax <<= 1) 89 ; 90 hashmask = hashmax - 1; 91 } 92 93 for (i = 0; i < BQUEUES; i++) { 94 TAILQ_INIT(&bufqueues[i]); 95 } 96 bufhash = emalloc(hashmax * sizeof(*bufhash)); 97 for (i = 0; i < hashmax; i++) 98 LIST_INIT(&bufhash[i]); 99 } 100 101 /* Widen the hash table. 
*/ 102 void bufrehash(int max) 103 { 104 int i, newhashmax; 105 struct ubuf *bp, *nbp; 106 struct bufhash_struct *np; 107 108 if (max < 0 || max <= hashmax) 109 return; 110 111 /* Round up to a power of two */ 112 for (newhashmax = 1; newhashmax < max; newhashmax <<= 1) 113 ; 114 115 /* update the mask right away so vl_hash() uses it */ 116 hashmask = newhashmax - 1; 117 118 /* Allocate new empty hash table, if we can */ 119 np = emalloc(newhashmax * sizeof(*bufhash)); 120 for (i = 0; i < newhashmax; i++) 121 LIST_INIT(&np[i]); 122 123 /* Now reassign all existing buffers to their new hash chains. */ 124 for (i = 0; i < hashmax; i++) { 125 bp = LIST_FIRST(&bufhash[i]); 126 while(bp) { 127 nbp = LIST_NEXT(bp, b_hash); 128 LIST_REMOVE(bp, b_hash); 129 bp->b_hashval = vl_hash(bp->b_vp, bp->b_lblkno); 130 LIST_INSERT_HEAD(&np[bp->b_hashval], bp, b_hash); 131 bp = nbp; 132 } 133 } 134 135 /* Switch over and clean up */ 136 free(bufhash); 137 bufhash = np; 138 hashmax = newhashmax; 139 } 140 141 /* Print statistics of buffer cache usage */ 142 void 143 bufstats(void) 144 { 145 printf("buffer cache: %d hits %d misses (%2.2f%%); hash width %d, depth %d\n", 146 cachehits, cachemisses, 147 (cachehits * 100.0) / (cachehits + cachemisses), 148 hashmax, max_depth); 149 } 150 151 /* 152 * Remove a buffer from the cache. 153 * Caller must remove the buffer from its free list. 154 */ 155 void 156 buf_destroy(struct ubuf * bp) 157 { 158 LIST_REMOVE(bp, b_vnbufs); 159 LIST_REMOVE(bp, b_hash); 160 if (!(bp->b_flags & B_DONTFREE)) 161 free(bp->b_data); 162 free(bp); 163 --nbufs; 164 } 165 166 /* Remove a buffer from its free list. */ 167 void 168 bremfree(struct ubuf * bp) 169 { 170 struct bqueues *dp = NULL; 171 172 /* 173 * We only calculate the head of the freelist when removing 174 * the last element of the list as that is the only time that 175 * it is needed (e.g. to reset the tail pointer). 
176 */ 177 if (bp->b_flags & B_LOCKED) { 178 locked_queue_bytes -= bp->b_bcount; 179 --locked_queue_count; 180 } 181 if (TAILQ_NEXT(bp, b_freelist) == NULL) { 182 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 183 if (TAILQ_LAST(dp, bqueues) == bp) 184 break; 185 if (dp == &bufqueues[BQUEUES]) 186 errx(1, "bremfree: lost tail"); 187 } 188 ++bp->b_vp->v_usecount; 189 TAILQ_REMOVE(dp, bp, b_freelist); 190 } 191 192 /* Return a buffer if it is in the cache, otherwise return NULL. */ 193 struct ubuf * 194 incore(struct uvnode * vp, daddr_t lbn) 195 { 196 struct ubuf *bp; 197 int hash, depth; 198 199 hash = vl_hash(vp, lbn); 200 /* XXX use a real hash instead. */ 201 depth = 0; 202 LIST_FOREACH(bp, &bufhash[hash], b_hash) { 203 if (++depth > max_depth) 204 max_depth = depth; 205 assert(depth <= nbufs); 206 if (bp->b_vp == vp && bp->b_lblkno == lbn) { 207 return bp; 208 } 209 } 210 return NULL; 211 } 212 213 /* 214 * Return a buffer of the given size, lbn and uvnode. 215 * If none is in core, make a new one. 216 */ 217 struct ubuf * 218 getblk(struct uvnode * vp, daddr_t lbn, int size) 219 { 220 struct ubuf *bp; 221 #ifdef DEBUG 222 static int warned; 223 #endif 224 225 /* 226 * First check the buffer cache lists. 227 * We might sometimes need to resize a buffer. If we are growing 228 * the buffer, its contents are invalid; but shrinking is okay. 229 */ 230 if ((bp = incore(vp, lbn)) != NULL) { 231 assert(!(bp->b_flags & B_BUSY)); 232 bp->b_flags |= B_BUSY; 233 bremfree(bp); 234 if (bp->b_bcount == size) 235 return bp; 236 else if (bp->b_bcount > size) { 237 assert(!(bp->b_flags & B_DELWRI)); 238 bp->b_bcount = size; 239 bp->b_data = erealloc(bp->b_data, size); 240 return bp; 241 } 242 243 buf_destroy(bp); 244 bp = NULL; 245 } 246 247 /* 248 * Not on the list. 249 * Get a new block of the appropriate size and use that. 250 * If not enough space, free blocks from the AGE and LRU lists 251 * to make room. 
252 */ 253 while (nbufs >= maxbufs + locked_queue_count) { 254 bp = TAILQ_FIRST(&bufqueues[BQ_AGE]); 255 if (bp) 256 TAILQ_REMOVE(&bufqueues[BQ_AGE], bp, b_freelist); 257 if (bp == NULL) { 258 bp = TAILQ_FIRST(&bufqueues[BQ_LRU]); 259 if (bp) 260 TAILQ_REMOVE(&bufqueues[BQ_LRU], bp, 261 b_freelist); 262 } 263 if (bp) { 264 if (bp->b_flags & B_DELWRI) 265 VOP_STRATEGY(bp); 266 buf_destroy(bp); 267 break; 268 } 269 #ifdef DEBUG 270 else if (!warned) { 271 warnx("allocating more than %d buffers", maxbufs); 272 ++warned; 273 } 274 #endif 275 break; 276 } 277 ++nbufs; 278 bp = ecalloc(1, sizeof(*bp)); 279 bp->b_data = ecalloc(1, size); 280 bp->b_vp = vp; 281 bp->b_blkno = bp->b_lblkno = lbn; 282 bp->b_bcount = size; 283 bp->b_hashval = vl_hash(vp, lbn); 284 LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash); 285 LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs); 286 bp->b_flags = B_BUSY; 287 288 return bp; 289 } 290 291 /* Write a buffer to disk according to its strategy routine. */ 292 void 293 bwrite(struct ubuf * bp) 294 { 295 bp->b_flags &= ~(B_READ | B_DONE | B_DELWRI | B_LOCKED); 296 VOP_STRATEGY(bp); 297 bp->b_flags |= B_DONE; 298 reassignbuf(bp, bp->b_vp); 299 brelse(bp, 0); 300 } 301 302 /* Put a buffer back on its free list, clear B_BUSY. 
*/ 303 void 304 brelse(struct ubuf * bp, int set) 305 { 306 int age; 307 308 assert(bp->b_flags & B_BUSY); 309 310 bp->b_flags |= set; 311 312 age = bp->b_flags & B_AGE; 313 bp->b_flags &= ~(B_BUSY | B_AGE); 314 if (bp->b_flags & B_INVAL) { 315 buf_destroy(bp); 316 return; 317 } 318 if (bp->b_flags & B_LOCKED) { 319 locked_queue_bytes += bp->b_bcount; 320 ++locked_queue_count; 321 TAILQ_INSERT_TAIL(&bufqueues[BQ_LOCKED], bp, b_freelist); 322 } else if (age) { 323 TAILQ_INSERT_TAIL(&bufqueues[BQ_AGE], bp, b_freelist); 324 } else { 325 TAILQ_INSERT_TAIL(&bufqueues[BQ_LRU], bp, b_freelist); 326 } 327 --bp->b_vp->v_usecount; 328 329 /* Move to the front of the hash chain */ 330 if (LIST_FIRST(&bufhash[bp->b_hashval]) != bp) { 331 LIST_REMOVE(bp, b_hash); 332 LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash); 333 } 334 } 335 336 /* Read the given block from disk, return it B_BUSY. */ 337 int 338 bread(struct uvnode * vp, daddr_t lbn, int size, int flags, struct ubuf ** bpp) 339 { 340 struct ubuf *bp; 341 daddr_t daddr; 342 343 bp = getblk(vp, lbn, size); 344 *bpp = bp; 345 if (bp->b_flags & (B_DELWRI | B_DONE)) { 346 ++cachehits; 347 return 0; 348 } 349 ++cachemisses; 350 351 /* 352 * Not found. Need to find that block's location on disk, 353 * and load it in. 354 */ 355 daddr = -1; 356 (void)VOP_BMAP(vp, lbn, &daddr); 357 bp->b_blkno = daddr; 358 if (daddr >= 0) { 359 bp->b_flags |= B_READ; 360 return VOP_STRATEGY(bp); 361 } 362 memset(bp->b_data, 0, bp->b_bcount); 363 return 0; 364 } 365 366 /* Move a buffer between dirty and clean block lists. 
*/ 367 void 368 reassignbuf(struct ubuf * bp, struct uvnode * vp) 369 { 370 LIST_REMOVE(bp, b_vnbufs); 371 if (bp->b_flags & B_DELWRI) { 372 LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs); 373 } else { 374 LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs); 375 } 376 } 377 378 #ifdef DEBUG 379 void 380 dump_free_lists(void) 381 { 382 struct ubuf *bp; 383 int i; 384 385 for (i = 0; i <= BQ_LOCKED; i++) { 386 printf("==> free list %d:\n", i); 387 TAILQ_FOREACH(bp, &bufqueues[i], b_freelist) { 388 printf("vp %p lbn %" PRId64 " flags %lx\n", 389 bp->b_vp, bp->b_lblkno, bp->b_flags); 390 } 391 } 392 } 393 #endif 394