#define _SYSTEM

#include <assert.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include <stdlib.h>

#include <machine/vmparam.h>

#include <sys/param.h>
#include <sys/mman.h>

#include <minix/dmap.h>
#include <minix/libminixfs.h>
#include <minix/syslib.h>
#include <minix/sysutil.h>
#include <minix/u64.h>
#include <minix/bdev.h>
#include <minix/bitmap.h>

#include "inc.h"

/* Buffer (block) cache. To acquire a block, a routine calls lmfs_get_block(),
 * telling which block it wants. The block is then regarded as "in use" and
 * has its reference count incremented. All the blocks that are not in use are
 * chained together in an LRU list, with 'front' pointing to the least recently
 * used block, and 'rear' to the most recently used block. A reverse chain is
 * also maintained. Usage for LRU is measured by the time at which put_block()
 * is done. The second parameter to put_block() can violate the LRU order and
 * put a block on the front of the list, if it will probably not be needed
 * again. This is used internally only; the lmfs_put_block() API call has no
 * second parameter. If a block is modified, the modifying routine must mark
 * the block as dirty, so the block will eventually be rewritten to the disk.
 */
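/* Illustrative sketch, not part of the original source: the typical usage
 * pattern for the API described above. A caller acquires a block with
 * lmfs_get_block(), modifies its contents through bp->data, marks it dirty so
 * it is eventually written back, and releases it with lmfs_put_block(). The
 * 'dev' and 'block' values are assumed to come from the caller's context.
 *
 *	struct buf *bp;
 *	int r;
 *
 *	if ((r = lmfs_get_block(&bp, dev, block, NORMAL)) != OK)
 *		return r;			-- block could not be obtained
 *	memset(bp->data, 0, bp->lmfs_bytes);	-- modify the block contents
 *	lmfs_markdirty(bp);			-- schedule it for writeback
 *	lmfs_put_block(bp);			-- release the reference
 */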
/* Flags to put_block(). */
#define ONE_SHOT 0x1	/* set if block will not be needed again */

#define BUFHASH(b) ((unsigned int)((b) % nr_bufs))
#define MARKCLEAN lmfs_markclean

#define MINBUFS 6	/* minimal number of bufs for sanity check */

static struct buf *front;	/* points to least recently used free block */
static struct buf *rear;	/* points to most recently used free block */
static unsigned int bufs_in_use; /* # bufs currently in use (not on free list) */

static void rm_lru(struct buf *bp);
static int read_block(struct buf *bp, size_t size);
static void freeblock(struct buf *bp);
static void cache_heuristic_check(void);
static void put_block(struct buf *bp, int put_flags);

static int vmcache = 0; /* are we using vm's secondary cache? (initially not) */

static struct buf *buf;
static struct buf **buf_hash;	/* the buffer hash table */
static unsigned int nr_bufs;
static int may_use_vmcache;

static size_t fs_block_size = PAGE_SIZE;	/* raw i/o block size */

static fsblkcnt_t fs_btotal = 0, fs_bused = 0;

static int quiet = 0;

typedef struct buf *noxfer_buf_ptr_t;	/* annotation for temporary buf ptrs */

void lmfs_setquiet(int q) { quiet = q; }

static int fs_bufs_heuristic(int minbufs, fsblkcnt_t btotal,
	fsblkcnt_t bused, int blocksize)
{
  struct vm_stats_info vsi;
  int bufs;
  u32_t kbytes_used_fs, kbytes_total_fs, kbcache, kb_fsmax;
  u32_t kbytes_remain_mem;

  /* Set a reasonable cache size: cache at most a certain portion of the used
   * FS, and at most a certain percentage of remaining memory.
   */
  if(vm_info_stats(&vsi) != OK) {
	bufs = 1024;
	if(!quiet)
	  printf("fslib: heuristic info fail: default to %d bufs\n", bufs);
	return bufs;
  }

  /* Remaining free memory is unused memory plus memory in use for the cache,
   * as the cache can be evicted.
   */
  kbytes_remain_mem = (u64_t)(vsi.vsi_free + vsi.vsi_cached) *
	vsi.vsi_pagesize / 1024;

  /* Check fs usage. */
  kbytes_used_fs = (unsigned long)(((u64_t)bused * blocksize) / 1024);
  kbytes_total_fs = (unsigned long)(((u64_t)btotal * blocksize) / 1024);

  /* Heuristic for a desired cache size based on FS usage;
   * but never bigger than half of the total filesystem.
   */
  kb_fsmax = sqrt_approx(kbytes_used_fs)*40;
  kb_fsmax = MIN(kb_fsmax, kbytes_total_fs/2);

  /* Heuristic for a maximum usage - 10% of remaining memory. */
  kbcache = MIN(kbytes_remain_mem/10, kb_fsmax);
  bufs = kbcache * 1024 / blocksize;

  /* But we simply need MINBUFS no matter what. */
  if(bufs < minbufs)
	bufs = minbufs;

  return bufs;
}

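/* Illustrative example of the heuristic above (the numbers are assumptions,
 * not from the original source). Take a 4096-byte block size, a file system
 * with btotal = 2097152 blocks (8 GiB) of which bused = 524288 (2 GiB), and
 * 1 GiB of free plus evictable memory:
 *
 *	kbytes_used_fs = 524288 * 4096 / 1024 = 2097152 KB
 *	kb_fsmax       = sqrt_approx(2097152) * 40 ~= 1448 * 40 = 57920 KB
 *	                 (well under kbytes_total_fs/2 = 4194304 KB)
 *	kbcache        = MIN(1048576 / 10, 57920) = 57920 KB
 *	bufs           = 57920 * 1024 / 4096 = 14480 buffers
 */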
void lmfs_change_blockusage(int delta)
{
	/* Change the number of allocated blocks by 'delta.'
	 * Also accumulate the delta since the last cache re-evaluation.
	 * If it is outside a certain band, ask the cache library to
	 * re-evaluate the cache size.
	 */
	static int bitdelta = 0, warn_low = TRUE, warn_high = TRUE;

	/* Adjust the file system block usage counter accordingly. Do bounds
	 * checking, and report file system misbehavior.
	 */
	if (delta > 0 && (fsblkcnt_t)delta > fs_btotal - fs_bused) {
		if (warn_high) {
			printf("libminixfs: block usage overflow\n");
			warn_high = FALSE;
		}
		delta = (int)(fs_btotal - fs_bused);
	} else if (delta < 0 && (fsblkcnt_t)-delta > fs_bused) {
		if (warn_low) {
			printf("libminixfs: block usage underflow\n");
			warn_low = FALSE;
		}
		delta = -(int)fs_bused;
	}
	fs_bused += delta;

	bitdelta += delta;

#define BAND_KB (10*1024)	/* recheck cache every 10MB change */

	/* If the accumulated delta exceeds the configured threshold, resize
	 * the cache, but only if the cache isn't in use any more. To avoid
	 * that the latter restriction blocks a resize forever, we also call
	 * this function from lmfs_flushall(). Since lmfs_buf_pool() may call
	 * lmfs_flushall(), reset 'bitdelta' before doing the heuristics
	 * check. See the example below this function.
	 */
	if (bufs_in_use == 0 &&
	    (bitdelta*(int)fs_block_size/1024 > BAND_KB ||
	     bitdelta*(int)fs_block_size/1024 < -BAND_KB)) {
		bitdelta = 0;
		cache_heuristic_check();
	}
}

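/* Illustrative arithmetic for the resize band above (assumed block size, not
 * from the original source): with fs_block_size = 4096, the cache size is
 * re-evaluated once the accumulated delta exceeds
 *
 *	BAND_KB * 1024 / fs_block_size = 10240 * 1024 / 4096 = 2560 blocks
 *
 * in either direction, i.e. after roughly 10 MB worth of net block
 * allocations or frees.
 */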
void lmfs_markdirty(struct buf *bp)
{
	bp->lmfs_flags |= VMMC_DIRTY;
}

void lmfs_markclean(struct buf *bp)
{
	bp->lmfs_flags &= ~VMMC_DIRTY;
}

int lmfs_isclean(struct buf *bp)
{
	return !(bp->lmfs_flags & VMMC_DIRTY);
}

static void free_unused_blocks(void)
{
	struct buf *bp;

	int freed = 0, bytes = 0;
	printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use);
	for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
		if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) {
			freed++;
			bytes += bp->lmfs_bytes;
			freeblock(bp);
		}
	}
	printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes);
}

static void lmfs_alloc_block(struct buf *bp, size_t block_size)
{
	int len;
	ASSERT(!bp->data);
	ASSERT(bp->lmfs_bytes == 0);

	len = roundup(block_size, PAGE_SIZE);

	if((bp->data = mmap(0, len, PROT_READ|PROT_WRITE,
	    MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) {
		free_unused_blocks();
		if((bp->data = mmap(0, len, PROT_READ|PROT_WRITE,
		    MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) {
			panic("libminixfs: could not allocate block");
		}
	}
	assert(bp->data);
	bp->lmfs_bytes = block_size;
	bp->lmfs_needsetcache = 1;
}

/*===========================================================================*
 *				lmfs_get_block				     *
 *===========================================================================*/
int lmfs_get_block(struct buf **bpp, dev_t dev, block64_t block, int how)
{
	return lmfs_get_block_ino(bpp, dev, block, how, VMC_NO_INODE, 0);
}

static void munmap_t(void *a, int len)
{
	vir_bytes av = (vir_bytes) a;
	assert(a);
	assert(a != MAP_FAILED);
	assert(len > 0);
	assert(!(av % PAGE_SIZE));

	len = roundup(len, PAGE_SIZE);

	assert(!(len % PAGE_SIZE));

	if(munmap(a, len) < 0)
		panic("libminixfs cache: munmap failed");
}

static void raisecount(struct buf *bp)
{
	ASSERT(bp->lmfs_count < CHAR_MAX);
	bp->lmfs_count++;
	if(bp->lmfs_count == 1) bufs_in_use++;
	assert(bufs_in_use > 0);
}

static void lowercount(struct buf *bp)
{
	assert(bufs_in_use > 0);
	ASSERT(bp->lmfs_count > 0);
	bp->lmfs_count--;
	if(bp->lmfs_count == 0) bufs_in_use--;
}

static void freeblock(struct buf *bp)
{
	ASSERT(bp->lmfs_count == 0);
	/* If the block taken is dirty, make it clean by writing it to the disk.
	 * Avoid hysteresis by flushing all other dirty blocks for the same device.
	 */
	if (bp->lmfs_dev != NO_DEV) {
		if (!lmfs_isclean(bp)) lmfs_flushdev(bp->lmfs_dev);
		assert(bp->lmfs_bytes > 0);
		bp->lmfs_dev = NO_DEV;
	}

	/* Fill in block's parameters and add it to the hash chain where it goes. */
	MARKCLEAN(bp);	/* NO_DEV blocks may be marked dirty */
	if(bp->lmfs_bytes > 0) {
		assert(bp->data);
		munmap_t(bp->data, bp->lmfs_bytes);
		bp->lmfs_bytes = 0;
		bp->data = NULL;
	} else assert(!bp->data);
}

/*===========================================================================*
 *				find_block				     *
 *===========================================================================*/
static struct buf *find_block(dev_t dev, block64_t block)
{
/* Search the hash chain for (dev, block). Return the buffer structure if
 * found, or NULL otherwise.
 */
	struct buf *bp;
	int b;

	assert(dev != NO_DEV);

	b = BUFHASH(block);
	for (bp = buf_hash[b]; bp != NULL; bp = bp->lmfs_hash)
		if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev)
			return bp;

	return NULL;
}

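/* Illustrative example (assumed numbers, not from the original source): with
 * nr_bufs = 1024, BUFHASH maps blocks 7, 1031 and 2055 all to chain 7, since
 * each equals 7 modulo 1024. find_block(dev, 1031) therefore walks that one
 * chain via the lmfs_hash pointers, comparing both block number and device
 * before declaring a hit:
 *
 *	struct buf *bp = find_block(dev, 1031);
 *	if (bp != NULL)
 *		-- cache hit: bp->lmfs_blocknr == 1031 && bp->lmfs_dev == dev
 */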
/*===========================================================================*
 *				get_block_ino				     *
 *===========================================================================*/
static int get_block_ino(struct buf **bpp, dev_t dev, block64_t block, int how,
	ino_t ino, u64_t ino_off, size_t block_size)
{
/* Check to see if the requested block is in the block cache. The requested
 * block is identified by the block number in 'block' on device 'dev', counted
 * in the file system block size. The amount of data requested for this block
 * is given in 'block_size', which may be less than the file system block size
 * iff the requested block is the last (partial) block on a device. Note that
 * the given block size does *not* affect the conversion of 'block' to a byte
 * offset! Either way, if the block could be obtained, either from the cache
 * or by reading from the device, return OK, with a pointer to the buffer
 * structure stored in 'bpp'. If not, return a negative error code (and no
 * buffer). If necessary, evict some other block and fetch the contents from
 * disk (if 'how' is NORMAL). If 'how' is NO_READ, the caller intends to
 * overwrite the requested block in its entirety, so it is only necessary to
 * see if it is in the cache; if it is not, any free buffer will do. If 'how'
 * is PEEK, the function returns the block if it is in the cache or the VM
 * cache, and an ENOENT error code otherwise.
 * In addition to the LRU chain, there is also a hash chain to link together
 * blocks whose block numbers end with the same bit strings, for fast lookup.
 */
	int b, r;
	static struct buf *bp;
	uint64_t dev_off;
	struct buf *prev_ptr;

	assert(buf_hash);
	assert(buf);
	assert(nr_bufs > 0);

	ASSERT(fs_block_size > 0);

	assert(dev != NO_DEV);

	assert(block <= UINT64_MAX / fs_block_size);

	dev_off = block * fs_block_size;

	if((ino_off % fs_block_size)) {
		printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n",
			ino_off);
		util_stacktrace();
	}

	/* See if the block is in the cache. If so, we can return it right away. */
	bp = find_block(dev, block);
	if (bp != NULL && !(bp->lmfs_flags & VMMC_EVICTED)) {
		ASSERT(bp->lmfs_dev == dev);
		ASSERT(bp->lmfs_dev != NO_DEV);

		/* The block must have exactly the requested number of bytes. */
		if (bp->lmfs_bytes != block_size)
			return EIO;

		/* Block needed has been found. */
		if (bp->lmfs_count == 0) {
			rm_lru(bp);
			ASSERT(bp->lmfs_needsetcache == 0);
			ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED));
			/* FIXME: race condition against the VMMC_EVICTED check */
			bp->lmfs_flags |= VMMC_BLOCK_LOCKED;
		}
		raisecount(bp);
		ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED);
		ASSERT(bp->data);

		if(ino != VMC_NO_INODE) {
			if(bp->lmfs_inode == VMC_NO_INODE
			    || bp->lmfs_inode != ino
			    || bp->lmfs_inode_offset != ino_off) {
				bp->lmfs_inode = ino;
				bp->lmfs_inode_offset = ino_off;
				bp->lmfs_needsetcache = 1;
			}
		}

		*bpp = bp;
		return OK;
	}

	/* We had the block in the cache but VM evicted it; invalidate it. */
	if (bp != NULL) {
		assert(bp->lmfs_flags & VMMC_EVICTED);
		ASSERT(bp->lmfs_count == 0);
		ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED));
		ASSERT(!(bp->lmfs_flags & VMMC_DIRTY));
		bp->lmfs_dev = NO_DEV;
		bp->lmfs_bytes = 0;
		bp->data = NULL;
	}

	/* Desired block is not on available chain. Find a free block to use. */
	if(bp) {
		ASSERT(bp->lmfs_flags & VMMC_EVICTED);
	} else {
		if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs);
	}
	assert(bp);

	rm_lru(bp);

	/* Remove the block that was just taken from its hash chain. */
	b = BUFHASH(bp->lmfs_blocknr);
	prev_ptr = buf_hash[b];
	if (prev_ptr == bp) {
		buf_hash[b] = bp->lmfs_hash;
	} else {
		/* The block just taken is not on the front of its hash chain. */
		while (prev_ptr->lmfs_hash != NULL)
			if (prev_ptr->lmfs_hash == bp) {
				prev_ptr->lmfs_hash = bp->lmfs_hash; /* found it */
				break;
			} else {
				prev_ptr = prev_ptr->lmfs_hash;	/* keep looking */
			}
	}

	freeblock(bp);

	bp->lmfs_inode = ino;
	bp->lmfs_inode_offset = ino_off;

	bp->lmfs_flags = VMMC_BLOCK_LOCKED;
	bp->lmfs_needsetcache = 0;
	bp->lmfs_dev = dev;		/* fill in device number */
	bp->lmfs_blocknr = block;	/* fill in block number */
	ASSERT(bp->lmfs_count == 0);
	raisecount(bp);
	b = BUFHASH(bp->lmfs_blocknr);
	bp->lmfs_hash = buf_hash[b];

	buf_hash[b] = bp;		/* add to hash list */

	assert(dev != NO_DEV);

	/* The block is not found in our cache, but we do want it if it's in the VM
	 * cache. The exception is NO_READ, purely for context switching performance
	 * reasons. NO_READ is used for 1) newly allocated blocks, 2) blocks being
	 * prefetched, and 3) blocks about to be fully overwritten. In the first two
	 * cases, VM will not have the block in its cache anyway, and for the third
	 * we save on one VM call only if the block is in the VM cache.
	 */
	assert(!bp->data);
	assert(!bp->lmfs_bytes);
	if (how != NO_READ && vmcache) {
		if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off,
		    &bp->lmfs_flags, roundup(block_size, PAGE_SIZE))) != MAP_FAILED) {
			bp->lmfs_bytes = block_size;
			ASSERT(!bp->lmfs_needsetcache);
			*bpp = bp;
			return OK;
		}
	}
	bp->data = NULL;

	/* The block is not in the cache, and VM does not know about it. If we were
	 * requested to search for the block only, we can now return failure to the
	 * caller. Return the block to the pool without allocating data pages, since
	 * these would be freed upon recycling the block anyway.
	 */
	if (how == PEEK) {
		bp->lmfs_dev = NO_DEV;

		put_block(bp, ONE_SHOT);

		return ENOENT;
	}

	/* Not in the cache; reserve memory for its contents. */

	lmfs_alloc_block(bp, block_size);

	assert(bp->data);

	if (how == NORMAL) {
		/* Try to read the block. Return an error code on failure. */
		if ((r = read_block(bp, block_size)) != OK) {
			put_block(bp, 0);

			return r;
		}
	} else if(how == NO_READ) {
		/* This block will be overwritten by new contents. */
	} else
		panic("unexpected 'how' value: %d", how);

	assert(bp->data);

	*bpp = bp;	/* return the newly acquired block */
	return OK;
}

/*===========================================================================*
 *				lmfs_get_block_ino			     *
 *===========================================================================*/
int lmfs_get_block_ino(struct buf **bpp, dev_t dev, block64_t block, int how,
	ino_t ino, u64_t ino_off)
{
	return get_block_ino(bpp, dev, block, how, ino, ino_off, fs_block_size);
}

/*===========================================================================*
 *				lmfs_get_partial_block			     *
 *===========================================================================*/
int lmfs_get_partial_block(struct buf **bpp, dev_t dev, block64_t block,
	int how, size_t block_size)
{
	return get_block_ino(bpp, dev, block, how, VMC_NO_INODE, 0, block_size);
}

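/* Illustrative sketch (assumed sizes, not from the original source): on a
 * device with 1000 full blocks plus a 2048-byte tail, and fs_block_size =
 * 4096, all blocks but the last are fetched through lmfs_get_block_ino(),
 * while the final partial block must be requested with its true size:
 *
 *	struct buf *bp;
 *	int r;
 *
 *	r = lmfs_get_block_ino(&bp, dev, 999, NORMAL, ino, 999 * 4096ULL);
 *	...
 *	r = lmfs_get_partial_block(&bp, dev, 1000, NORMAL, 2048);
 *
 * Note that block 1000 still starts at byte offset 1000 * 4096 on the device;
 * the smaller size only affects how many bytes are read into the buffer.
 */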
/*===========================================================================*
 *				put_block				     *
 *===========================================================================*/
static void put_block(struct buf *bp, int put_flags)
{
/* Return a block to the list of available blocks. Depending on 'put_flags'
 * it may be put on the front or rear of the LRU chain. Blocks that are
 * expected to be needed again at some point go on the rear; blocks that are
 * unlikely to be needed again at all go on the front.
 */
	dev_t dev;
	uint64_t dev_off;
	int r, setflags;

	assert(bp != NULL);

	dev = bp->lmfs_dev;

	dev_off = bp->lmfs_blocknr * fs_block_size;

	lowercount(bp);
	if (bp->lmfs_count != 0) return;	/* block is still in use */

	/* Put this block back on the LRU chain. */
	if (dev == NO_DEV || dev == DEV_RAM || (put_flags & ONE_SHOT)) {
		/* Block will not be needed again. Put it on front of chain.
		 * It will be the next block to be evicted from the cache.
		 */
		bp->lmfs_prev = NULL;
		bp->lmfs_next = front;
		if (front == NULL)
			rear = bp;	/* LRU chain was empty */
		else
			front->lmfs_prev = bp;
		front = bp;
	}
	else {
		/* Block may be needed again. Put it on rear of chain.
		 * It will not be evicted from the cache for a long time.
		 */
		bp->lmfs_prev = rear;
		bp->lmfs_next = NULL;
		if (rear == NULL)
			front = bp;
		else
			rear->lmfs_next = bp;
		rear = bp;
	}

	assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED);
	bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED;

	/* block has sensible content - if necessary, identify it to VM */
	if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) {
		assert(bp->data);

		setflags = (put_flags & ONE_SHOT) ? VMSF_ONCE : 0;

		if ((r = vm_set_cacheblock(bp->data, dev, dev_off, bp->lmfs_inode,
		    bp->lmfs_inode_offset, &bp->lmfs_flags,
		    roundup(bp->lmfs_bytes, PAGE_SIZE), setflags)) != OK) {
			if(r == ENOSYS) {
				printf("libminixfs: ENOSYS, disabling VM calls\n");
				vmcache = 0;
			} else if (r == ENOMEM) {
				/* Do not panic in this case. Running out of memory is
				 * bad, especially since it may lead to applications
				 * crashing when trying to access memory-mapped pages
				 * we haven't been able to pass off to the VM cache,
				 * but the entire file system crashing is always worse.
				 */
				printf("libminixfs: no memory for cache block!\n");
			} else {
				panic("libminixfs: setblock of %p dev 0x%llx off "
					"0x%llx failed\n", bp->data, dev, dev_off);
			}
		}
	}
	bp->lmfs_needsetcache = 0;

	/* Now that we (may) have given the block to VM, invalidate the block if it
	 * is a one-shot block. Otherwise, it may still be reobtained immediately
	 * after, which could be a problem if VM already forgot the block and we are
	 * expected to pass it to VM again, which then wouldn't happen.
	 */
	if (put_flags & ONE_SHOT)
		bp->lmfs_dev = NO_DEV;
}

/*===========================================================================*
 *				lmfs_put_block				     *
 *===========================================================================*/
void lmfs_put_block(struct buf *bp)
{
/* User interface to put_block(). */

	if (bp == NULL) return;	/* for poorly written file systems */

	put_block(bp, 0);
}

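/* Illustrative sketch (not part of the original source): the two release
 * paths described above. A block that may be needed again goes to the rear
 * of the LRU via the public API, while internal callers can pass ONE_SHOT to
 * place a block at the front, making it the next eviction candidate:
 *
 *	lmfs_markdirty(bp);
 *	lmfs_put_block(bp);		-- rear of LRU, kept around
 *
 *	put_block(bp, ONE_SHOT);	-- front of LRU, evicted first
 *					   (internal use only)
 */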
/*===========================================================================*
 *				lmfs_free_block				     *
 *===========================================================================*/
void lmfs_free_block(dev_t dev, block64_t block)
{
/* The file system has just freed the given block. The block may previously
 * have been in use as data block for an inode. Therefore, we now need to tell
 * VM that the block is no longer associated with an inode. If we fail to do so
 * and the inode now has a hole at this location, mapping in the hole would
 * yield the old block contents rather than a zeroed page. In addition, if the
 * block is in the cache, it will be removed, even if it was dirty.
 */
	struct buf *bp;
	int r;

	/* Tell VM to forget about the block. The primary purpose of this call is to
	 * break the inode association, but since the block is part of a mounted file
	 * system, it is not expected to be accessed directly anyway. So, save some
	 * cache memory by throwing it out of the VM cache altogether.
	 */
	if (vmcache) {
		if ((r = vm_forget_cacheblock(dev, block * fs_block_size,
		    fs_block_size)) != OK)
			printf("libminixfs: vm_forget_cacheblock failed (%d)\n", r);
	}

	if ((bp = find_block(dev, block)) != NULL) {
		lmfs_markclean(bp);

		/* Invalidate the block. The block may or may not be in use right now,
		 * so don't be smart about freeing memory or repositioning in the LRU.
		 */
		bp->lmfs_dev = NO_DEV;
	}

	/* Note that this is *not* the right place to implement TRIM support. Even
	 * though the block is freed, on the device it may still be part of a
	 * previous checkpoint or snapshot of some sort. Only the file system can
	 * be trusted to decide which blocks can be reused on the device!
	 */
}

/*===========================================================================*
 *				lmfs_zero_block_ino			     *
 *===========================================================================*/
void lmfs_zero_block_ino(dev_t dev, ino_t ino, u64_t ino_off)
{
/* Files may have holes. From an application perspective, these are just file
 * regions filled with zeroes. From a file system perspective however, holes
 * may represent unallocated regions on disk. Thus, these holes do not have
 * corresponding blocks on the disk, and therefore also no block number.
 * Therefore, we cannot simply use lmfs_get_block_ino() for them. For reads,
 * this is not a problem, since the file system can just zero out the target
 * application buffer instead. For mapped pages however, this *is* a problem,
 * since the VM cache needs to be told about the corresponding block, and VM
 * does not accept blocks without a device offset. The role of this function is
 * therefore to tell VM about the hole using a fake device offset. The device
 * offsets are picked so that the VM cache will see a block memory-mapped for
 * the hole in the file, while the same block is not visible when
 * memory-mapping the block device.
 */
	struct buf *bp;
	static block64_t fake_block = 0;
	int r;

	if (!vmcache)
		return;

	assert(fs_block_size > 0);

	/* Pick a block number which is above the threshold of what can possibly be
	 * mapped in by mmap'ing the device, since off_t is signed, and it is safe to
	 * say that it will take a while before we have 8-exabyte devices. Pick a
	 * different block number each time to avoid possible concurrency issues.
	 * FIXME: it does not seem like VM actually verifies mmap offsets though..
	 */
	if (fake_block == 0 || ++fake_block >= UINT64_MAX / fs_block_size)
		fake_block = ((uint64_t)INT64_MAX + 1) / fs_block_size;

	/* Obtain a block. */
	if ((r = lmfs_get_block_ino(&bp, dev, fake_block, NO_READ, ino,
	    ino_off)) != OK)
		panic("libminixfs: getting a NO_READ block failed: %d", r);
	assert(bp != NULL);
	assert(bp->lmfs_dev != NO_DEV);

	/* The block is already zeroed, as it has just been allocated with mmap. File
	 * systems do not rely on this assumption yet, so if VM ever gets changed to
	 * not clear the blocks we allocate (e.g., by recycling pages in the VM cache
	 * for the same process, which would be safe), we need to add a memset here.
	 */

	/* Release the block. We don't expect it to be accessed ever again. Moreover,
	 * if we keep the block around in the VM cache, it may erroneously be mapped
	 * in beyond the file end later. Hence, use VMSF_ONCE when passing it to VM.
	 * TODO: tell VM that it is an all-zeroes block, so that VM can deduplicate
	 * all such pages in its cache.
	 */
	put_block(bp, ONE_SHOT);
}

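/* Illustrative arithmetic for the fake offsets above (assumed block size, not
 * from the original source): with fs_block_size = 4096, the first fake block
 * number is
 *
 *	((uint64_t)INT64_MAX + 1) / 4096 = 2^63 / 2^12 = 2^51
 *
 * so the corresponding device offset, fake_block * fs_block_size = 2^63,
 * already lies beyond the largest representable (signed) off_t, and no mmap
 * of the block device itself can reach it.
 */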
void lmfs_set_blockusage(fsblkcnt_t btotal, fsblkcnt_t bused)
{

	assert(bused <= btotal);
	fs_btotal = btotal;
	fs_bused = bused;

	/* if the cache isn't in use, we could resize it. */
	if (bufs_in_use == 0)
		cache_heuristic_check();
}

/*===========================================================================*
 *				read_block				     *
 *===========================================================================*/
static int read_block(struct buf *bp, size_t block_size)
{
/* Read a disk block of 'block_size' bytes. The given size is always the FS
 * block size, except for the last block of a device. If an I/O error occurs,
 * invalidate the block and return an error code.
 */
	ssize_t r;
	off_t pos;
	dev_t dev = bp->lmfs_dev;

	assert(dev != NO_DEV);

	ASSERT(bp->lmfs_bytes == block_size);
	ASSERT(fs_block_size > 0);

	pos = (off_t)bp->lmfs_blocknr * fs_block_size;
	if (block_size > PAGE_SIZE) {
#define MAXPAGES 20
		vir_bytes blockrem, vaddr = (vir_bytes) bp->data;
		int p = 0;
		static iovec_t iovec[MAXPAGES];
		blockrem = block_size;
		while(blockrem > 0) {
			vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem;
			iovec[p].iov_addr = vaddr;
			iovec[p].iov_size = chunk;
			vaddr += chunk;
			blockrem -= chunk;
			p++;
		}
		r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS);
	} else {
		r = bdev_read(dev, pos, bp->data, block_size, BDEV_NOFLAGS);
	}
	if (r != (ssize_t)block_size) {
		printf("fs cache: I/O error on device %d/%d, block %"PRIu64" (%zd)\n",
			major(dev), minor(dev), bp->lmfs_blocknr, r);
		if (r >= 0)
			r = EIO; /* TODO: retry retrieving (just) the remaining part */

		bp->lmfs_dev = NO_DEV;	/* invalidate block */

		return r;
	}

	return OK;
}

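/* Illustrative example (assumed sizes, not from the original source): for an
 * 8192-byte file system block with PAGE_SIZE = 4096, read_block() builds two
 * iovec entries before issuing a single gather request:
 *
 *	iovec[0].iov_addr = bp->data;		iovec[0].iov_size = 4096;
 *	iovec[1].iov_addr = bp->data + 4096;	iovec[1].iov_size = 4096;
 *	r = bdev_gather(dev, pos, iovec, 2, BDEV_NOFLAGS);
 *
 * A 4096-byte block takes the plain bdev_read() path instead.
 */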
/*===========================================================================*
 *				lmfs_invalidate				     *
 *===========================================================================*/
void lmfs_invalidate(
  dev_t device			/* device whose blocks are to be purged */
)
{
/* Remove all the blocks belonging to some device from the cache. */

  register struct buf *bp;

  assert(device != NO_DEV);

  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
	if (bp->lmfs_dev == device) {
		assert(bp->data);
		assert(bp->lmfs_bytes > 0);
		munmap_t(bp->data, bp->lmfs_bytes);
		bp->lmfs_dev = NO_DEV;
		bp->lmfs_bytes = 0;
		bp->data = NULL;
	}
  }

  /* Clear the cache even if VM caching is disabled for the file system:
   * caching may be disabled as side effect of an error, leaving blocks behind
   * in the actual VM cache.
   */
  vm_clear_cache(device);
}

/*===========================================================================*
 *				sort_blocks				     *
 *===========================================================================*/
static void sort_blocks(struct buf **bufq, unsigned int bufqsize)
{
	struct buf *bp;
	int i, j, gap;

	gap = 1;
	do
		gap = 3 * gap + 1;
	while ((unsigned int)gap <= bufqsize);

	while (gap != 1) {
		gap /= 3;
		for (j = gap; (unsigned int)j < bufqsize; j++) {
			for (i = j - gap; i >= 0 &&
			    bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
			    i -= gap) {
				bp = bufq[i];
				bufq[i] = bufq[i + gap];
				bufq[i + gap] = bp;
			}
		}
	}
}

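/* Illustrative example (assumed queue length, not from the original source):
 * sort_blocks() is a Shell sort using the 3*g+1 gap sequence. For
 * bufqsize = 30, the do-while grows the gap as 1, 4, 13, 40 and stops once it
 * passes the queue length, after which the sorting passes run with gaps 13, 4
 * and finally 1, leaving bufq ordered by ascending lmfs_blocknr.
 */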
/*===========================================================================*
 *				rw_scattered				     *
 *===========================================================================*/
static void rw_scattered(
  dev_t dev,			/* major-minor device number */
  struct buf **bufq,		/* pointer to array of buffers */
  unsigned int bufqsize,	/* number of buffers */
  int rw_flag			/* READING or WRITING */
)
{
/* Read or write scattered data from a device. */

  register struct buf *bp;
  register iovec_t *iop;
  static iovec_t iovec[NR_IOREQS];
  off_t pos;
  unsigned int i, iov_per_block;
  unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize;

  if(bufqsize == 0) return;

  /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
  if (rw_flag == READING) {
	assert(bufqsize <= LMFS_MAX_PREFETCH);

	for(i = 0; i < bufqsize; i++) {
		assert(bufq[i] != NULL);
		assert(bufq[i]->lmfs_count > 0);
	}

	/* therefore they are all 'in use' and must be at least this many */
	assert(start_in_use >= start_bufqsize);
  }

  assert(dev != NO_DEV);
  assert(fs_block_size > 0);
  assert(howmany(fs_block_size, PAGE_SIZE) <= NR_IOREQS);

  /* For WRITING, (Shell) sort buffers on lmfs_blocknr.
   * For READING, the buffers are already sorted.
   */
  if (rw_flag == WRITING)
	sort_blocks(bufq, bufqsize);

  /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
  while (bufqsize > 0) {
	unsigned int p, nblocks = 0, niovecs = 0;
	int r;
	for (iop = iovec; nblocks < bufqsize; nblocks++) {
		vir_bytes vdata, blockrem;
		bp = bufq[nblocks];
		if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks)
			break;
		blockrem = bp->lmfs_bytes;
		iov_per_block = howmany(blockrem, PAGE_SIZE);
		if (niovecs > NR_IOREQS - iov_per_block) break;
		vdata = (vir_bytes) bp->data;
		for(p = 0; p < iov_per_block; p++) {
			vir_bytes chunk =
			    blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
			iop->iov_addr = vdata;
			iop->iov_size = chunk;
			vdata += PAGE_SIZE;
			blockrem -= chunk;
			iop++;
			niovecs++;
		}
		assert(p == iov_per_block);
		assert(blockrem == 0);
	}

	assert(nblocks > 0);
	assert(niovecs > 0 && niovecs <= NR_IOREQS);

	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
	if (rw_flag == READING)
		r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS);
	else
		r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS);

	/* Harvest the results. The driver may have returned an error, or it
	 * may have done less than what we asked for.
	 */
	if (r < 0) {
		printf("fs cache: I/O error %d on device %d/%d, "
			"block %"PRIu64"\n",
			r, major(dev), minor(dev), bufq[0]->lmfs_blocknr);
	}
	for (i = 0; i < nblocks; i++) {
		bp = bufq[i];
		if (r < (ssize_t)bp->lmfs_bytes) {
			/* Transfer failed. */
			if (i == 0) {
				bp->lmfs_dev = NO_DEV;	/* Invalidate block */
			}
			break;
		}
		if (rw_flag == READING) {
			lmfs_put_block(bp);
		} else {
			MARKCLEAN(bp);
		}
		r -= bp->lmfs_bytes;
	}

	bufq += i;
	bufqsize -= i;

	if (rw_flag == READING) {
		/* Don't bother reading more than the device is willing to
		 * give at this time. Don't forget to release those extras.
		 */
		while (bufqsize > 0) {
			bp = *bufq++;
			bp->lmfs_dev = NO_DEV;	/* invalidate block */
			lmfs_put_block(bp);
			bufqsize--;
		}
	}
	if (rw_flag == WRITING && i == 0) {
		/* We're not making progress, this means we might keep
		 * looping. Buffers remain dirty if un-written. Buffers are
		 * lost if invalidate()d or LRU-removed while dirty. This
		 * is better than keeping unwritable blocks around forever..
		 */
		break;
	}
  }

  if (rw_flag == READING) {
	assert(start_in_use >= start_bufqsize);

	/* READING callers assume all bufs are released. */
	assert(start_in_use - start_bufqsize == bufs_in_use);
  }
}
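
/* Illustrative helper (inside "#if 0", so not compiled): how the loop in
 * rw_scattered() above turns one block of 'bytes' bytes at virtual address
 * 'vdata' into page-sized I/O vector entries. With a 4096-byte page, a
 * 10240-byte block yields chunks of 4096, 4096 and 2048 bytes. The function
 * name is invented for this sketch; the real logic lives inline above.
 */
#if 0
static unsigned int example_chunk_block(vir_bytes vdata, size_t bytes,
	iovec_t *iov)
{
	vir_bytes blockrem = bytes;
	unsigned int p, iov_per_block = howmany(bytes, PAGE_SIZE);

	for (p = 0; p < iov_per_block; p++) {
		/* The last chunk may be a partial page. */
		vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
		iov[p].iov_addr = vdata;
		iov[p].iov_size = chunk;
		vdata += PAGE_SIZE;
		blockrem -= chunk;
	}
	return iov_per_block;	/* number of iovec entries produced */
}
#endif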

/*===========================================================================*
 *				lmfs_readahead				     *
 *===========================================================================*/
void lmfs_readahead(dev_t dev, block64_t base_block, unsigned int nblocks,
	size_t last_size)
{
/* Read ahead 'nblocks' blocks starting from the block 'base_block' on device
 * 'dev'. The number of blocks must be between 1 and LMFS_MAX_PREFETCH,
 * inclusive. All blocks have the file system's block size, except possibly
 * the last block in the range, which is of size 'last_size'. The caller must
 * ensure that none of the blocks in the range are already in the cache.
 * However, the caller must also not rely on all or even any of the blocks to
 * be present in the cache afterwards--failures are (deliberately!) ignored.
 */
  static noxfer_buf_ptr_t bufq[LMFS_MAX_PREFETCH];	/* static for size only */
  struct buf *bp;
  unsigned int count;
  int r;

  assert(nblocks >= 1 && nblocks <= LMFS_MAX_PREFETCH);

  for (count = 0; count < nblocks; count++) {
	if (count == nblocks - 1)
		r = lmfs_get_partial_block(&bp, dev, base_block + count,
		    NO_READ, last_size);
	else
		r = lmfs_get_block(&bp, dev, base_block + count, NO_READ);

	if (r != OK)
		break;

	/* We could add a flag that makes the get_block() calls fail if the
	 * block is already in the cache, but it is not a major concern if it
	 * is: we just perform a useless read in that case. However, if the
	 * block is cached *and* dirty, we are about to lose its new contents.
	 */
	assert(lmfs_isclean(bp));

	bufq[count] = bp;
  }

  rw_scattered(dev, bufq, count, READING);
}

/*===========================================================================*
 *				lmfs_readahead_limit			     *
 *===========================================================================*/
unsigned int lmfs_readahead_limit(void)
{
/* Return the maximum number of blocks that should be read ahead at once. The
 * return value is guaranteed to be between 1 and LMFS_MAX_PREFETCH, inclusive.
 */
  unsigned int max_transfer, max_bufs;

  /* The returned value is the minimum of two factors: the maximum number of
   * blocks that can be transferred in a single I/O gather request (see how
   * rw_scattered() generates I/O requests), and a policy limit on the number
   * of cache buffers that any single read-ahead operation may claim, so that
   * read-ahead cannot thrash the cache.
   */
  max_transfer = NR_IOREQS / MAX(fs_block_size / PAGE_SIZE, 1);

  /* The constants have been imported from MFS as is, and may need tuning. */
  if (nr_bufs < 50)
	max_bufs = 18;
  else
	max_bufs = nr_bufs - 4;

  return MIN(max_transfer, max_bufs);
}
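
/* Worked example of the limit computation above, assuming NR_IOREQS is 64 and
 * PAGE_SIZE is 4096 (both are system-dependent): with 8192-byte file system
 * blocks, each block needs two iovec entries, so max_transfer is 64 / 2 = 32
 * blocks. With a 1000-buffer cache, max_bufs is 996, and the limit is
 * MIN(32, 996) = 32.
 *
 * A hypothetical caller that has detected sequential access might combine the
 * two calls as in this sketch (inside "#if 0", so not compiled; the function
 * name is invented):
 */
#if 0
static void example_sequential_readahead(dev_t dev, block64_t next_block)
{
	unsigned int nblocks;

	/* Never read ahead more than the cache and driver can take. */
	nblocks = lmfs_readahead_limit();

	/* The caller must already have checked that none of these blocks are
	 * in the cache. All blocks are full-sized here, so the size of the
	 * last block in the range is simply the FS block size.
	 */
	lmfs_readahead(dev, next_block, nblocks, lmfs_fs_block_size());
}
#endif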

/*===========================================================================*
 *				lmfs_prefetch				     *
 *===========================================================================*/
void lmfs_prefetch(dev_t dev, const block64_t *blockset, unsigned int nblocks)
{
/* The given set of blocks is expected to be needed soon, so prefetch a
 * convenient subset. The blocks are expected to be sorted by likelihood of
 * being accessed soon, making the first block of the set the most important
 * block to prefetch right now. The caller must have made sure that the blocks
 * are not in the cache already. The array may have duplicate block numbers.
 */
  bitchunk_t blocks_before[BITMAP_CHUNKS(LMFS_MAX_PREFETCH)];
  bitchunk_t blocks_after[BITMAP_CHUNKS(LMFS_MAX_PREFETCH)];
  block64_t block, base_block;
  unsigned int i, bit, nr_before, nr_after, span, limit, nr_blocks;

  if (nblocks == 0)
	return;

  /* Here is the deal. We are going to prefetch one range only, because seeking
   * is too expensive for just prefetching. The range we select should at least
   * include the first ("base") block of the given set, since that is the block
   * the caller is primarily interested in. Thus, the rest of the range is
   * going to have to be directly around this base block. We first check which
   * blocks from the set fall just before and after the base block, which then
   * allows us to construct a contiguous range of desired blocks directly
   * around the base block, in O(n) time. As a natural part of this, we ignore
   * duplicate blocks in the given set. We then read from the beginning of this
   * range, in order to maximize the chance that a next prefetch request will
   * continue from the last disk position without requiring a seek. However, we
   * do correct for the maximum number of blocks we can (or should) read in at
   * once, such that we will still end up reading the base block. A worked
   * example of the range construction follows below.
   */
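  /* Worked example (illustrative numbers only): suppose the set is
   * { 100, 98, 103, 99, 101 }, so base_block is 100. Blocks 98 and 99 set
   * bits 1 and 0 of blocks_before; block 101 sets bit 0 of blocks_after, and
   * block 103 sets bit 2. Scanning for the first clear bit then yields
   * nr_before = 2 and nr_after = 1, giving the contiguous range 98..101, a
   * span of 4 blocks; 103 is not adjacent and is ignored. If the read-ahead
   * limit allows fewer than 4 blocks, the start of the range is moved up so
   * that block 100 itself is still read.
   */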
  base_block = blockset[0];

  memset(blocks_before, 0, sizeof(blocks_before));
  memset(blocks_after, 0, sizeof(blocks_after));

  for (i = 1; i < nblocks; i++) {
	block = blockset[i];

	if (block < base_block && block + LMFS_MAX_PREFETCH >= base_block) {
		bit = base_block - block - 1;
		assert(bit < LMFS_MAX_PREFETCH);
		SET_BIT(blocks_before, bit);
	} else if (block > base_block &&
	    block - LMFS_MAX_PREFETCH <= base_block) {
		bit = block - base_block - 1;
		assert(bit < LMFS_MAX_PREFETCH);
		SET_BIT(blocks_after, bit);
	}
  }

  for (nr_before = 0; nr_before < LMFS_MAX_PREFETCH; nr_before++)
	if (!GET_BIT(blocks_before, nr_before))
		break;

  for (nr_after = 0; nr_after < LMFS_MAX_PREFETCH; nr_after++)
	if (!GET_BIT(blocks_after, nr_after))
		break;

  /* The number of blocks to prefetch is the minimum of two factors: the number
   * of blocks in the range around the base block, and the maximum number of
   * blocks that should be read ahead at once at all.
   */
  span = nr_before + 1 + nr_after;
  limit = lmfs_readahead_limit();

  nr_blocks = MIN(span, limit);
  assert(nr_blocks >= 1 && nr_blocks <= LMFS_MAX_PREFETCH);

  /* Start prefetching from the lowest block within the contiguous range, but
   * make sure that we read at least the original base block itself, too.
   */
  base_block -= MIN(nr_before, nr_blocks - 1);

  lmfs_readahead(dev, base_block, nr_blocks, fs_block_size);
}
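
/* A hypothetical file system might call lmfs_prefetch() right before reading
 * a set of blocks whose numbers it has just resolved, for example the data
 * blocks referenced by an indirect block. This sketch is inside "#if 0" and
 * not compiled; all names except lmfs_prefetch() itself are invented.
 */
#if 0
static void example_prefetch_resolved(dev_t dev,
	const block64_t *data_blocks, unsigned int count)
{
	/* The first array entry should be the block needed soonest; the rest
	 * are hints, may contain duplicates, and must not be cached yet.
	 */
	if (count > 0)
		lmfs_prefetch(dev, data_blocks, count);
}
#endif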

/*===========================================================================*
 *				lmfs_flushdev				     *
 *===========================================================================*/
void lmfs_flushdev(dev_t dev)
{
/* Flush all dirty blocks for one device. */

  register struct buf *bp;
  static noxfer_buf_ptr_t *dirty;
  static unsigned int dirtylistsize = 0;
  unsigned int ndirty;

  if (dirtylistsize != nr_bufs) {
	if (dirtylistsize > 0) {
		assert(dirty != NULL);
		free(dirty);
	}
	if (!(dirty = malloc(sizeof(dirty[0]) * nr_bufs)))
		panic("couldn't allocate dirty buf list");
	dirtylistsize = nr_bufs;
  }

  for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) {
	/* Do not flush dirty blocks that are in use (lmfs_count > 0): the
	 * file system may mark a block as dirty before changing its contents,
	 * in which case the new contents could end up being lost.
	 */
	if (!lmfs_isclean(bp) && bp->lmfs_dev == dev && bp->lmfs_count == 0) {
		dirty[ndirty++] = bp;
	}
  }

  rw_scattered(dev, dirty, ndirty, WRITING);
}
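
/* Sketch of how a file system's sync handler might use lmfs_flushdev();
 * inside "#if 0", so not compiled. The handler name and the device variable
 * are assumptions made for this example.
 */
#if 0
static dev_t fs_dev;		/* device this file system is mounted on */

static void example_fs_sync(void)
{
	/* Write out all dirty, unused cache blocks for our device. Blocks
	 * that are still held (lmfs_count > 0) are deliberately skipped.
	 */
	lmfs_flushdev(fs_dev);
}
#endif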

/*===========================================================================*
 *				rm_lru					     *
 *===========================================================================*/
static void rm_lru(struct buf *bp)
{
/* Remove a block from its LRU chain. */
  struct buf *next_ptr, *prev_ptr;

  next_ptr = bp->lmfs_next;	/* successor on LRU chain */
  prev_ptr = bp->lmfs_prev;	/* predecessor on LRU chain */
  if (prev_ptr != NULL)
	prev_ptr->lmfs_next = next_ptr;
  else
	front = next_ptr;	/* this block was at front of chain */

  if (next_ptr != NULL)
	next_ptr->lmfs_prev = prev_ptr;
  else
	rear = prev_ptr;	/* this block was at rear of chain */
}

/*===========================================================================*
 *				cache_resize				     *
 *===========================================================================*/
static void cache_resize(size_t blocksize, unsigned int bufs)
{
  struct buf *bp;

  assert(blocksize > 0);
  assert(bufs >= MINBUFS);

  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
	if (bp->lmfs_count != 0) panic("change blocksize with buffer in use");

  lmfs_buf_pool(bufs);

  fs_block_size = blocksize;
}

static void cache_heuristic_check(void)
{
  int bufs, d;

  bufs = fs_bufs_heuristic(MINBUFS, fs_btotal, fs_bused, fs_block_size);

  /* Set the cache to the new heuristic size, but only if it differs from the
   * current size by more than 10%.
   */
  d = bufs - nr_bufs;
  if (d < 0) d = -d;
  if (d * 100 / nr_bufs > 10) {
	cache_resize(fs_block_size, bufs);
  }
}
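
/* Worked example of the hysteresis above, with illustrative numbers: if the
 * cache currently has nr_bufs = 1024 buffers and the heuristic now suggests
 * 1100, then d = 76 and d * 100 / nr_bufs = 7, which is not more than 10, so
 * the cache keeps its size. A suggestion of 1300 gives 26, triggering a
 * resize. This keeps small fluctuations in memory pressure from causing
 * constant reallocation of the buffer pool.
 */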

/*===========================================================================*
 *				lmfs_set_blocksize			     *
 *===========================================================================*/
void lmfs_set_blocksize(size_t new_block_size)
{
  cache_resize(new_block_size, MINBUFS);
  cache_heuristic_check();

  /* Decide whether or not to use the secondary (VM) cache. Only do so if the
   * block size is a multiple of the page size, and if using the VM cache has
   * been enabled for this file system.
   */

  vmcache = 0;

  if (may_use_vmcache && !(new_block_size % PAGE_SIZE))
	vmcache = 1;
}

/*===========================================================================*
 *				lmfs_buf_pool				     *
 *===========================================================================*/
void lmfs_buf_pool(int new_nr_bufs)
{
/* Initialize the buffer pool. */
  register struct buf *bp;

  assert(new_nr_bufs >= MINBUFS);

  if (nr_bufs > 0) {
	assert(buf);
	lmfs_flushall();
	for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
		if (bp->data) {
			assert(bp->lmfs_bytes > 0);
			munmap_t(bp->data, bp->lmfs_bytes);
		}
	}
  }

  if (buf)
	free(buf);

  if (!(buf = calloc(sizeof(buf[0]), new_nr_bufs)))
	panic("couldn't allocate buf list (%d)", new_nr_bufs);

  if (buf_hash)
	free(buf_hash);
  if (!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs)))
	panic("couldn't allocate buf hash list (%d)", new_nr_bufs);

  nr_bufs = new_nr_bufs;

  bufs_in_use = 0;
  front = &buf[0];
  rear = &buf[nr_bufs - 1];

  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
	bp->lmfs_blocknr = NO_BLOCK;
	bp->lmfs_dev = NO_DEV;
	bp->lmfs_next = bp + 1;
	bp->lmfs_prev = bp - 1;
	bp->data = NULL;
	bp->lmfs_bytes = 0;
  }
  front->lmfs_prev = NULL;
  rear->lmfs_next = NULL;

  /* Initially, chain all buffers into hash chain 0. */
  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next;
  buf_hash[0] = front;
}
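
/* State after lmfs_buf_pool(6), for illustration: front points to buf[0] and
 * rear to buf[5]; buf[0..5] form one doubly-linked LRU chain through
 * lmfs_next/lmfs_prev, every buffer is marked free (NO_DEV, NO_BLOCK, no data
 * mapped), and all buffers temporarily sit in hash chain 0 until they are
 * assigned real block numbers.
 */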

void lmfs_flushall(void)
{
  struct buf *bp;
  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
	if (bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp))
		lmfs_flushdev(bp->lmfs_dev);

  /* This is the moment where it is least likely (although certainly not
   * impossible!) that there are buffers in use, since buffers should not
   * be held across file system syncs. See if we already intended to
   * resize the buffer cache, but couldn't. Be aware that we may be
   * called indirectly from within lmfs_change_blockusage(), so care must
   * be taken not to recurse infinitely. TODO: see if it is better to
   * resize the cache from here *only*, thus guaranteeing a clean cache.
   */
  lmfs_change_blockusage(0);
}

size_t lmfs_fs_block_size(void)
{
  return fs_block_size;
}

void lmfs_may_use_vmcache(int ok)
{
  may_use_vmcache = ok;
}
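
/* Putting it together: a file system server might set up and tear down its
 * cache along these lines during mount and unmount. This sketch is inside
 * "#if 0" and not compiled; the surrounding function names are assumptions,
 * and only the lmfs_*() calls are real.
 */
#if 0
static void example_fs_mount(size_t sb_block_size)
{
	/* Allow the VM secondary cache before picking a block size, since
	 * lmfs_set_blocksize() decides on VM cache use as a side effect.
	 */
	lmfs_may_use_vmcache(1);

	/* Size the pool and pick the block size; this also applies the
	 * memory-based cache size heuristic.
	 */
	lmfs_set_blocksize(sb_block_size);
}

static void example_fs_unmount(dev_t dev)
{
	/* Write back everything before the device goes away. */
	lmfs_flushdev(dev);
}
#endif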