1433d6423SLionel Sambuc 2433d6423SLionel Sambuc #define _SYSTEM 3433d6423SLionel Sambuc 4433d6423SLionel Sambuc #include <assert.h> 5433d6423SLionel Sambuc #include <errno.h> 6433d6423SLionel Sambuc #include <math.h> 7433d6423SLionel Sambuc #include <stdlib.h> 8433d6423SLionel Sambuc 9433d6423SLionel Sambuc #include <machine/vmparam.h> 10433d6423SLionel Sambuc 11433d6423SLionel Sambuc #include <sys/param.h> 12433d6423SLionel Sambuc #include <sys/mman.h> 13433d6423SLionel Sambuc 14433d6423SLionel Sambuc #include <minix/dmap.h> 15433d6423SLionel Sambuc #include <minix/libminixfs.h> 16433d6423SLionel Sambuc #include <minix/syslib.h> 17433d6423SLionel Sambuc #include <minix/sysutil.h> 18433d6423SLionel Sambuc #include <minix/u64.h> 19433d6423SLionel Sambuc #include <minix/bdev.h> 20433d6423SLionel Sambuc 21b65ad59eSDavid van Moolenbroek #define BUFHASH(b) ((unsigned int)((b) % nr_bufs)) 22433d6423SLionel Sambuc #define MARKCLEAN lmfs_markclean 23433d6423SLionel Sambuc 24433d6423SLionel Sambuc #define MINBUFS 6 /* minimal no of bufs for sanity check */ 25433d6423SLionel Sambuc 26433d6423SLionel Sambuc static struct buf *front; /* points to least recently used free block */ 27433d6423SLionel Sambuc static struct buf *rear; /* points to most recently used free block */ 28433d6423SLionel Sambuc static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ 29433d6423SLionel Sambuc 30433d6423SLionel Sambuc static void rm_lru(struct buf *bp); 31433d6423SLionel Sambuc static void read_block(struct buf *); 32433d6423SLionel Sambuc static void freeblock(struct buf *bp); 33433d6423SLionel Sambuc static void cache_heuristic_check(int major); 34433d6423SLionel Sambuc 35433d6423SLionel Sambuc static int vmcache = 0; /* are we using vm's secondary cache? 
(initially not) */ 36433d6423SLionel Sambuc 37433d6423SLionel Sambuc static struct buf *buf; 38433d6423SLionel Sambuc static struct buf **buf_hash; /* the buffer hash table */ 39433d6423SLionel Sambuc static unsigned int nr_bufs; 40433d6423SLionel Sambuc static int may_use_vmcache; 41433d6423SLionel Sambuc 4265f76edbSDavid van Moolenbroek static size_t fs_block_size = PAGE_SIZE; /* raw i/o block size */ 43433d6423SLionel Sambuc 44433d6423SLionel Sambuc static int rdwt_err; 45433d6423SLionel Sambuc 46433d6423SLionel Sambuc static int quiet = 0; 47433d6423SLionel Sambuc 48433d6423SLionel Sambuc void lmfs_setquiet(int q) { quiet = q; } 49433d6423SLionel Sambuc 50433d6423SLionel Sambuc static u32_t fs_bufs_heuristic(int minbufs, u32_t btotal, u64_t bfree, 51433d6423SLionel Sambuc int blocksize, dev_t majordev) 52433d6423SLionel Sambuc { 53433d6423SLionel Sambuc struct vm_stats_info vsi; 54433d6423SLionel Sambuc int bufs; 55433d6423SLionel Sambuc u32_t kbytes_used_fs, kbytes_total_fs, kbcache, kb_fsmax; 56433d6423SLionel Sambuc u32_t kbytes_remain_mem; 57433d6423SLionel Sambuc u64_t bused; 58433d6423SLionel Sambuc 59433d6423SLionel Sambuc bused = btotal-bfree; 60433d6423SLionel Sambuc 61433d6423SLionel Sambuc /* set a reasonable cache size; cache at most a certain 62433d6423SLionel Sambuc * portion of the used FS, and at most a certain %age of remaining 63433d6423SLionel Sambuc * memory 64433d6423SLionel Sambuc */ 65433d6423SLionel Sambuc if(vm_info_stats(&vsi) != OK) { 66433d6423SLionel Sambuc bufs = 1024; 67433d6423SLionel Sambuc if(!quiet) 68433d6423SLionel Sambuc printf("fslib: heuristic info fail: default to %d bufs\n", bufs); 69433d6423SLionel Sambuc return bufs; 70433d6423SLionel Sambuc } 71433d6423SLionel Sambuc 72433d6423SLionel Sambuc /* remaining free memory is unused memory plus memory in used for cache, 73433d6423SLionel Sambuc * as the cache can be evicted 74433d6423SLionel Sambuc */ 75433d6423SLionel Sambuc kbytes_remain_mem = (u64_t)(vsi.vsi_free + 
vsi.vsi_cached) * 76433d6423SLionel Sambuc vsi.vsi_pagesize / 1024; 77433d6423SLionel Sambuc 78433d6423SLionel Sambuc /* check fs usage. */ 79433d6423SLionel Sambuc kbytes_used_fs = (unsigned long)(((u64_t)bused * blocksize) / 1024); 80433d6423SLionel Sambuc kbytes_total_fs = (unsigned long)(((u64_t)btotal * blocksize) / 1024); 81433d6423SLionel Sambuc 82433d6423SLionel Sambuc /* heuristic for a desired cache size based on FS usage; 83433d6423SLionel Sambuc * but never bigger than half of the total filesystem 84433d6423SLionel Sambuc */ 85433d6423SLionel Sambuc kb_fsmax = sqrt_approx(kbytes_used_fs)*40; 86433d6423SLionel Sambuc kb_fsmax = MIN(kb_fsmax, kbytes_total_fs/2); 87433d6423SLionel Sambuc 88433d6423SLionel Sambuc /* heuristic for a maximum usage - 10% of remaining memory */ 89433d6423SLionel Sambuc kbcache = MIN(kbytes_remain_mem/10, kb_fsmax); 90433d6423SLionel Sambuc bufs = kbcache * 1024 / blocksize; 91433d6423SLionel Sambuc 92433d6423SLionel Sambuc /* but we simply need MINBUFS no matter what */ 93433d6423SLionel Sambuc if(bufs < minbufs) 94433d6423SLionel Sambuc bufs = minbufs; 95433d6423SLionel Sambuc 96433d6423SLionel Sambuc return bufs; 97433d6423SLionel Sambuc } 98433d6423SLionel Sambuc 99433d6423SLionel Sambuc void lmfs_blockschange(dev_t dev, int delta) 100433d6423SLionel Sambuc { 101433d6423SLionel Sambuc /* Change the number of allocated blocks by 'delta.' 102433d6423SLionel Sambuc * Also accumulate the delta since the last cache re-evaluation. 103433d6423SLionel Sambuc * If it is outside a certain band, ask the cache library to 104433d6423SLionel Sambuc * re-evaluate the cache size. 
105433d6423SLionel Sambuc */ 106433d6423SLionel Sambuc static int bitdelta = 0; 107433d6423SLionel Sambuc bitdelta += delta; 108433d6423SLionel Sambuc #define BANDKB (10*1024) /* recheck cache every 10MB change */ 10965f76edbSDavid van Moolenbroek if(bitdelta*(int)fs_block_size/1024 > BANDKB || 11065f76edbSDavid van Moolenbroek bitdelta*(int)fs_block_size/1024 < -BANDKB) { 111433d6423SLionel Sambuc lmfs_cache_reevaluate(dev); 112433d6423SLionel Sambuc bitdelta = 0; 113433d6423SLionel Sambuc } 114433d6423SLionel Sambuc } 115433d6423SLionel Sambuc 116433d6423SLionel Sambuc void lmfs_markdirty(struct buf *bp) 117433d6423SLionel Sambuc { 118433d6423SLionel Sambuc bp->lmfs_flags |= VMMC_DIRTY; 119433d6423SLionel Sambuc } 120433d6423SLionel Sambuc 121433d6423SLionel Sambuc void lmfs_markclean(struct buf *bp) 122433d6423SLionel Sambuc { 123433d6423SLionel Sambuc bp->lmfs_flags &= ~VMMC_DIRTY; 124433d6423SLionel Sambuc } 125433d6423SLionel Sambuc 126433d6423SLionel Sambuc int lmfs_isclean(struct buf *bp) 127433d6423SLionel Sambuc { 128433d6423SLionel Sambuc return !(bp->lmfs_flags & VMMC_DIRTY); 129433d6423SLionel Sambuc } 130433d6423SLionel Sambuc 131433d6423SLionel Sambuc dev_t lmfs_dev(struct buf *bp) 132433d6423SLionel Sambuc { 133433d6423SLionel Sambuc return bp->lmfs_dev; 134433d6423SLionel Sambuc } 135433d6423SLionel Sambuc 136433d6423SLionel Sambuc int lmfs_bytes(struct buf *bp) 137433d6423SLionel Sambuc { 138433d6423SLionel Sambuc return bp->lmfs_bytes; 139433d6423SLionel Sambuc } 140433d6423SLionel Sambuc 141433d6423SLionel Sambuc static void free_unused_blocks(void) 142433d6423SLionel Sambuc { 143433d6423SLionel Sambuc struct buf *bp; 144433d6423SLionel Sambuc 145433d6423SLionel Sambuc int freed = 0, bytes = 0; 146433d6423SLionel Sambuc printf("libminixfs: freeing; %d blocks in use\n", bufs_in_use); 147433d6423SLionel Sambuc for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { 148433d6423SLionel Sambuc if(bp->lmfs_bytes > 0 && bp->lmfs_count == 0) { 149433d6423SLionel 
Sambuc freed++; 150433d6423SLionel Sambuc bytes += bp->lmfs_bytes; 151433d6423SLionel Sambuc freeblock(bp); 152433d6423SLionel Sambuc } 153433d6423SLionel Sambuc } 154433d6423SLionel Sambuc printf("libminixfs: freeing; %d blocks, %d bytes\n", freed, bytes); 155433d6423SLionel Sambuc } 156433d6423SLionel Sambuc 157433d6423SLionel Sambuc static void lmfs_alloc_block(struct buf *bp) 158433d6423SLionel Sambuc { 159433d6423SLionel Sambuc int len; 160433d6423SLionel Sambuc ASSERT(!bp->data); 161433d6423SLionel Sambuc ASSERT(bp->lmfs_bytes == 0); 162433d6423SLionel Sambuc 163433d6423SLionel Sambuc len = roundup(fs_block_size, PAGE_SIZE); 164433d6423SLionel Sambuc 165433d6423SLionel Sambuc if((bp->data = mmap(0, fs_block_size, 166433d6423SLionel Sambuc PROT_READ|PROT_WRITE, MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 167433d6423SLionel Sambuc free_unused_blocks(); 168433d6423SLionel Sambuc if((bp->data = mmap(0, fs_block_size, PROT_READ|PROT_WRITE, 169433d6423SLionel Sambuc MAP_PREALLOC|MAP_ANON, -1, 0)) == MAP_FAILED) { 170433d6423SLionel Sambuc panic("libminixfs: could not allocate block"); 171433d6423SLionel Sambuc } 172433d6423SLionel Sambuc } 173433d6423SLionel Sambuc assert(bp->data); 174433d6423SLionel Sambuc bp->lmfs_bytes = fs_block_size; 175433d6423SLionel Sambuc bp->lmfs_needsetcache = 1; 176433d6423SLionel Sambuc } 177433d6423SLionel Sambuc 178433d6423SLionel Sambuc /*===========================================================================* 179433d6423SLionel Sambuc * lmfs_get_block * 180433d6423SLionel Sambuc *===========================================================================*/ 181b65ad59eSDavid van Moolenbroek struct buf *lmfs_get_block(dev_t dev, block64_t block, int only_search) 182433d6423SLionel Sambuc { 183433d6423SLionel Sambuc return lmfs_get_block_ino(dev, block, only_search, VMC_NO_INODE, 0); 184433d6423SLionel Sambuc } 185433d6423SLionel Sambuc 18665f76edbSDavid van Moolenbroek static void munmap_t(void *a, int len) 
187433d6423SLionel Sambuc { 188433d6423SLionel Sambuc vir_bytes av = (vir_bytes) a; 189433d6423SLionel Sambuc assert(a); 190433d6423SLionel Sambuc assert(a != MAP_FAILED); 191433d6423SLionel Sambuc assert(len > 0); 192433d6423SLionel Sambuc assert(!(av % PAGE_SIZE)); 193433d6423SLionel Sambuc 194433d6423SLionel Sambuc len = roundup(len, PAGE_SIZE); 195433d6423SLionel Sambuc 196433d6423SLionel Sambuc assert(!(len % PAGE_SIZE)); 197433d6423SLionel Sambuc 198433d6423SLionel Sambuc if(munmap(a, len) < 0) 199433d6423SLionel Sambuc panic("libminixfs cache: munmap failed"); 200433d6423SLionel Sambuc } 201433d6423SLionel Sambuc 202433d6423SLionel Sambuc static void raisecount(struct buf *bp) 203433d6423SLionel Sambuc { 204433d6423SLionel Sambuc assert(bufs_in_use >= 0); 205433d6423SLionel Sambuc ASSERT(bp->lmfs_count >= 0); 206433d6423SLionel Sambuc bp->lmfs_count++; 207433d6423SLionel Sambuc if(bp->lmfs_count == 1) bufs_in_use++; 208433d6423SLionel Sambuc assert(bufs_in_use > 0); 209433d6423SLionel Sambuc } 210433d6423SLionel Sambuc 211433d6423SLionel Sambuc static void lowercount(struct buf *bp) 212433d6423SLionel Sambuc { 213433d6423SLionel Sambuc assert(bufs_in_use > 0); 214433d6423SLionel Sambuc ASSERT(bp->lmfs_count > 0); 215433d6423SLionel Sambuc bp->lmfs_count--; 216433d6423SLionel Sambuc if(bp->lmfs_count == 0) bufs_in_use--; 217433d6423SLionel Sambuc assert(bufs_in_use >= 0); 218433d6423SLionel Sambuc } 219433d6423SLionel Sambuc 220433d6423SLionel Sambuc static void freeblock(struct buf *bp) 221433d6423SLionel Sambuc { 222433d6423SLionel Sambuc ASSERT(bp->lmfs_count == 0); 223433d6423SLionel Sambuc /* If the block taken is dirty, make it clean by writing it to the disk. 224433d6423SLionel Sambuc * Avoid hysteresis by flushing all other dirty blocks for the same device. 
225433d6423SLionel Sambuc */ 226433d6423SLionel Sambuc if (bp->lmfs_dev != NO_DEV) { 227ebd3c067SDavid van Moolenbroek if (!lmfs_isclean(bp)) lmfs_flushdev(bp->lmfs_dev); 228433d6423SLionel Sambuc assert(bp->lmfs_bytes == fs_block_size); 229433d6423SLionel Sambuc bp->lmfs_dev = NO_DEV; 230433d6423SLionel Sambuc } 231433d6423SLionel Sambuc 232433d6423SLionel Sambuc /* Fill in block's parameters and add it to the hash chain where it goes. */ 233433d6423SLionel Sambuc MARKCLEAN(bp); /* NO_DEV blocks may be marked dirty */ 234433d6423SLionel Sambuc if(bp->lmfs_bytes > 0) { 235433d6423SLionel Sambuc assert(bp->data); 236433d6423SLionel Sambuc munmap_t(bp->data, bp->lmfs_bytes); 237433d6423SLionel Sambuc bp->lmfs_bytes = 0; 238433d6423SLionel Sambuc bp->data = NULL; 239433d6423SLionel Sambuc } else assert(!bp->data); 240433d6423SLionel Sambuc } 241433d6423SLionel Sambuc 242433d6423SLionel Sambuc /*===========================================================================* 243e94f856bSDavid van Moolenbroek * find_block * 244e94f856bSDavid van Moolenbroek *===========================================================================*/ 245e94f856bSDavid van Moolenbroek static struct buf *find_block(dev_t dev, block64_t block) 246e94f856bSDavid van Moolenbroek { 247e94f856bSDavid van Moolenbroek /* Search the hash chain for (dev, block). Return the buffer structure if 248e94f856bSDavid van Moolenbroek * found, or NULL otherwise. 
249e94f856bSDavid van Moolenbroek */ 250e94f856bSDavid van Moolenbroek struct buf *bp; 251e94f856bSDavid van Moolenbroek int b; 252e94f856bSDavid van Moolenbroek 253e94f856bSDavid van Moolenbroek assert(dev != NO_DEV); 254e94f856bSDavid van Moolenbroek 255e94f856bSDavid van Moolenbroek b = BUFHASH(block); 256e94f856bSDavid van Moolenbroek for (bp = buf_hash[b]; bp != NULL; bp = bp->lmfs_hash) 257e94f856bSDavid van Moolenbroek if (bp->lmfs_blocknr == block && bp->lmfs_dev == dev) 258e94f856bSDavid van Moolenbroek return bp; 259e94f856bSDavid van Moolenbroek 260e94f856bSDavid van Moolenbroek return NULL; 261e94f856bSDavid van Moolenbroek } 262e94f856bSDavid van Moolenbroek 263e94f856bSDavid van Moolenbroek /*===========================================================================* 264433d6423SLionel Sambuc * lmfs_get_block_ino * 265433d6423SLionel Sambuc *===========================================================================*/ 266b65ad59eSDavid van Moolenbroek struct buf *lmfs_get_block_ino(dev_t dev, block64_t block, int only_search, 267433d6423SLionel Sambuc ino_t ino, u64_t ino_off) 268433d6423SLionel Sambuc { 269433d6423SLionel Sambuc /* Check to see if the requested block is in the block cache. If so, return 270433d6423SLionel Sambuc * a pointer to it. If not, evict some other block and fetch it (unless 271433d6423SLionel Sambuc * 'only_search' is 1). All the blocks in the cache that are not in use 272433d6423SLionel Sambuc * are linked together in a chain, with 'front' pointing to the least recently 273433d6423SLionel Sambuc * used block and 'rear' to the most recently used block. If 'only_search' is 274433d6423SLionel Sambuc * 1, the block being requested will be overwritten in its entirety, so it is 275433d6423SLionel Sambuc * only necessary to see if it is in the cache; if it is not, any free buffer 276433d6423SLionel Sambuc * will do. It is not necessary to actually read the block in from disk. 
277433d6423SLionel Sambuc * If 'only_search' is PREFETCH, the block need not be read from the disk, 278433d6423SLionel Sambuc * and the device is not to be marked on the block, so callers can tell if 279433d6423SLionel Sambuc * the block returned is valid. 280433d6423SLionel Sambuc * In addition to the LRU chain, there is also a hash chain to link together 281433d6423SLionel Sambuc * blocks whose block numbers end with the same bit strings, for fast lookup. 282433d6423SLionel Sambuc */ 283433d6423SLionel Sambuc 284433d6423SLionel Sambuc int b; 285433d6423SLionel Sambuc static struct buf *bp; 286b65ad59eSDavid van Moolenbroek uint64_t dev_off; 287433d6423SLionel Sambuc struct buf *prev_ptr; 288433d6423SLionel Sambuc 289433d6423SLionel Sambuc assert(buf_hash); 290433d6423SLionel Sambuc assert(buf); 291433d6423SLionel Sambuc assert(nr_bufs > 0); 292433d6423SLionel Sambuc 293433d6423SLionel Sambuc ASSERT(fs_block_size > 0); 294433d6423SLionel Sambuc 295433d6423SLionel Sambuc assert(dev != NO_DEV); 296433d6423SLionel Sambuc 297b65ad59eSDavid van Moolenbroek assert(block <= UINT64_MAX / fs_block_size); 298b65ad59eSDavid van Moolenbroek 299b65ad59eSDavid van Moolenbroek dev_off = block * fs_block_size; 300b65ad59eSDavid van Moolenbroek 301433d6423SLionel Sambuc if((ino_off % fs_block_size)) { 302433d6423SLionel Sambuc 303433d6423SLionel Sambuc printf("cache: unaligned lmfs_get_block_ino ino_off %llu\n", 304433d6423SLionel Sambuc ino_off); 305433d6423SLionel Sambuc util_stacktrace(); 306433d6423SLionel Sambuc } 307433d6423SLionel Sambuc 308e94f856bSDavid van Moolenbroek /* See if the block is in the cache. If so, we can return it right away. */ 309e94f856bSDavid van Moolenbroek bp = find_block(dev, block); 310e94f856bSDavid van Moolenbroek if (bp != NULL && !(bp->lmfs_flags & VMMC_EVICTED)) { 311433d6423SLionel Sambuc /* Block needed has been found. 
*/ 312433d6423SLionel Sambuc if (bp->lmfs_count == 0) { 313433d6423SLionel Sambuc rm_lru(bp); 314433d6423SLionel Sambuc ASSERT(bp->lmfs_needsetcache == 0); 315433d6423SLionel Sambuc ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 316e94f856bSDavid van Moolenbroek /* FIXME: race condition against the VMMC_EVICTED check */ 317433d6423SLionel Sambuc bp->lmfs_flags |= VMMC_BLOCK_LOCKED; 318433d6423SLionel Sambuc } 319433d6423SLionel Sambuc raisecount(bp); 320433d6423SLionel Sambuc ASSERT(bp->lmfs_bytes == fs_block_size); 321433d6423SLionel Sambuc ASSERT(bp->lmfs_dev == dev); 322433d6423SLionel Sambuc ASSERT(bp->lmfs_dev != NO_DEV); 323433d6423SLionel Sambuc ASSERT(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 324433d6423SLionel Sambuc ASSERT(bp->data); 325433d6423SLionel Sambuc 326433d6423SLionel Sambuc if(ino != VMC_NO_INODE) { 327433d6423SLionel Sambuc if(bp->lmfs_inode == VMC_NO_INODE 328433d6423SLionel Sambuc || bp->lmfs_inode != ino 329433d6423SLionel Sambuc || bp->lmfs_inode_offset != ino_off) { 330433d6423SLionel Sambuc bp->lmfs_inode = ino; 331433d6423SLionel Sambuc bp->lmfs_inode_offset = ino_off; 332433d6423SLionel Sambuc bp->lmfs_needsetcache = 1; 333433d6423SLionel Sambuc } 334433d6423SLionel Sambuc } 335433d6423SLionel Sambuc 336433d6423SLionel Sambuc return(bp); 337433d6423SLionel Sambuc } 338e94f856bSDavid van Moolenbroek 339e94f856bSDavid van Moolenbroek /* We had the block in the cache but VM evicted it; invalidate it. 
*/ 340e94f856bSDavid van Moolenbroek if (bp != NULL) { 341e94f856bSDavid van Moolenbroek assert(bp->lmfs_flags & VMMC_EVICTED); 342e94f856bSDavid van Moolenbroek ASSERT(bp->lmfs_count == 0); 343e94f856bSDavid van Moolenbroek ASSERT(!(bp->lmfs_flags & VMMC_BLOCK_LOCKED)); 344e94f856bSDavid van Moolenbroek ASSERT(!(bp->lmfs_flags & VMMC_DIRTY)); 345e94f856bSDavid van Moolenbroek bp->lmfs_dev = NO_DEV; 346e94f856bSDavid van Moolenbroek bp->lmfs_bytes = 0; 347e94f856bSDavid van Moolenbroek bp->data = NULL; 348433d6423SLionel Sambuc } 349433d6423SLionel Sambuc 350433d6423SLionel Sambuc /* Desired block is not on available chain. Find a free block to use. */ 351433d6423SLionel Sambuc if(bp) { 352433d6423SLionel Sambuc ASSERT(bp->lmfs_flags & VMMC_EVICTED); 353433d6423SLionel Sambuc } else { 354433d6423SLionel Sambuc if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs); 355433d6423SLionel Sambuc } 356433d6423SLionel Sambuc assert(bp); 357433d6423SLionel Sambuc 358433d6423SLionel Sambuc rm_lru(bp); 359433d6423SLionel Sambuc 360433d6423SLionel Sambuc /* Remove the block that was just taken from its hash chain. */ 361433d6423SLionel Sambuc b = BUFHASH(bp->lmfs_blocknr); 362433d6423SLionel Sambuc prev_ptr = buf_hash[b]; 363433d6423SLionel Sambuc if (prev_ptr == bp) { 364433d6423SLionel Sambuc buf_hash[b] = bp->lmfs_hash; 365433d6423SLionel Sambuc } else { 366433d6423SLionel Sambuc /* The block just taken is not on the front of its hash chain. 
*/ 367433d6423SLionel Sambuc while (prev_ptr->lmfs_hash != NULL) 368433d6423SLionel Sambuc if (prev_ptr->lmfs_hash == bp) { 369433d6423SLionel Sambuc prev_ptr->lmfs_hash = bp->lmfs_hash; /* found it */ 370433d6423SLionel Sambuc break; 371433d6423SLionel Sambuc } else { 372433d6423SLionel Sambuc prev_ptr = prev_ptr->lmfs_hash; /* keep looking */ 373433d6423SLionel Sambuc } 374433d6423SLionel Sambuc } 375433d6423SLionel Sambuc 376433d6423SLionel Sambuc freeblock(bp); 377433d6423SLionel Sambuc 378433d6423SLionel Sambuc bp->lmfs_inode = ino; 379433d6423SLionel Sambuc bp->lmfs_inode_offset = ino_off; 380433d6423SLionel Sambuc 381433d6423SLionel Sambuc bp->lmfs_flags = VMMC_BLOCK_LOCKED; 382433d6423SLionel Sambuc bp->lmfs_needsetcache = 0; 383433d6423SLionel Sambuc bp->lmfs_dev = dev; /* fill in device number */ 384433d6423SLionel Sambuc bp->lmfs_blocknr = block; /* fill in block number */ 385433d6423SLionel Sambuc ASSERT(bp->lmfs_count == 0); 386433d6423SLionel Sambuc raisecount(bp); 387433d6423SLionel Sambuc b = BUFHASH(bp->lmfs_blocknr); 388433d6423SLionel Sambuc bp->lmfs_hash = buf_hash[b]; 389433d6423SLionel Sambuc 390433d6423SLionel Sambuc buf_hash[b] = bp; /* add to hash list */ 391433d6423SLionel Sambuc 392433d6423SLionel Sambuc assert(dev != NO_DEV); 393433d6423SLionel Sambuc 394433d6423SLionel Sambuc /* Block is not found in our cache, but we do want it 395433d6423SLionel Sambuc * if it's in the vm cache. 
396433d6423SLionel Sambuc */ 397433d6423SLionel Sambuc assert(!bp->data); 398433d6423SLionel Sambuc assert(!bp->lmfs_bytes); 399433d6423SLionel Sambuc if(vmcache) { 400433d6423SLionel Sambuc if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off, 401433d6423SLionel Sambuc &bp->lmfs_flags, fs_block_size)) != MAP_FAILED) { 402433d6423SLionel Sambuc bp->lmfs_bytes = fs_block_size; 403433d6423SLionel Sambuc ASSERT(!bp->lmfs_needsetcache); 404433d6423SLionel Sambuc return bp; 405433d6423SLionel Sambuc } 406433d6423SLionel Sambuc } 407433d6423SLionel Sambuc bp->data = NULL; 408433d6423SLionel Sambuc 409433d6423SLionel Sambuc /* Not in the cache; reserve memory for its contents. */ 410433d6423SLionel Sambuc 411433d6423SLionel Sambuc lmfs_alloc_block(bp); 412433d6423SLionel Sambuc 413433d6423SLionel Sambuc assert(bp->data); 414433d6423SLionel Sambuc 415433d6423SLionel Sambuc if(only_search == PREFETCH) { 416433d6423SLionel Sambuc /* PREFETCH: don't do i/o. */ 417433d6423SLionel Sambuc bp->lmfs_dev = NO_DEV; 418433d6423SLionel Sambuc } else if (only_search == NORMAL) { 419433d6423SLionel Sambuc read_block(bp); 420433d6423SLionel Sambuc } else if(only_search == NO_READ) { 421433d6423SLionel Sambuc /* This block will be overwritten by new contents. 
*/ 422433d6423SLionel Sambuc } else 423433d6423SLionel Sambuc panic("unexpected only_search value: %d", only_search); 424433d6423SLionel Sambuc 425433d6423SLionel Sambuc assert(bp->data); 426433d6423SLionel Sambuc 427433d6423SLionel Sambuc return(bp); /* return the newly acquired block */ 428433d6423SLionel Sambuc } 429433d6423SLionel Sambuc 430433d6423SLionel Sambuc /*===========================================================================* 431433d6423SLionel Sambuc * lmfs_put_block * 432433d6423SLionel Sambuc *===========================================================================*/ 433433d6423SLionel Sambuc void lmfs_put_block( 434433d6423SLionel Sambuc struct buf *bp, /* pointer to the buffer to be released */ 435433d6423SLionel Sambuc int block_type /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ 436433d6423SLionel Sambuc ) 437433d6423SLionel Sambuc { 438433d6423SLionel Sambuc /* Return a block to the list of available blocks. Depending on 'block_type' 439433d6423SLionel Sambuc * it may be put on the front or rear of the LRU chain. Blocks that are 440433d6423SLionel Sambuc * expected to be needed again shortly (e.g., partially full data blocks) 441433d6423SLionel Sambuc * go on the rear; blocks that are unlikely to be needed again shortly 442433d6423SLionel Sambuc * (e.g., full data blocks) go on the front. Blocks whose loss can hurt 443433d6423SLionel Sambuc * the integrity of the file system (e.g., inode blocks) are written to 444433d6423SLionel Sambuc * disk immediately if they are dirty. 
445433d6423SLionel Sambuc */ 446433d6423SLionel Sambuc dev_t dev; 447b65ad59eSDavid van Moolenbroek uint64_t dev_off; 448*d75faf18SDavid van Moolenbroek int r, setflags; 449433d6423SLionel Sambuc 450433d6423SLionel Sambuc if (bp == NULL) return; /* it is easier to check here than in caller */ 451433d6423SLionel Sambuc 452433d6423SLionel Sambuc dev = bp->lmfs_dev; 453433d6423SLionel Sambuc 454b65ad59eSDavid van Moolenbroek dev_off = bp->lmfs_blocknr * fs_block_size; 455433d6423SLionel Sambuc 456433d6423SLionel Sambuc lowercount(bp); 457433d6423SLionel Sambuc if (bp->lmfs_count != 0) return; /* block is still in use */ 458433d6423SLionel Sambuc 459433d6423SLionel Sambuc /* Put this block back on the LRU chain. */ 460e94f856bSDavid van Moolenbroek if (dev == NO_DEV || dev == DEV_RAM || (block_type & ONE_SHOT)) { 461433d6423SLionel Sambuc /* Block probably won't be needed quickly. Put it on front of chain. 462433d6423SLionel Sambuc * It will be the next block to be evicted from the cache. 463433d6423SLionel Sambuc */ 464433d6423SLionel Sambuc bp->lmfs_prev = NULL; 465433d6423SLionel Sambuc bp->lmfs_next = front; 466433d6423SLionel Sambuc if (front == NULL) 467433d6423SLionel Sambuc rear = bp; /* LRU chain was empty */ 468433d6423SLionel Sambuc else 469433d6423SLionel Sambuc front->lmfs_prev = bp; 470433d6423SLionel Sambuc front = bp; 471433d6423SLionel Sambuc } 472433d6423SLionel Sambuc else { 473433d6423SLionel Sambuc /* Block probably will be needed quickly. Put it on rear of chain. 474433d6423SLionel Sambuc * It will not be evicted from the cache for a long time. 
475433d6423SLionel Sambuc */ 476433d6423SLionel Sambuc bp->lmfs_prev = rear; 477433d6423SLionel Sambuc bp->lmfs_next = NULL; 478433d6423SLionel Sambuc if (rear == NULL) 479433d6423SLionel Sambuc front = bp; 480433d6423SLionel Sambuc else 481433d6423SLionel Sambuc rear->lmfs_next = bp; 482433d6423SLionel Sambuc rear = bp; 483433d6423SLionel Sambuc } 484433d6423SLionel Sambuc 485433d6423SLionel Sambuc assert(bp->lmfs_flags & VMMC_BLOCK_LOCKED); 486433d6423SLionel Sambuc bp->lmfs_flags &= ~VMMC_BLOCK_LOCKED; 487433d6423SLionel Sambuc 488433d6423SLionel Sambuc /* block has sensible content - if necesary, identify it to VM */ 489433d6423SLionel Sambuc if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { 490*d75faf18SDavid van Moolenbroek setflags = (block_type & ONE_SHOT) ? VMSF_ONCE : 0; 491*d75faf18SDavid van Moolenbroek if ((r = vm_set_cacheblock(bp->data, dev, dev_off, bp->lmfs_inode, 492*d75faf18SDavid van Moolenbroek bp->lmfs_inode_offset, &bp->lmfs_flags, fs_block_size, 493*d75faf18SDavid van Moolenbroek setflags)) != OK) { 494433d6423SLionel Sambuc if(r == ENOSYS) { 495433d6423SLionel Sambuc printf("libminixfs: ENOSYS, disabling VM calls\n"); 496433d6423SLionel Sambuc vmcache = 0; 497433d6423SLionel Sambuc } else { 498433d6423SLionel Sambuc panic("libminixfs: setblock of %p dev 0x%llx off " 499433d6423SLionel Sambuc "0x%llx failed\n", bp->data, dev, dev_off); 500433d6423SLionel Sambuc } 501433d6423SLionel Sambuc } 502433d6423SLionel Sambuc } 503433d6423SLionel Sambuc bp->lmfs_needsetcache = 0; 504*d75faf18SDavid van Moolenbroek 505*d75faf18SDavid van Moolenbroek /* Now that we (may) have given the block to VM, invalidate the block if it 506*d75faf18SDavid van Moolenbroek * is a one-shot block. Otherwise, it may still be reobtained immediately 507*d75faf18SDavid van Moolenbroek * after, which could be a problem if VM already forgot the block and we are 508*d75faf18SDavid van Moolenbroek * expected to pass it to VM again, which then wouldn't happen. 
509*d75faf18SDavid van Moolenbroek */ 510*d75faf18SDavid van Moolenbroek if (block_type & ONE_SHOT) 511*d75faf18SDavid van Moolenbroek bp->lmfs_dev = NO_DEV; 512e94f856bSDavid van Moolenbroek } 513433d6423SLionel Sambuc 514e94f856bSDavid van Moolenbroek /*===========================================================================* 515e94f856bSDavid van Moolenbroek * lmfs_free_block * 516e94f856bSDavid van Moolenbroek *===========================================================================*/ 517e94f856bSDavid van Moolenbroek void lmfs_free_block(dev_t dev, block64_t block) 518e94f856bSDavid van Moolenbroek { 519e94f856bSDavid van Moolenbroek /* The file system has just freed the given block. The block may previously 520e94f856bSDavid van Moolenbroek * have been in use as data block for an inode. Therefore, we now need to tell 521e94f856bSDavid van Moolenbroek * VM that the block is no longer associated with an inode. If we fail to do so 522e94f856bSDavid van Moolenbroek * and the inode now has a hole at this location, mapping in the hole would 523e94f856bSDavid van Moolenbroek * yield the old block contents rather than a zeroed page. In addition, if the 524e94f856bSDavid van Moolenbroek * block is in the cache, it will be removed, even if it was dirty. 525e94f856bSDavid van Moolenbroek */ 526e94f856bSDavid van Moolenbroek struct buf *bp; 527e94f856bSDavid van Moolenbroek int r; 528e94f856bSDavid van Moolenbroek 529e94f856bSDavid van Moolenbroek /* Tell VM to forget about the block. The primary purpose of this call is to 530e94f856bSDavid van Moolenbroek * break the inode association, but since the block is part of a mounted file 531e94f856bSDavid van Moolenbroek * system, it is not expected to be accessed directly anyway. So, save some 532e94f856bSDavid van Moolenbroek * cache memory by throwing it out of the VM cache altogether. 
 */
	if(vmcache) {
		/* Tell VM to drop its copy of this block; a failure here is
		 * only logged, since the local invalidation below still
		 * proceeds.
		 */
		if ((r = vm_forget_cacheblock(dev, block * fs_block_size,
		    fs_block_size)) != OK)
			printf("libminixfs: vm_forget_cacheblock failed (%d)\n", r);
	}

	if ((bp = find_block(dev, block)) != NULL) {
		lmfs_markclean(bp);

		/* Invalidate the block. The block may or may not be in use right now,
		 * so don't be smart about freeing memory or repositioning in the LRU.
		 */
		bp->lmfs_dev = NO_DEV;
	}

	/* Note that this is *not* the right place to implement TRIM support. Even
	 * though the block is freed, on the device it may still be part of a
	 * previous checkpoint or snapshot of some sort. Only the file system can
	 * be trusted to decide which blocks can be reused on the device!
	 */
}

/*===========================================================================*
 *				lmfs_zero_block_ino			     *
 *===========================================================================*/
void lmfs_zero_block_ino(dev_t dev, ino_t ino, u64_t ino_off)
{
/* Files may have holes. From an application perspective, these are just file
 * regions filled with zeroes. From a file system perspective however, holes
 * may represent unallocated regions on disk. Thus, these holes do not have
 * corresponding blocks on the disk, and therefore also no block number.
 * Therefore, we cannot simply use lmfs_get_block_ino() for them. For reads,
 * this is not a problem, since the file system can just zero out the target
 * application buffer instead. For mapped pages however, this *is* a problem,
 * since the VM cache needs to be told about the corresponding block, and VM
 * does not accept blocks without a device offset. The role of this function is
 * therefore to tell VM about the hole using a fake device offset. The device
 * offsets are picked so that the VM cache will see a block memory-mapped for
 * the hole in the file, while the same block is not visible when
 * memory-mapping the block device.
 */
  struct buf *bp;
  static block64_t fake_block = 0;

  /* Without the VM secondary cache there is nothing to tell VM about. */
  if (!vmcache)
	return;

  assert(fs_block_size > 0);

  /* Pick a block number which is above the threshold of what can possibly be
   * mapped in by mmap'ing the device, since off_t is signed, and it is safe to
   * say that it will take a while before we have 8-exabyte devices. Pick a
   * different block number each time to avoid possible concurrency issues.
   * FIXME: it does not seem like VM actually verifies mmap offsets though..
   */
  if (fake_block == 0 || ++fake_block >= UINT64_MAX / fs_block_size)
	fake_block = ((uint64_t)INT64_MAX + 1) / fs_block_size;

  /* Obtain a block. NO_READ: the contents are never fetched from disk. */
  bp = lmfs_get_block_ino(dev, fake_block, NO_READ, ino, ino_off);
  assert(bp != NULL);
  assert(bp->lmfs_dev != NO_DEV);

  /* The block is already zeroed, as it has just been allocated with mmap. File
   * systems do not rely on this assumption yet, so if VM ever gets changed to
   * not clear the blocks we allocate (e.g., by recycling pages in the VM cache
   * for the same process, which would be safe), we need to add a memset here.
   */

  /* Release the block. We don't expect it to be accessed ever again. Moreover,
   * if we keep the block around in the VM cache, it may erroneously be mapped
   * in beyond the file end later. Hence, use VMSF_ONCE when passing it to VM.
   * TODO: tell VM that it is an all-zeroes block, so that VM can deduplicate
   * all such pages in its cache.
   */
  lmfs_put_block(bp, ONE_SHOT);
}

/* Re-run the cache size heuristic for the given device, but only when no
 * buffers are currently held (resizing with buffers in use would panic in
 * cache_resize()).
 */
void lmfs_cache_reevaluate(dev_t dev)
{
  if(bufs_in_use == 0 && dev != NO_DEV) {
	/* if the cache isn't in use any more, we could resize it. */
	cache_heuristic_check(major(dev));
  }
}

/*===========================================================================*
 *				read_block				     *
 *===========================================================================*/
static void read_block(
  struct buf *bp		/* buffer pointer */
)
{
/* Read or write a disk block. This is the only routine in which actual disk
 * I/O is invoked. If an error occurs, a message is printed here, but the error
 * is not reported to the caller. If the error occurred while purging a block
 * from the cache, it is not clear what the caller could do about it anyway.
 * On failure the buffer is invalidated and the error is latched in rdwt_err
 * for lmfs_rdwt_err() to report.
 */
  int r, op_failed;
  off_t pos;
  dev_t dev = bp->lmfs_dev;

  op_failed = 0;

  assert(dev != NO_DEV);

  ASSERT(bp->lmfs_bytes == fs_block_size);
  ASSERT(fs_block_size > 0);

  pos = (off_t)bp->lmfs_blocknr * fs_block_size;
  if(fs_block_size > PAGE_SIZE) {
#define MAXPAGES 20
	/* A block larger than a page is read with a gather vector of
	 * page-sized chunks.
	 * NOTE(review): p is not bounds-checked against MAXPAGES; this
	 * assumes fs_block_size <= MAXPAGES * PAGE_SIZE — confirm callers
	 * never set a larger block size.
	 */
	vir_bytes blockrem, vaddr = (vir_bytes) bp->data;
	int p = 0;
	static iovec_t iovec[MAXPAGES];
	blockrem = fs_block_size;
	while(blockrem > 0) {
		vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem;
		iovec[p].iov_addr = vaddr;
		iovec[p].iov_size = chunk;
		vaddr += chunk;
		blockrem -= chunk;
		p++;
	}
	r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS);
  } else {
	r = bdev_read(dev, pos, bp->data, fs_block_size,
		BDEV_NOFLAGS);
  }
  if (r < 0) {
	printf("fs cache: I/O error on device %d/%d, block %"PRIu64"\n",
	    major(dev), minor(dev), bp->lmfs_blocknr);
	op_failed = 1;
  } else if (r != (ssize_t) fs_block_size) {
	/* Short read: treat it as end-of-file on the device. */
	r = END_OF_FILE;
	op_failed = 1;
  }

  if (op_failed) {
	bp->lmfs_dev = NO_DEV;	/* invalidate block */

	/* Report read errors to interested parties. */
	rdwt_err = r;
  }

}

/*===========================================================================*
 *				lmfs_invalidate				     *
 *===========================================================================*/
void lmfs_invalidate(
  dev_t device			/* device whose blocks are to be purged */
)
{
/* Remove all the blocks belonging to some device from the cache. */

  register struct buf *bp;

  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
	if (bp->lmfs_dev == device) {
		/* Unmap the buffer's memory and fully detach it. */
		assert(bp->data);
		assert(bp->lmfs_bytes > 0);
		munmap_t(bp->data, bp->lmfs_bytes);
		bp->lmfs_dev = NO_DEV;
		bp->lmfs_bytes = 0;
		bp->data = NULL;
	}
  }

  /* Clear the cache even if VM caching is disabled for the file system:
   * caching may be disabled as side effect of an error, leaving blocks behind
   * in the actual VM cache.
   */
  vm_clear_cache(device);
}

/*===========================================================================*
 *				lmfs_flushdev				     *
 *===========================================================================*/
void lmfs_flushdev(dev_t dev)
{
/* Flush all dirty blocks for one device. */

  register struct buf *bp;
  static struct buf **dirty;	/* static so it isn't on stack */
  static unsigned int dirtylistsize = 0;
  int ndirty;

  /* (Re)allocate the dirty list whenever the buffer pool size changed. */
  if(dirtylistsize != nr_bufs) {
	if(dirtylistsize > 0) {
		assert(dirty != NULL);
		free(dirty);
	}
	if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs)))
		panic("couldn't allocate dirty buf list");
	dirtylistsize = nr_bufs;
  }

  /* Collect all dirty buffers that belong to this device. */
  for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) {
	if (!lmfs_isclean(bp) && bp->lmfs_dev == dev) {
		dirty[ndirty++] = bp;
	}
  }

  lmfs_rw_scattered(dev, dirty, ndirty, WRITING);
}

/*===========================================================================*
 *				lmfs_rw_scattered			     *
 *===========================================================================*/
void lmfs_rw_scattered(
  dev_t dev,			/* major-minor device number */
  struct buf **bufq,		/* pointer to array of buffers */
  int bufqsize,			/* number of buffers */
  int rw_flag			/* READING or WRITING */
)
{
/* Read or write scattered data from a device. Buffers are sorted by block
 * number, then issued to the driver in runs of consecutive blocks, each run
 * bounded by the NR_IOREQS I/O vector capacity.
 */

  register struct buf *bp;
  int gap;
  register int i;
  register iovec_t *iop;
  static iovec_t iovec[NR_IOREQS];
  off_t pos;
  int iov_per_block;
  unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize;

  assert(bufqsize >= 0);
  if(bufqsize == 0) return;

  /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
  if (rw_flag == READING) {
	for(i = 0; i < bufqsize; i++) {
		assert(bufq[i] != NULL);
		assert(bufq[i]->lmfs_count > 0);
  	}

  	/* therefore they are all 'in use' and must be at least this many */
	  assert(start_in_use >= start_bufqsize);
  }

  assert(dev != NO_DEV);
  assert(fs_block_size > 0);
  /* Number of page-sized iovec entries needed per block. */
  iov_per_block = roundup(fs_block_size, PAGE_SIZE) / PAGE_SIZE;
  assert(iov_per_block < NR_IOREQS);

  /* (Shell) sort buffers on lmfs_blocknr. */
  gap = 1;
  do
	gap = 3 * gap + 1;
  while (gap <= bufqsize);
  while (gap != 1) {
	int j;
	gap /= 3;
	for (j = gap; j < bufqsize; j++) {
		for (i = j - gap;
		     i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
		     i -= gap) {
			bp = bufq[i];
			bufq[i] = bufq[i + gap];
			bufq[i + gap] = bp;
		}
	}
  }

  /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
  while (bufqsize > 0) {
	int nblocks = 0, niovecs = 0;
	int r;
	/* Build one I/O vector out of the longest run of consecutive blocks
	 * that still fits in the iovec array.
	 */
	for (iop = iovec; nblocks < bufqsize; nblocks++) {
		int p;
		vir_bytes vdata, blockrem;
		bp = bufq[nblocks];
		if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks)
			break;
		if(niovecs >= NR_IOREQS-iov_per_block) break;
		vdata = (vir_bytes) bp->data;
		blockrem = fs_block_size;
		for(p = 0; p < iov_per_block; p++) {
			vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
			iop->iov_addr = vdata;
			iop->iov_size = chunk;
			vdata += PAGE_SIZE;
			blockrem -= chunk;
			iop++;
			niovecs++;
		}
		assert(p == iov_per_block);
		assert(blockrem == 0);
	}

	assert(nblocks > 0);
	assert(niovecs > 0);

	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
	if (rw_flag == READING)
		r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS);
	else
		r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS);

	/* Harvest the results. The driver may have returned an error, or it
	 * may have done less than what we asked for.
	 */
	if (r < 0) {
		printf("fs cache: I/O error %d on device %d/%d, "
		    "block %"PRIu64"\n",
		    r, major(dev), minor(dev), bufq[0]->lmfs_blocknr);
	}
	for (i = 0; i < nblocks; i++) {
		bp = bufq[i];
		if (r < (ssize_t) fs_block_size) {
			/* Transfer failed. */
			if (i == 0) {
				bp->lmfs_dev = NO_DEV;	/* Invalidate block */
			}
			break;
		}
		if (rw_flag == READING) {
			bp->lmfs_dev = dev;	/* validate block */
			lmfs_put_block(bp, PARTIAL_DATA_BLOCK);
		} else {
			MARKCLEAN(bp);
		}
		r -= fs_block_size;
	}

	bufq += i;
	bufqsize -= i;

	if (rw_flag == READING) {
		/* Don't bother reading more than the device is willing to
		 * give at this time. Don't forget to release those extras.
		 */
		while (bufqsize > 0) {
			lmfs_put_block(*bufq++, PARTIAL_DATA_BLOCK);
			bufqsize--;
		}
	}
	if (rw_flag == WRITING && i == 0) {
		/* We're not making progress, this means we might keep
		 * looping. Buffers remain dirty if un-written. Buffers are
		 * lost if invalidate()d or LRU-removed while dirty. This
		 * is better than keeping unwritable blocks around forever..
		 */
		break;
	}
  }

  if(rw_flag == READING) {
	assert(start_in_use >= start_bufqsize);

	/* READING callers assume all bufs are released. */
	assert(start_in_use - start_bufqsize == bufs_in_use);
  }
}

/*===========================================================================*
 *				rm_lru					     *
 *===========================================================================*/
static void rm_lru(struct buf *bp)
{
/* Remove a block from its LRU chain. */
  struct buf *next_ptr, *prev_ptr;

  next_ptr = bp->lmfs_next;	/* successor on LRU chain */
  prev_ptr = bp->lmfs_prev;	/* predecessor on LRU chain */
  if (prev_ptr != NULL)
	prev_ptr->lmfs_next = next_ptr;
  else
	front = next_ptr;	/* this block was at front of chain */

  if (next_ptr != NULL)
	next_ptr->lmfs_prev = prev_ptr;
  else
	rear = prev_ptr;	/* this block was at rear of chain */
}

/*===========================================================================*
 *				cache_resize				     *
 *===========================================================================*/
static void cache_resize(unsigned int blocksize, unsigned int bufs)
{
/* Resize the buffer pool to 'bufs' buffers of 'blocksize' bytes each. May
 * only be called when no buffer is held by a caller.
 */
  struct buf *bp;

  assert(blocksize > 0);
  assert(bufs >= MINBUFS);

  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
	if(bp->lmfs_count != 0) panic("change blocksize with buffer in use");

  lmfs_buf_pool(bufs);

  fs_block_size = blocksize;
}

/* Recompute the buffer pool size from file system usage statistics and
 * resize the cache if the heuristic answer differs enough from the current
 * size.
 */
static void cache_heuristic_check(int major)
{
  int bufs, d;
  u64_t btotal, bfree, bused;

  fs_blockstats(&btotal, &bfree, &bused);

  bufs = fs_bufs_heuristic(10, btotal, bfree,
	fs_block_size, major);

  /* set the cache to the new heuristic size if the new one
   * is more than 10% off from the current one.
   */
  d = bufs-nr_bufs;
  if(d < 0) d = -d;
  if(d*100/nr_bufs > 10) {
	cache_resize(fs_block_size, bufs);
  }
}

/*===========================================================================*
 *				lmfs_set_blocksize			     *
 *===========================================================================*/
void lmfs_set_blocksize(int new_block_size, int major)
{
  cache_resize(new_block_size, MINBUFS);
  cache_heuristic_check(major);

  /* Decide whether to use secondary cache or not.
   * Only do this if
   *	- it's available, and
   *	- use of it hasn't been disabled for this fs, and
   *	- our main FS device isn't a memory device
   */

  vmcache = 0;

  /* VM caching requires block size to be a multiple of the page size. */
  if(may_use_vmcache && !(new_block_size % PAGE_SIZE))
	vmcache = 1;
}

/*===========================================================================*
 *				lmfs_buf_pool				     *
 *===========================================================================*/
void lmfs_buf_pool(int new_nr_bufs)
{
/* Initialize the buffer pool. Any previous pool is flushed, unmapped and
 * freed first.
 */
  register struct buf *bp;

  assert(new_nr_bufs >= MINBUFS);

  if(nr_bufs > 0) {
	assert(buf);
	lmfs_flushall();
	for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
		if(bp->data) {
			assert(bp->lmfs_bytes > 0);
			munmap_t(bp->data, bp->lmfs_bytes);
		}
	}
  }

  if(buf)
	free(buf);

  if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs)))
	panic("couldn't allocate buf list (%d)", new_nr_bufs);

  if(buf_hash)
	free(buf_hash);
  if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs)))
	panic("couldn't allocate buf hash list (%d)", new_nr_bufs);

  nr_bufs = new_nr_bufs;

  bufs_in_use = 0;
  front = &buf[0];
  rear = &buf[nr_bufs - 1];

  /* Link all buffers into one LRU chain, front to rear. */
  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
        bp->lmfs_blocknr = NO_BLOCK;
        bp->lmfs_dev = NO_DEV;
        bp->lmfs_next = bp + 1;
        bp->lmfs_prev = bp - 1;
        bp->data = NULL;
        bp->lmfs_bytes = 0;
  }
  front->lmfs_prev = NULL;
  rear->lmfs_next = NULL;

  /* Initially all buffers hang off hash slot 0, chained via lmfs_hash. */
  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->lmfs_hash = bp->lmfs_next;
  buf_hash[0] = front;
}

/* Return the number of buffers currently held (not on the free list). */
int lmfs_bufs_in_use(void)
{
	return bufs_in_use;
}

/* Return the current size of the buffer pool. */
int lmfs_nr_bufs(void)
{
	return nr_bufs;
}

/* Write out all dirty blocks on all devices, one device at a time. */
void lmfs_flushall(void)
{
	struct buf *bp;
	for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
		if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) 
			lmfs_flushdev(bp->lmfs_dev);
}

/* Return the current file system block size. */
int lmfs_fs_block_size(void)
{
	return fs_block_size;
}

/* Enable (nonzero) or disable (zero) use of the VM secondary cache; the
 * decision takes effect on the next lmfs_set_blocksize() call.
 */
void lmfs_may_use_vmcache(int ok)
{
	may_use_vmcache = ok;
}

/* Clear the latched I/O error before starting a new operation. */
void lmfs_reset_rdwt_err(void)
{
	rdwt_err = OK;
}

/* Return the I/O error latched by read_block(), if any. */
int lmfs_rdwt_err(void)
{
	return rdwt_err;
}