/*	vfs_cluster.c	4.33	82/06/07	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_vm.vm_inblk++;		/* pay for read */
	biowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rasize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rasize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_vm.vm_inblk++;	/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rasize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}

	/*
	 * If the block was in core, let bread get it.
	 * If it wasn't, the read was started above, so
	 * just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
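
/*
 * Illustrative sketch (added for exposition, never compiled in):
 * how a caller might use bread/breada and release the buffer when
 * done.  The routine name "readexample" and its arguments are
 * hypothetical; any i/o error is posted in u.u_error by geterror.
 */
#ifdef notdef
struct buf *
readexample(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp;

	if (rablkno)
		bp = breada(dev, blkno, size, rablkno, size);
	else
		bp = bread(dev, blkno, size);
	if (u.u_error) {
		brelse(bp);		/* give the buffer back on error */
		return (NULL);
	}
	return (bp);			/* caller must brelse(bp) later */
}
#endif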

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 * Otherwise, the i/o must be finished and we check for
	 * an error.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
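
/*
 * Illustrative sketch (hypothetical routine, added for exposition):
 * choosing among the three write flavors above.  "mustwait" and
 * "fullblock" are assumed flags supplied by the caller.
 */
#ifdef notdef
writeexample(bp, mustwait, fullblock)
	register struct buf *bp;
	int mustwait, fullblock;
{

	if (mustwait)
		bwrite(bp);	/* synchronous: biowait, then brelse */
	else if (fullblock)
		bawrite(bp);	/* start i/o now; released by biodone */
	else
		bdwrite(bp);	/* just mark B_DELWRI; written later */
}
#endif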

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a free buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Return the block if it is already in core;
 * otherwise return 0 without starting any i/o.
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
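
/*
 * Illustrative sketch (hypothetical routine, added for exposition):
 * baddr used as a cache probe -- the block is returned only if it
 * is already in core, so no disk read is ever initiated.
 */
#ifdef notdef
cacheexample(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = baddr(dev, blkno, size);
	if (bp == 0)
		return (0);		/* not cached; nothing started */
	/* data is at bp->b_un.b_addr, bp->b_bcount bytes long */
	brelse(bp);
	return (1);
}
#endif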

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	/*
	 * Not found in the cache, select something from
	 * a free list.  Preference is to LRU list, then AGE list.
	 */
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY|B_INVAL;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = (dev_t)NODEV;
	brealloc(bp, size);
	return(bp);
}
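
/*
 * Illustrative sketch (hypothetical routine, added for exposition):
 * getblk used when the block will be completely overwritten, so no
 * read from the device is needed before writing.
 */
#ifdef notdef
newblkexample(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);	/* no i/o; may be a cache hit */
	/* ... fill bp->b_un.b_addr with the new contents ... */
	bdwrite(bp);			/* write back lazily */
}
#endif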

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that any overlapping
	 * previous I/O is disposed of.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount || bp->b_dev == NODEV)
		goto allocit;

	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	/*
	 * (Example: with DEV_BSIZE 512, growing a 1024-byte buffer at
	 * b_blkno 32 to 2048 bytes gives start 34, last 35; any other
	 * buffer on this device whose b_blkno lies in that range must
	 * be dealt with before this buffer may grow over it.)
	 */
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			(void) splx(s);
			goto loop;
		}
		(void) splx(s);
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that,
		 * so we force it to complete, so that it cannot
		 * overwrite our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}
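
/*
 * Illustrative sketch (hypothetical routine, added for exposition):
 * the synchronous i/o sequence that bread and bwrite both follow --
 * hand the buffer to the driver's strategy routine, then biowait
 * until biodone marks it B_DONE.  The caller is assumed to have set
 * up bp (b_dev, b_blkno, b_flags) and to release it afterward.
 */
#ifdef notdef
syncioexample(bp)
	register struct buf *bp;
{

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	biowait(bp);		/* sleeps at PRIBIO until biodone */
}
#endif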

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if ((bp->b_flags & B_DELWRI) == 0)
			continue;
		if (dev == NODEV || dev == bp->b_dev) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true, because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in-core blocks belonging to a closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness.					... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
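
/*
 * Illustrative sketch (hypothetical routine, added for exposition):
 * how a umount-style path might combine bflush and binval -- push
 * any delayed writes for the device, then invalidate what remains
 * in the cache.
 */
#ifdef notdef
umountexample(dev)
	dev_t dev;
{

	bflush(dev);		/* write out B_DELWRI blocks */
	binval(dev);		/* mark remaining blocks B_INVAL */
}
#endif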