/*	vfs_cluster.c	4.32	82/06/01	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return (bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_vm.vm_inblk++;		/* pay for read */
	biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_vm.vm_inblk++;	/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}

	/*
	 * If we get here with bp NULL, then the block
	 * must've been in core and bread will find it for us.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
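#ifdef notdef
/*
 * Illustrative sketch only, hence not compiled: how a file system
 * read path might drive bread/breada.  The routine name and its
 * arguments are hypothetical; only bread, breada, brelse, and the
 * B_ERROR flag come from this file and buf.h.
 */
struct buf *
readexample(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;		/* rablkno == 0: no read-ahead */
	int size;
{
	register struct buf *bp;

	if (rablkno)
		bp = breada(dev, blkno, rablkno, size);
	else
		bp = bread(dev, blkno, size);
	if (bp->b_flags & B_ERROR) {
		brelse(bp);		/* geterror already set u.u_error */
		return (NULL);
	}
	return (bp);	/* caller copies from the buffer, then brelse's it */
}
#endif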
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 * Otherwise, the i/o must be finished and we check for
	 * an error.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
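/*
 * A note on choosing among the three write interfaces above (a summary
 * of this file, not new mechanism; the conditions are hypothetical
 * examples):
 *
 *	bwrite(bp);	caller must know the write completed (or failed)
 *	bawrite(bp);	block is complete, start i/o but don't wait
 *	bdwrite(bp);	block is partial and likely to be written again
 *			soon, so just mark it B_DELWRI and keep it cached
 */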
/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Return the block if it is in core, otherwise 0 (with no i/o started).
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
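/*
 * Sketch of the cache structure the lookups above and below assume
 * (a summary of usage in this file; the real declarations live in
 * buf.h): each buffer sits on one of the BUFHASH(dev, blkno)
 * doubly-linked hash chains via b_forw/b_back, and, unless B_BUSY,
 * also on one of the BQUEUES free lists (BQ_LOCKED, BQ_AGE, BQ_LRU,
 * plus the last list, used here for empty/invalid buffers) via
 * av_forw/av_back.  A cache probe is thus one chain walk:
 *
 *	dp = BUFHASH(dev, blkno);
 *	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
 *		if (bp->b_blkno == blkno && bp->b_dev == dev)
 *			... found ...
 */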
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	/*
	 * Not found in the cache, select something from
	 * a free list.  Preference is to LRU list, then AGE list.
	 */
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY|B_INVAL;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = (dev_t)NODEV;
	brealloc(bp, size);
	return (bp);
}
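/*
 * Illustrative use of geteblk (a hypothetical fragment, not a caller
 * in this file): code that needs scratch space not tied to any disk
 * block, e.g. for staging a raw transfer, might do
 *
 *	bp = geteblk(size);
 *	... use the buffer's data area ...
 *	brelse(bp);
 *
 * Because geteblk marks the buffer B_INVAL, brelse puts it at the
 * front of the last free list, so it is reused before any buffer
 * holding cached data is sacrificed.
 */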
/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that any overlapping
	 * previous I/O is dispensed with.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount || bp->b_dev == NODEV)
		goto allocit;

	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			(void) splx(s);
			goto loop;
		}
		(void) splx(s);
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}
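/*
 * A note on the sleep/wakeup handshake (a summary of biowait above
 * and biodone below, not new mechanism): biowait tests B_DONE and
 * sleeps at spl6 so the test cannot race with the disk interrupt;
 * biodone, called from the driver at interrupt level, sets B_DONE
 * and issues the matching wakeup on the buffer header.
 */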
/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
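/*
 * Illustrative callers of the flush routines (hypothetical fragments,
 * consistent with the "from umount and update" note on bflush above):
 * the periodic update would flush every device's delayed writes with
 *
 *	bflush(NODEV);
 *
 * while unmounting a single file system would flush and then discard
 * its cached blocks:
 *
 *	bflush(dev);
 *	binval(dev);
 */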