/* vfs_cluster.c 4.40 82/12/17 */

#include "../machine/pte.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        if (size == 0)
                panic("bread: size 0");
        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
                trace(TR_BREADHIT, dev, blkno);
                return (bp);
        }
        bp->b_flags |= B_READ;
        if (bp->b_bcount > bp->b_bufsize)
                panic("bread");
        (*bdevsw[major(dev)].d_strategy)(bp);
        trace(TR_BREADMISS, dev, blkno);
        u.u_ru.ru_inblock++;            /* pay for read */
        biowait(bp);
        return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
        dev_t dev;
        daddr_t blkno; int size;
        daddr_t rablkno; int rabsize;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        /*
         * If the block isn't in core, then allocate
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        if (bp->b_bcount > bp->b_bufsize)
                                panic("breada");
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        trace(TR_BREADMISS, dev, blkno);
                        u.u_ru.ru_inblock++;    /* pay for read */
                } else
                        trace(TR_BREADHIT, dev, blkno);
        }

        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, rabsize);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        trace(TR_BREADHITRA, dev, blkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                                panic("breadrabp");
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        trace(TR_BREADMISSRA, dev, rablkno);
                        u.u_ru.ru_inblock++;    /* pay in advance */
                }
        }

        /*
         * If the block was in core, let bread get it.
         * If the block wasn't in core, then the read was started
         * above, so just wait for it.
         */
        if (bp == NULL)
                return (bread(dev, blkno, size));
        biowait(bp);
        return (bp);
}

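/*
 * A minimal sketch of the calling convention, assuming a hypothetical
 * caller that holds a device dev, a block number bn, the next block
 * rabn, and the file system block size bsize:
 *
 *      bp = bread(dev, bn, bsize);
 *      ...inspect bp->b_un.b_addr, check u.u_error...
 *      brelse(bp);
 *
 * breada returns only the named block; the read-ahead block is left
 * in the cache for a later bread to find already B_DONE:
 *
 *      bp = breada(dev, bn, bsize, rabn, bsize);
 */
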
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);

        /*
         * If the write was synchronous, then await i/o completion.
         * If the write was "delayed", then we put the buffer on
         * the q of blocks awaiting i/o completion status.
         * Otherwise, the i/o must be finished and we check for
         * an error.
         */
        if ((flag&B_ASYNC) == 0) {
                biowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
        else
                u.u_error = geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{
        register int flags;

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        flags = bdevsw[major(bp->b_dev)].d_flags;
        if (flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}

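/*
 * The three write entries differ only in when the i/o happens and
 * when the caller learns the outcome; a sketch, assuming a dirty
 * buffer bp obtained from getblk:
 *
 *      bwrite(bp);     synchronous: biowait then brelse, with any
 *                      error left in u.u_error
 *      bawrite(bp);    asynchronous: i/o is started, biodone does
 *                      the brelse
 *      bdwrite(bp);    delayed: marked B_DELWRI|B_DONE and released;
 *                      no i/o until the buffer is reclaimed or
 *                      bflush runs
 */
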
/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        /*
         * If someone's waiting for this buffer, or
         * is waiting for any buffer, wake 'em up.
         */
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */

        /*
         * Stick the buffer back on a free list.
         */
        s = spl6();
        if (bp->b_bufsize <= 0) {
                /* block has no buffer ... put at front of unused buffer list */
                flist = &bfreelist[BQ_EMPTY];
                binsheadfree(bp, flist);
        } else if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQ_AGE];
                binsheadfree(bp, flist);
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
}

/*
 * Read the block if it is in core; otherwise return 0.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}

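/*
 * In sketch form, where brelse (above) puts a buffer, applying the
 * tests in order:
 *
 *      b_bufsize <= 0          head of BQ_EMPTY (header only, no memory)
 *      B_ERROR or B_INVAL      head of BQ_AGE (contents useless, reuse first)
 *      B_LOCKED                tail of BQ_LOCKED (not to be reassigned)
 *      B_AGE                   tail of BQ_AGE (reclaim soon)
 *      otherwise               tail of BQ_LRU (normal aging)
 */
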
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp;
        int s;

        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) /* XXX */
                blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
        /*
         * Search the cache for the block.  If we hit, but
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(bp);
                if (brealloc(bp, size) == 0)
                        goto loop;
                bp->b_flags |= B_CACHE;
                return (bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        bp = getnewbuf();
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *flist;

loop:
        bp = getnewbuf();
        bp->b_flags |= B_INVAL;
        bfree(bp);
        bremhash(bp);
        flist = &bfreelist[BQ_AGE];
        binshash(bp, flist);
        bp->b_dev = (dev_t)NODEV;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
}

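/*
 * A minimal write-path sketch, assuming a hypothetical caller that
 * will overwrite all of block bn and so need not read it first:
 *
 *      bp = getblk(dev, bn, bsize);
 *      ...fill in bp->b_un.b_addr...
 *      bdwrite(bp);
 *
 * geteblk(bsize) is the same idea with no disk block attached; such
 * buffers come back marked B_INVAL with b_dev set to NODEV.
 */
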
/*
 * Allocate space associated with a buffer.
 * If space cannot be obtained, the buffer is released.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First we need to make sure that all overlapping previous
         * i/o has been disposed of.
         */
        if (size == bp->b_bcount)
                return (1);
        if (size < bp->b_bcount) {
                if (bp->b_flags & B_DELWRI) {
                        bwrite(bp);
                        return (0);
                }
                if (bp->b_flags & B_LOCKED)
                        panic("brealloc");
                return (allocbuf(bp, size));
        }
        bp->b_flags &= ~B_DONE;
        if (bp->b_dev == NODEV)
                return (allocbuf(bp, size));

        /*
         * Search the cache for any buffers that overlap the one that we
         * are trying to allocate.  Overlapping buffers must be marked
         * invalid, after being written out if they are dirty (indicated
         * by B_DELWRI).  A disk block must be mapped by at most one buffer
         * at any point in time.  Care must be taken to avoid deadlocking
         * when two buffers are trying to get the same set of disk blocks.
         */
        start = bp->b_blkno;
        last = start + (size / DEV_BSIZE) - 1;
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(ep);
                if (ep->b_flags & B_DELWRI) {
                        bwrite(ep);
                        goto loop;
                }
                ep->b_flags |= B_INVAL;
                brelse(ep);
        }
        return (allocbuf(bp, size));
}

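/*
 * The overlap test above is plain block arithmetic; a worked example,
 * assuming 512 byte DEV_BSIZE and a grow to 2048 bytes at block 32:
 *
 *      start = 32, last = 32 + 2048/512 - 1 = 35
 *
 * so a cached buffer ep is left alone only if it is empty, ends at or
 * before block 32 (ep->b_blkno + ep->b_bcount/512 <= 32), or starts
 * past block 35 (ep->b_blkno > 35); anything else is flushed if dirty
 * and invalidated.
 */
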
/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, the buffer is released and the
 * error exit is taken.
 */
allocbuf(tp, size)
        register struct buf *tp;
        int size;
{
        register struct buf *bp, *ep;
        int sizealloc, take;
#ifdef sun
        register char *a;
        int osize;
#endif

#ifndef sun
        sizealloc = roundup(size, CLBYTES);
#else
        sizealloc = roundup(size, BUFALLOCSIZE);
#endif
        /*
         * Buffer size does not change.
         */
        if (sizealloc == tp->b_bufsize)
                goto out;
#ifndef sun
        /*
         * Buffer size is shrinking.
         * Place excess space in a buffer header taken from the
         * BQ_EMPTY buffer list and placed on the "most free" list.
         * If no extra buffer headers are available, leave the
         * extra space in the present buffer.
         */
        if (sizealloc < tp->b_bufsize) {
                ep = bfreelist[BQ_EMPTY].av_forw;
                if (ep == &bfreelist[BQ_EMPTY])
                        goto out;
                notavail(ep);
                pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
                        (int)tp->b_bufsize - sizealloc);
                ep->b_bufsize = tp->b_bufsize - sizealloc;
                tp->b_bufsize = sizealloc;
                ep->b_flags |= B_INVAL;
                ep->b_bcount = 0;
                brelse(ep);
                goto out;
        }
        /*
         * More buffer space is needed.  Get it out of buffers on
         * the "most free" list, placing the empty headers on the
         * BQ_EMPTY buffer header list.
         */
        while (tp->b_bufsize < sizealloc) {
                take = sizealloc - tp->b_bufsize;
                bp = getnewbuf();
                if (take >= bp->b_bufsize)
                        take = bp->b_bufsize;
                pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
                        &tp->b_un.b_addr[tp->b_bufsize], take);
                tp->b_bufsize += take;
                bp->b_bufsize = bp->b_bufsize - take;
                if (bp->b_bcount > bp->b_bufsize)
                        bp->b_bcount = bp->b_bufsize;
                if (bp->b_bufsize <= 0) {
                        bremhash(bp);
                        binshash(bp, &bfreelist[BQ_EMPTY]);
                        bp->b_dev = (dev_t)NODEV;
                        bp->b_error = 0;
                        bp->b_flags |= B_INVAL;
                }
                brelse(bp);
        }
#else
        /*
         * Buffer size is shrinking.
         * Just put the tail end back in the map.
         */
        if (sizealloc < tp->b_bufsize) {
                rmfree(buffermap, (long)(tp->b_bufsize - sizealloc),
                        (long)(tp->b_un.b_addr + sizealloc));
                tp->b_bufsize = sizealloc;
                goto out;
        }
        /*
         * Buffer is being expanded or created.
         * If being expanded, attempt to get a contiguous
         * section, otherwise get a new chunk and copy.
         * If no space, free up a buffer on the AGE list
         * and try again.
         */
        do {
                if ((osize = tp->b_bufsize)) {
                        a = (char *)rmget(buffermap, (long)(sizealloc-osize),
                                (long)(tp->b_un.b_addr + osize));
                        if (a == 0) {
                                a = (char *)rmalloc(buffermap, (long)sizealloc);
                                if (a != 0) {
                                        bcopy(tp->b_un.b_addr, a, osize);
                                        rmfree(buffermap, (long)osize,
                                                (long)tp->b_un.b_addr);
                                        tp->b_un.b_addr = a;
                                }
                        }
                } else {
                        a = (char *)rmalloc(buffermap, (long)sizealloc);
                        if (a != 0)
                                tp->b_un.b_addr = a;
                }
        } while (a == 0 && bfreemem());
        if (a == 0) {
                brelse(tp);
                return (0);
        }
        tp->b_bufsize = sizealloc;
#endif
out:
        tp->b_bcount = size;
        return (1);
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
        struct buf *bp;
{
#ifdef sun
        if (bp->b_bufsize) {
                rmfree(buffermap, (long)bp->b_bufsize, (long)bp->b_un.b_addr);
                bp->b_bufsize = 0;
        }
#endif
        bp->b_bcount = 0;
}

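/*
 * allocbuf rounds each request up to the allocation grain before
 * comparing with b_bufsize, so regrowing within the same cluster
 * costs nothing; a sketch of the non-sun path above, assuming
 * 1024 byte CLBYTES:
 *
 *      allocbuf(tp, 600)       b_bufsize becomes 1024, b_bcount 600
 *      allocbuf(tp, 900)       no page traffic, b_bcount becomes 900
 *      allocbuf(tp, 3000)      b_bufsize grows to 3072, taking pages
 *                              from buffers that getnewbuf hands back
 */
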
#ifdef sun
/*
 * Attempt to free up buffer space by flushing
 * something on the free list.
 * Don't wait for anything; that could cause deadlocks.
 * We start with BQ_AGE because we know BQ_EMPTY buffers take no memory.
 */
bfreemem()
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
        for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        splx(s);
        if (dp == bfreelist) {          /* no free blocks */
                return (0);
        }
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY | B_INVAL;
        bfree(bp);
        bremhash(bp);
        binshash(bp, &bfreelist[BQ_EMPTY]);
        bp->b_dev = (dev_t)NODEV;
        bp->b_error = 0;
        brelse(bp);
        return (1);
}
#endif

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
#ifndef sun
        for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
#else
        for (dp = &bfreelist[BQ_EMPTY]; dp > bfreelist; dp--)
#endif
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY;
        return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
        register struct buf *bp;
{
        int s;

        s = spl6();
        while ((bp->b_flags&B_DONE) == 0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        u.u_error = geterror(bp);
}

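/*
 * The wakeup that ends the sleep above comes from biodone (below),
 * which the driver's strategy routine calls when the transfer is
 * finished.  A caller that cannot sleep can ask for a callback
 * instead; a sketch, where myiodone is a hypothetical routine:
 *
 *      bp->b_flags |= B_CALL;
 *      bp->b_iodone = myiodone;
 *      (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
 */
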
/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
        register struct buf *bp;
{
        register int s;

        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_DIRTY) {
                if (bp->b_flags & B_ERROR)
                        panic("IO err in push");
                s = spl6();
                bp->av_forw = bclnlist;
                bp->b_bcount = swsize[bp - swbuf];
                bp->b_pfcent = swpf[bp - swbuf];
                cnt.v_pgout++;
                cnt.v_pgpgout += bp->b_bcount / NBPG;
                bclnlist = bp;
                if (bswlist.b_flags & B_WANTED)
                        wakeup((caddr_t)&proc[2]);
                splx(s);
                return;
        }
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}

/*
 * Ensure that no dirty part of a specified block range remains in an
 * incore buffer: busy overlaps are waited out, delayed writes pushed.
 */
blkflush(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        long size;
{
        register struct buf *ep;
        struct buf *dp;
        daddr_t start, last;
        int s;

        start = blkno;
        last = start + (size / DEV_BSIZE) - 1;
        dp = BUFHASH(dev, blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                if (ep->b_flags & B_DELWRI) {
                        splx(s);
                        notavail(ep);
                        bwrite(ep);
                        goto loop;
                }
                splx(s);
        }
}

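/*
 * A minimal sketch of a blkflush call, assuming a hypothetical caller
 * about to rewrite an nblk-block extent at block bn directly:
 *
 *      blkflush(dev, bn, (long)nblk * DEV_BSIZE);
 *
 * On return every dirty cached overlap has been written, so the
 * device holds the newest copy of the range.
 */
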
/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = spl6();
        for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
                for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                        if ((bp->b_flags & B_DELWRI) == 0)
                                continue;
                        if (dev == NODEV || dev == bp->b_dev) {
                                bp->b_flags |= B_ASYNC;
                                notavail(bp);
                                bwrite(bp);
                                goto loop;
                        }
                }
        splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
        register struct buf *bp;
{
        int error = 0;

        if (bp->b_flags&B_ERROR)
                if ((error = bp->b_error) == 0)
                        return (EIO);
        return (error);
}

/*
 * Invalidate in-core blocks belonging to closed or unmounted filesystems.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness. ... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define dp ((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}

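/*
 * A sketch of the unmount-time pairing suggested by the two comments
 * above, assuming a device dev being taken out of service: push the
 * write-behind blocks first, then invalidate what remains, so no
 * stale cache contents survive a remount:
 *
 *      bflush(dev);
 *      binval(dev);
 */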