/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley. The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)vfs_cluster.c	7.10 (Berkeley) 08/26/89
 */

#include "param.h"
#include "user.h"
#include "buf.h"
#include "vnode.h"
#include "trace.h"
#include "ucred.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
298Sbill */ 30*38776Smckusick bread(vp, blkno, size, cred, bpp) 3137736Smckusick struct vnode *vp; 326563Smckusic daddr_t blkno; 336563Smckusic int size; 34*38776Smckusick struct ucred *cred; 3537736Smckusick struct buf **bpp; 368Sbill { 378Sbill register struct buf *bp; 388Sbill 398670S if (size == 0) 408670S panic("bread: size 0"); 4137736Smckusick *bpp = bp = getblk(vp, blkno, size); 4232608Smckusick if (bp->b_flags&(B_DONE|B_DELWRI)) { 4337736Smckusick trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno); 4437736Smckusick return (0); 458Sbill } 468Sbill bp->b_flags |= B_READ; 478670S if (bp->b_bcount > bp->b_bufsize) 488670S panic("bread"); 49*38776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 50*38776Smckusick crhold(cred); 51*38776Smckusick bp->b_rcred = cred; 52*38776Smckusick } 5337736Smckusick VOP_STRATEGY(bp); 5437736Smckusick trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno); 558039Sroot u.u_ru.ru_inblock++; /* pay for read */ 5637736Smckusick return (biowait(bp)); 578Sbill } 588Sbill 598Sbill /* 608Sbill * Read in the block, like bread, but also start I/O on the 618Sbill * read-ahead block (which is not allocated to the caller) 628Sbill */ 63*38776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp) 6437736Smckusick struct vnode *vp; 657114Smckusick daddr_t blkno; int size; 668592Sroot daddr_t rablkno; int rabsize; 67*38776Smckusick struct ucred *cred; 6837736Smckusick struct buf **bpp; 698Sbill { 708Sbill register struct buf *bp, *rabp; 718Sbill 728Sbill bp = NULL; 737015Smckusick /* 747015Smckusick * If the block isn't in core, then allocate 757015Smckusick * a buffer and initiate i/o (getblk checks 767015Smckusick * for a cache hit). 
777015Smckusick */ 7837736Smckusick if (!incore(vp, blkno)) { 7937736Smckusick *bpp = bp = getblk(vp, blkno, size); 8032608Smckusick if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) { 818Sbill bp->b_flags |= B_READ; 828670S if (bp->b_bcount > bp->b_bufsize) 838670S panic("breada"); 84*38776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 85*38776Smckusick crhold(cred); 86*38776Smckusick bp->b_rcred = cred; 87*38776Smckusick } 8837736Smckusick VOP_STRATEGY(bp); 8937736Smckusick trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), 9037736Smckusick blkno); 918039Sroot u.u_ru.ru_inblock++; /* pay for read */ 927015Smckusick } else 9337736Smckusick trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), 9437736Smckusick blkno); 958Sbill } 967015Smckusick 977015Smckusick /* 987015Smckusick * If there's a read-ahead block, start i/o 997015Smckusick * on it also (as above). 1007015Smckusick */ 10137736Smckusick if (rablkno && !incore(vp, rablkno)) { 10237736Smckusick rabp = getblk(vp, rablkno, rabsize); 10332608Smckusick if (rabp->b_flags & (B_DONE|B_DELWRI)) { 1048Sbill brelse(rabp); 10537736Smckusick trace(TR_BREADHITRA, 10637736Smckusick pack(vp->v_mount->m_fsid[0], rabsize), blkno); 1072045Swnj } else { 1088Sbill rabp->b_flags |= B_READ|B_ASYNC; 1098670S if (rabp->b_bcount > rabp->b_bufsize) 1108670S panic("breadrabp"); 111*38776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 112*38776Smckusick crhold(cred); 113*38776Smckusick bp->b_rcred = cred; 114*38776Smckusick } 11537736Smckusick VOP_STRATEGY(rabp); 11637736Smckusick trace(TR_BREADMISSRA, 11737736Smckusick pack(vp->v_mount->m_fsid[0], rabsize), rablock); 1188039Sroot u.u_ru.ru_inblock++; /* pay in advance */ 1198Sbill } 1208Sbill } 1217015Smckusick 1227015Smckusick /* 1237114Smckusick * If block was in core, let bread get it. 1247114Smckusick * If block wasn't in core, then the read was started 1257114Smckusick * above, and just wait for it. 
1267015Smckusick */ 1277114Smckusick if (bp == NULL) 128*38776Smckusick return (bread(vp, blkno, size, cred, bpp)); 12937736Smckusick return (biowait(bp)); 1308Sbill } 1318Sbill 1328Sbill /* 1338Sbill * Write the buffer, waiting for completion. 1348Sbill * Then release the buffer. 1358Sbill */ 1368Sbill bwrite(bp) 1377015Smckusick register struct buf *bp; 1388Sbill { 13937736Smckusick register int flag; 14037736Smckusick int error; 1418Sbill 1428Sbill flag = bp->b_flags; 1439857Ssam bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 1448Sbill if ((flag&B_DELWRI) == 0) 1458039Sroot u.u_ru.ru_oublock++; /* noone paid yet */ 14637736Smckusick trace(TR_BWRITE, 14737736Smckusick pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno); 1488670S if (bp->b_bcount > bp->b_bufsize) 1498670S panic("bwrite"); 15037736Smckusick VOP_STRATEGY(bp); 1517015Smckusick 1527015Smckusick /* 1537015Smckusick * If the write was synchronous, then await i/o completion. 1547015Smckusick * If the write was "delayed", then we put the buffer on 1557015Smckusick * the q of blocks awaiting i/o completion status. 1567015Smckusick */ 1578Sbill if ((flag&B_ASYNC) == 0) { 15837736Smckusick error = biowait(bp); 1598Sbill brelse(bp); 16037736Smckusick } else if (flag & B_DELWRI) { 1618Sbill bp->b_flags |= B_AGE; 16237736Smckusick error = 0; 16337736Smckusick } 16437736Smckusick return (error); 1658Sbill } 1668Sbill 1678Sbill /* 1688Sbill * Release the buffer, marking it so that if it is grabbed 1698Sbill * for another purpose it will be written out before being 1708Sbill * given up (e.g. when writing a partial block where it is 1718Sbill * assumed that another write for the same block will soon follow). 1728Sbill * This can't be done for magtape, since writes must be done 1738Sbill * in the same order as requested. 
1748Sbill */ 1758Sbill bdwrite(bp) 1767015Smckusick register struct buf *bp; 1778Sbill { 1788Sbill 1798Sbill if ((bp->b_flags&B_DELWRI) == 0) 1808039Sroot u.u_ru.ru_oublock++; /* noone paid yet */ 18137736Smckusick #ifdef notdef 18237736Smckusick /* 18337736Smckusick * This does not work for buffers associated with 18437736Smckusick * vnodes that are remote - they have no dev. 18537736Smckusick * Besides, we don't use bio with tapes, so rather 18637736Smckusick * than develop a fix, we just ifdef this out for now. 18737736Smckusick */ 18830749Skarels if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) 1898Sbill bawrite(bp); 1908Sbill else { 1918Sbill bp->b_flags |= B_DELWRI | B_DONE; 1928Sbill brelse(bp); 1938Sbill } 19437736Smckusick #endif 19537736Smckusick bp->b_flags |= B_DELWRI | B_DONE; 19637736Smckusick brelse(bp); 1978Sbill } 1988Sbill 1998Sbill /* 2008Sbill * Release the buffer, start I/O on it, but don't wait for completion. 2018Sbill */ 2028Sbill bawrite(bp) 2037015Smckusick register struct buf *bp; 2048Sbill { 2058Sbill 2068Sbill bp->b_flags |= B_ASYNC; 20737736Smckusick (void) bwrite(bp); 2088Sbill } 2098Sbill 2108Sbill /* 2117015Smckusick * Release the buffer, with no I/O implied. 2128Sbill */ 2138Sbill brelse(bp) 2147015Smckusick register struct buf *bp; 2158Sbill { 2162325Swnj register struct buf *flist; 2178Sbill register s; 2188Sbill 21937736Smckusick trace(TR_BRELSE, 22037736Smckusick pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno); 2217015Smckusick /* 2227015Smckusick * If someone's waiting for the buffer, or 2237015Smckusick * is waiting for a buffer wake 'em up. 
2247015Smckusick */ 2258Sbill if (bp->b_flags&B_WANTED) 2268Sbill wakeup((caddr_t)bp); 2272325Swnj if (bfreelist[0].b_flags&B_WANTED) { 2282325Swnj bfreelist[0].b_flags &= ~B_WANTED; 2292325Swnj wakeup((caddr_t)bfreelist); 2308Sbill } 23137736Smckusick if (bp->b_flags & B_NOCACHE) { 23237736Smckusick bp->b_flags |= B_INVAL; 23337736Smckusick } 2342683Swnj if (bp->b_flags&B_ERROR) 2352683Swnj if (bp->b_flags & B_LOCKED) 2362683Swnj bp->b_flags &= ~B_ERROR; /* try again later */ 2372683Swnj else 23837736Smckusick brelvp(bp); /* no assoc */ 2397015Smckusick 2407015Smckusick /* 2417015Smckusick * Stick the buffer back on a free list. 2427015Smckusick */ 24326271Skarels s = splbio(); 2448670S if (bp->b_bufsize <= 0) { 2458670S /* block has no buffer ... put at front of unused buffer list */ 2468670S flist = &bfreelist[BQ_EMPTY]; 2478670S binsheadfree(bp, flist); 2488670S } else if (bp->b_flags & (B_ERROR|B_INVAL)) { 2492325Swnj /* block has no info ... put at front of most free list */ 2508670S flist = &bfreelist[BQ_AGE]; 2517015Smckusick binsheadfree(bp, flist); 2528Sbill } else { 2532325Swnj if (bp->b_flags & B_LOCKED) 2542325Swnj flist = &bfreelist[BQ_LOCKED]; 2552325Swnj else if (bp->b_flags & B_AGE) 2562325Swnj flist = &bfreelist[BQ_AGE]; 2572325Swnj else 2582325Swnj flist = &bfreelist[BQ_LRU]; 2597015Smckusick binstailfree(bp, flist); 2608Sbill } 26137736Smckusick bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE); 2628Sbill splx(s); 2638Sbill } 2648Sbill 2658Sbill /* 2668Sbill * See if the block is associated with some buffer 2678Sbill * (mainly to avoid getting hung up on a wait in breada) 2688Sbill */ 26937736Smckusick incore(vp, blkno) 27037736Smckusick struct vnode *vp; 2717015Smckusick daddr_t blkno; 2728Sbill { 2738Sbill register struct buf *bp; 2742325Swnj register struct buf *dp; 2758Sbill 27638225Smckusick dp = BUFHASH(vp, blkno); 2772325Swnj for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) 27837736Smckusick if (bp->b_blkno == blkno && bp->b_vp == 
vp && 2797015Smckusick (bp->b_flags & B_INVAL) == 0) 28091Sbill return (1); 28191Sbill return (0); 2828Sbill } 2838Sbill 284*38776Smckusick baddr(vp, blkno, size, cred, bpp) 28537736Smckusick struct vnode *vp; 2866563Smckusic daddr_t blkno; 2876563Smckusic int size; 288*38776Smckusick struct ucred *cred; 28937736Smckusick struct buf **bpp; 2908Sbill { 2918Sbill 29237736Smckusick if (incore(vp, blkno)) 293*38776Smckusick return (bread(vp, blkno, size, cred, bpp)); 29437736Smckusick *bpp = 0; 2958Sbill return (0); 2968Sbill } 2978Sbill 2988Sbill /* 2998Sbill * Assign a buffer for the given block. If the appropriate 3008Sbill * block is already associated, return it; otherwise search 3018Sbill * for the oldest non-busy buffer and reassign it. 3025424Swnj * 30332608Smckusick * If we find the buffer, but it is dirty (marked DELWRI) and 30432608Smckusick * its size is changing, we must write it out first. When the 30532608Smckusick * buffer is shrinking, the write is done by brealloc to avoid 30632608Smckusick * losing the unwritten data. When the buffer is growing, the 30732608Smckusick * write is done by getblk, so that bread will not read stale 30832608Smckusick * disk data over the modified data in the buffer. 30932608Smckusick * 3105424Swnj * We use splx here because this routine may be called 3115424Swnj * on the interrupt stack during a dump, and we don't 3125424Swnj * want to lower the ipl back to 0. 
3138Sbill */ 3148Sbill struct buf * 31537736Smckusick getblk(vp, blkno, size) 31637736Smckusick register struct vnode *vp; 3176563Smckusic daddr_t blkno; 3186563Smckusic int size; 3198Sbill { 3208670S register struct buf *bp, *dp; 3215424Swnj int s; 3228Sbill 32325255Smckusick if (size > MAXBSIZE) 32425255Smckusick panic("getblk: size too big"); 3257015Smckusick /* 32624730Smckusick * To prevent overflow of 32-bit ints when converting block 32724730Smckusick * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set 32824730Smckusick * to the maximum number that can be converted to a byte offset 32924730Smckusick * without overflow. This is historic code; what bug it fixed, 33024730Smckusick * or whether it is still a reasonable thing to do is open to 33124730Smckusick * dispute. mkm 9/85 33224730Smckusick */ 33324730Smckusick if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) 33424730Smckusick blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1); 33524730Smckusick /* 3367015Smckusick * Search the cache for the block. If we hit, but 3377015Smckusick * the buffer is in use for i/o, then we wait until 3387015Smckusick * the i/o has completed. 
3397015Smckusick */ 34037736Smckusick dp = BUFHASH(vp, blkno); 3417015Smckusick loop: 3422325Swnj for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 34337736Smckusick if (bp->b_blkno != blkno || bp->b_vp != vp || 3442325Swnj bp->b_flags&B_INVAL) 3458Sbill continue; 34626271Skarels s = splbio(); 3478Sbill if (bp->b_flags&B_BUSY) { 3488Sbill bp->b_flags |= B_WANTED; 3498Sbill sleep((caddr_t)bp, PRIBIO+1); 3505424Swnj splx(s); 3518Sbill goto loop; 3528Sbill } 3535424Swnj splx(s); 3548Sbill notavail(bp); 35532608Smckusick if (bp->b_bcount != size) { 35632608Smckusick if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) { 35732608Smckusick bp->b_flags &= ~B_ASYNC; 35837736Smckusick (void) bwrite(bp); 35932608Smckusick goto loop; 36032608Smckusick } 36132608Smckusick if (brealloc(bp, size) == 0) 36232608Smckusick goto loop; 36332608Smckusick } 36416855Smckusick if (bp->b_bcount != size && brealloc(bp, size) == 0) 3657188Sroot goto loop; 3668Sbill bp->b_flags |= B_CACHE; 36726271Skarels return (bp); 3688Sbill } 3698670S bp = getnewbuf(); 3706563Smckusic bfree(bp); 3717015Smckusick bremhash(bp); 37237736Smckusick if (bp->b_vp) 37337736Smckusick brelvp(bp); 37438345Smckusick VREF(vp); 37537736Smckusick bp->b_vp = vp; 37637736Smckusick bp->b_dev = vp->v_rdev; 3776563Smckusic bp->b_blkno = blkno; 3788670S bp->b_error = 0; 37937736Smckusick bp->b_resid = 0; 38037736Smckusick binshash(bp, dp); 3817188Sroot if (brealloc(bp, size) == 0) 3827188Sroot goto loop; 38326271Skarels return (bp); 3848Sbill } 3858Sbill 3868Sbill /* 3878Sbill * get an empty block, 3888Sbill * not assigned to any particular device 3898Sbill */ 3908Sbill struct buf * 3916563Smckusic geteblk(size) 3926563Smckusic int size; 3938Sbill { 3948670S register struct buf *bp, *flist; 3958Sbill 39625255Smckusick if (size > MAXBSIZE) 39725255Smckusick panic("geteblk: size too big"); 3988Sbill loop: 3998670S bp = getnewbuf(); 4008670S bp->b_flags |= B_INVAL; 4017015Smckusick bfree(bp); 4027015Smckusick bremhash(bp); 
4038670S flist = &bfreelist[BQ_AGE]; 40437736Smckusick brelvp(bp); 40537736Smckusick bp->b_error = 0; 40637736Smckusick bp->b_resid = 0; 4078670S binshash(bp, flist); 4087188Sroot if (brealloc(bp, size) == 0) 4097188Sroot goto loop; 41026271Skarels return (bp); 4118Sbill } 4128Sbill 4138Sbill /* 4146563Smckusic * Allocate space associated with a buffer. 4159763Ssam * If can't get space, buffer is released 4166563Smckusic */ 4176563Smckusic brealloc(bp, size) 4186563Smckusic register struct buf *bp; 4196563Smckusic int size; 4206563Smckusic { 4216563Smckusic daddr_t start, last; 4226563Smckusic register struct buf *ep; 4236563Smckusic struct buf *dp; 4246563Smckusic int s; 4256563Smckusic 4266563Smckusic /* 42730749Skarels * First need to make sure that all overlapping previous I/O 4286563Smckusic * is dispatched with. 4296563Smckusic */ 4306563Smckusic if (size == bp->b_bcount) 4317188Sroot return (1); 4327188Sroot if (size < bp->b_bcount) { 4337188Sroot if (bp->b_flags & B_DELWRI) { 43437736Smckusick (void) bwrite(bp); 4357188Sroot return (0); 4367188Sroot } 4377188Sroot if (bp->b_flags & B_LOCKED) 4387188Sroot panic("brealloc"); 4399763Ssam return (allocbuf(bp, size)); 4407188Sroot } 4417188Sroot bp->b_flags &= ~B_DONE; 44237736Smckusick if (bp->b_vp == (struct vnode *)0) 4439763Ssam return (allocbuf(bp, size)); 4447016Smckusick 44537736Smckusick trace(TR_BREALLOC, 44637736Smckusick pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno); 4477188Sroot /* 4487188Sroot * Search cache for any buffers that overlap the one that we 4497188Sroot * are trying to allocate. Overlapping buffers must be marked 4507188Sroot * invalid, after being written out if they are dirty. (indicated 4517188Sroot * by B_DELWRI) A disk block must be mapped by at most one buffer 4527188Sroot * at any point in time. Care must be taken to avoid deadlocking 4537188Sroot * when two buffer are trying to get the same set of disk blocks. 
4547188Sroot */ 4557188Sroot start = bp->b_blkno; 45612644Ssam last = start + btodb(size) - 1; 45737736Smckusick dp = BUFHASH(bp->b_vp, bp->b_blkno); 4586563Smckusic loop: 4596563Smckusic for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { 46037736Smckusick if (ep == bp || ep->b_vp != bp->b_vp || 46137736Smckusick (ep->b_flags & B_INVAL)) 4626563Smckusic continue; 4637188Sroot /* look for overlap */ 4647188Sroot if (ep->b_bcount == 0 || ep->b_blkno > last || 46512644Ssam ep->b_blkno + btodb(ep->b_bcount) <= start) 4667188Sroot continue; 46726271Skarels s = splbio(); 4686563Smckusic if (ep->b_flags&B_BUSY) { 4696563Smckusic ep->b_flags |= B_WANTED; 4706563Smckusic sleep((caddr_t)ep, PRIBIO+1); 4718670S splx(s); 4726563Smckusic goto loop; 4736563Smckusic } 4748670S splx(s); 4757188Sroot notavail(ep); 4766563Smckusic if (ep->b_flags & B_DELWRI) { 47737736Smckusick (void) bwrite(ep); 4786563Smckusic goto loop; 4796563Smckusic } 4807188Sroot ep->b_flags |= B_INVAL; 4817188Sroot brelse(ep); 4826563Smckusic } 4839763Ssam return (allocbuf(bp, size)); 4848670S } 4858670S 4868670S /* 4878670S * Find a buffer which is available for use. 4888670S * Select something from a free list. 4898670S * Preference is to AGE list, then LRU list. 
4908670S */ 4918670S struct buf * 4928670S getnewbuf() 4938670S { 4948670S register struct buf *bp, *dp; 495*38776Smckusick register struct ucred *cred; 4968670S int s; 4978670S 4988670S loop: 49926271Skarels s = splbio(); 5008670S for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--) 5018670S if (dp->av_forw != dp) 5028670S break; 5038670S if (dp == bfreelist) { /* no free blocks */ 5048670S dp->b_flags |= B_WANTED; 5058670S sleep((caddr_t)dp, PRIBIO+1); 50612170Ssam splx(s); 5078670S goto loop; 5088670S } 5098670S splx(s); 5108670S bp = dp->av_forw; 5118670S notavail(bp); 5128670S if (bp->b_flags & B_DELWRI) { 51338614Smckusick (void) bawrite(bp); 5148670S goto loop; 5158670S } 51637736Smckusick trace(TR_BRELSE, 51737736Smckusick pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno); 51837736Smckusick brelvp(bp); 519*38776Smckusick if (bp->b_rcred != NOCRED) { 520*38776Smckusick cred = bp->b_rcred; 521*38776Smckusick bp->b_rcred = NOCRED; 522*38776Smckusick crfree(cred); 523*38776Smckusick } 524*38776Smckusick if (bp->b_wcred != NOCRED) { 525*38776Smckusick cred = bp->b_wcred; 526*38776Smckusick bp->b_wcred = NOCRED; 527*38776Smckusick crfree(cred); 528*38776Smckusick } 5298670S bp->b_flags = B_BUSY; 5308670S return (bp); 5318670S } 5328670S 5338670S /* 5348Sbill * Wait for I/O completion on the buffer; return errors 5358Sbill * to the user. 5368Sbill */ 5377015Smckusick biowait(bp) 5386563Smckusic register struct buf *bp; 5398Sbill { 5405431Sroot int s; 5418Sbill 54226271Skarels s = splbio(); 543*38776Smckusick while ((bp->b_flags & B_DONE) == 0) 5448Sbill sleep((caddr_t)bp, PRIBIO); 5455431Sroot splx(s); 54637736Smckusick /* 54737736Smckusick * Pick up the device's error number and pass it to the user; 54837736Smckusick * if there is an error but the number is 0 set a generalized code. 
54937736Smckusick */ 55037736Smckusick if ((bp->b_flags & B_ERROR) == 0) 55137736Smckusick return (0); 55237736Smckusick if (bp->b_error) 55337736Smckusick return (bp->b_error); 55437736Smckusick return (EIO); 5558Sbill } 5568Sbill 5578Sbill /* 55813128Ssam * Mark I/O complete on a buffer. 55913128Ssam * If someone should be called, e.g. the pageout 56013128Ssam * daemon, do so. Otherwise, wake up anyone 56113128Ssam * waiting for it. 5628Sbill */ 5637015Smckusick biodone(bp) 5647015Smckusick register struct buf *bp; 5658Sbill { 5668Sbill 567420Sbill if (bp->b_flags & B_DONE) 5687015Smckusick panic("dup biodone"); 5698Sbill bp->b_flags |= B_DONE; 570*38776Smckusick if ((bp->b_flags & B_READ) == 0) 571*38776Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0; 5729763Ssam if (bp->b_flags & B_CALL) { 5739763Ssam bp->b_flags &= ~B_CALL; 5749763Ssam (*bp->b_iodone)(bp); 5759763Ssam return; 5769763Ssam } 5778Sbill if (bp->b_flags&B_ASYNC) 5788Sbill brelse(bp); 5798Sbill else { 5808Sbill bp->b_flags &= ~B_WANTED; 5818Sbill wakeup((caddr_t)bp); 5828Sbill } 5838Sbill } 5848Sbill 5858Sbill /* 58637736Smckusick * Ensure that no part of a specified block is in an incore buffer. 58730749Skarels #ifdef SECSIZE 58830749Skarels * "size" is given in device blocks (the units of b_blkno). 
58930749Skarels #endif SECSIZE 5908670S */ 59137736Smckusick blkflush(vp, blkno, size) 59237736Smckusick struct vnode *vp; 5938670S daddr_t blkno; 5948670S long size; 5958670S { 5968670S register struct buf *ep; 5978670S struct buf *dp; 5988670S daddr_t start, last; 59937736Smckusick int s, error, allerrors = 0; 6008670S 6018670S start = blkno; 60212644Ssam last = start + btodb(size) - 1; 60337736Smckusick dp = BUFHASH(vp, blkno); 6048670S loop: 6058670S for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { 60637736Smckusick if (ep->b_vp != vp || (ep->b_flags & B_INVAL)) 6078670S continue; 6088670S /* look for overlap */ 6098670S if (ep->b_bcount == 0 || ep->b_blkno > last || 61012644Ssam ep->b_blkno + btodb(ep->b_bcount) <= start) 6118670S continue; 61226271Skarels s = splbio(); 6138670S if (ep->b_flags&B_BUSY) { 6148670S ep->b_flags |= B_WANTED; 6158670S sleep((caddr_t)ep, PRIBIO+1); 6168670S splx(s); 6178670S goto loop; 6188670S } 6198670S if (ep->b_flags & B_DELWRI) { 6208670S splx(s); 6218670S notavail(ep); 62237736Smckusick if (error = bwrite(ep)) 62337736Smckusick allerrors = error; 6248670S goto loop; 6258670S } 6268670S splx(s); 6278670S } 62837736Smckusick return (allerrors); 6298670S } 6308670S 6318670S /* 63237736Smckusick * Make sure all write-behind blocks associated 633*38776Smckusick * with mount point are flushed out (from sync). 
6348Sbill */ 635*38776Smckusick bflush(mountp) 636*38776Smckusick struct mount *mountp; 6378Sbill { 6388Sbill register struct buf *bp; 6392325Swnj register struct buf *flist; 6405431Sroot int s; 6418Sbill 6428Sbill loop: 64326271Skarels s = splbio(); 644*38776Smckusick for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) { 645*38776Smckusick for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { 646*38776Smckusick if ((bp->b_flags & B_BUSY)) 647*38776Smckusick continue; 648*38776Smckusick if ((bp->b_flags & B_DELWRI) == 0) 649*38776Smckusick continue; 650*38776Smckusick if (bp->b_vp && bp->b_vp->v_mount == mountp) { 65137736Smckusick notavail(bp); 65238614Smckusick (void) bawrite(bp); 653*38776Smckusick splx(s); 654*38776Smckusick goto loop; 65537736Smckusick } 65637736Smckusick } 65737736Smckusick } 658*38776Smckusick splx(s); 6598Sbill } 6602299Skre 6612299Skre /* 6622299Skre * Invalidate in core blocks belonging to closed or umounted filesystem 6632299Skre * 66438614Smckusick * We walk through the buffer pool and invalidate any buffers for the 665*38776Smckusick * indicated mount point. Normally this routine is preceeded by a bflush 66638614Smckusick * call, so that on a quiescent filesystem there will be no dirty 66738614Smckusick * buffers when we are done. We return the count of dirty buffers when 66838614Smckusick * we are finished. 
6692299Skre */ 670*38776Smckusick binval(mountp) 671*38776Smckusick struct mount *mountp; 6722299Skre { 6732361Skre register struct buf *bp; 6742361Skre register struct bufhd *hp; 67538633Smckusick int s, dirty = 0; 6762361Skre #define dp ((struct buf *)hp) 6772299Skre 678*38776Smckusick loop: 679*38776Smckusick s = splbio(); 68038614Smckusick for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) { 68138614Smckusick for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 682*38776Smckusick if (bp->b_vp == NULL || bp->b_vp->v_mount != mountp) 68338614Smckusick continue; 68438633Smckusick if (bp->b_flags & B_BUSY) { 68538633Smckusick bp->b_flags |= B_WANTED; 68638633Smckusick sleep((caddr_t)bp, PRIBIO+1); 68738633Smckusick splx(s); 68838633Smckusick goto loop; 68938633Smckusick } 69038614Smckusick notavail(bp); 69138614Smckusick if (bp->b_flags & B_DELWRI) { 69238614Smckusick (void) bawrite(bp); 69338614Smckusick dirty++; 69438614Smckusick continue; 69537736Smckusick } 69638614Smckusick bp->b_flags |= B_INVAL; 69738614Smckusick brelvp(bp); 69838614Smckusick brelse(bp); 69938614Smckusick } 70038614Smckusick } 70138614Smckusick return (dirty); 7022299Skre } 70337736Smckusick 70437736Smckusick brelvp(bp) 70537736Smckusick struct buf *bp; 70637736Smckusick { 70737736Smckusick struct vnode *vp; 70837736Smckusick 70937736Smckusick if (bp->b_vp == (struct vnode *) 0) 71037736Smckusick return; 71137736Smckusick vp = bp->b_vp; 71237736Smckusick bp->b_vp = (struct vnode *) 0; 71337736Smckusick vrele(vp); 71437736Smckusick } 715