xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 38614)
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)vfs_cluster.c	7.8 (Berkeley) 08/15/89
 */
198Sbill 
2017098Sbloom #include "param.h"
2117098Sbloom #include "user.h"
2217098Sbloom #include "buf.h"
2337736Smckusick #include "vnode.h"
2417098Sbloom #include "trace.h"
258Sbill 
2691Sbill /*
278Sbill  * Read in (if necessary) the block and return a buffer pointer.
288Sbill  */
2937736Smckusick bread(vp, blkno, size, bpp)
3037736Smckusick 	struct vnode *vp;
316563Smckusic 	daddr_t blkno;
326563Smckusic 	int size;
3337736Smckusick 	struct buf **bpp;
348Sbill {
358Sbill 	register struct buf *bp;
368Sbill 
378670S 	if (size == 0)
388670S 		panic("bread: size 0");
3937736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
4032608Smckusick 	if (bp->b_flags&(B_DONE|B_DELWRI)) {
4137736Smckusick 		trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
4237736Smckusick 		return (0);
438Sbill 	}
448Sbill 	bp->b_flags |= B_READ;
458670S 	if (bp->b_bcount > bp->b_bufsize)
468670S 		panic("bread");
4737736Smckusick 	VOP_STRATEGY(bp);
4837736Smckusick 	trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
498039Sroot 	u.u_ru.ru_inblock++;		/* pay for read */
5037736Smckusick 	return (biowait(bp));
518Sbill }
528Sbill 
538Sbill /*
548Sbill  * Read in the block, like bread, but also start I/O on the
558Sbill  * read-ahead block (which is not allocated to the caller)
568Sbill  */
5737736Smckusick breada(vp, blkno, size, rablkno, rabsize, bpp)
5837736Smckusick 	struct vnode *vp;
597114Smckusick 	daddr_t blkno; int size;
608592Sroot 	daddr_t rablkno; int rabsize;
6137736Smckusick 	struct buf **bpp;
628Sbill {
638Sbill 	register struct buf *bp, *rabp;
648Sbill 
658Sbill 	bp = NULL;
667015Smckusick 	/*
677015Smckusick 	 * If the block isn't in core, then allocate
687015Smckusick 	 * a buffer and initiate i/o (getblk checks
697015Smckusick 	 * for a cache hit).
707015Smckusick 	 */
7137736Smckusick 	if (!incore(vp, blkno)) {
7237736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
7332608Smckusick 		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
748Sbill 			bp->b_flags |= B_READ;
758670S 			if (bp->b_bcount > bp->b_bufsize)
768670S 				panic("breada");
7737736Smckusick 			VOP_STRATEGY(bp);
7837736Smckusick 			trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
7937736Smckusick 			    blkno);
808039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
817015Smckusick 		} else
8237736Smckusick 			trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
8337736Smckusick 			    blkno);
848Sbill 	}
857015Smckusick 
867015Smckusick 	/*
877015Smckusick 	 * If there's a read-ahead block, start i/o
887015Smckusick 	 * on it also (as above).
897015Smckusick 	 */
9037736Smckusick 	if (rablkno && !incore(vp, rablkno)) {
9137736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
9232608Smckusick 		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
938Sbill 			brelse(rabp);
9437736Smckusick 			trace(TR_BREADHITRA,
9537736Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), blkno);
962045Swnj 		} else {
978Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
988670S 			if (rabp->b_bcount > rabp->b_bufsize)
998670S 				panic("breadrabp");
10037736Smckusick 			VOP_STRATEGY(rabp);
10137736Smckusick 			trace(TR_BREADMISSRA,
10237736Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablock);
1038039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1048Sbill 		}
1058Sbill 	}
1067015Smckusick 
1077015Smckusick 	/*
1087114Smckusick 	 * If block was in core, let bread get it.
1097114Smckusick 	 * If block wasn't in core, then the read was started
1107114Smckusick 	 * above, and just wait for it.
1117015Smckusick 	 */
1127114Smckusick 	if (bp == NULL)
11337736Smckusick 		return (bread(vp, blkno, size, bpp));
11437736Smckusick 	return (biowait(bp));
1158Sbill }
1168Sbill 
1178Sbill /*
1188Sbill  * Write the buffer, waiting for completion.
1198Sbill  * Then release the buffer.
1208Sbill  */
1218Sbill bwrite(bp)
1227015Smckusick 	register struct buf *bp;
1238Sbill {
12437736Smckusick 	register int flag;
12537736Smckusick 	int error;
1268Sbill 
1278Sbill 	flag = bp->b_flags;
1289857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1298Sbill 	if ((flag&B_DELWRI) == 0)
1308039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
13137736Smckusick 	trace(TR_BWRITE,
13237736Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno);
1338670S 	if (bp->b_bcount > bp->b_bufsize)
1348670S 		panic("bwrite");
13537736Smckusick 	VOP_STRATEGY(bp);
1367015Smckusick 
1377015Smckusick 	/*
1387015Smckusick 	 * If the write was synchronous, then await i/o completion.
1397015Smckusick 	 * If the write was "delayed", then we put the buffer on
1407015Smckusick 	 * the q of blocks awaiting i/o completion status.
1417015Smckusick 	 */
1428Sbill 	if ((flag&B_ASYNC) == 0) {
14337736Smckusick 		error = biowait(bp);
1448Sbill 		brelse(bp);
14537736Smckusick 	} else if (flag & B_DELWRI) {
1468Sbill 		bp->b_flags |= B_AGE;
14737736Smckusick 		error = 0;
14837736Smckusick 	}
14937736Smckusick 	return (error);
1508Sbill }
1518Sbill 
1528Sbill /*
1538Sbill  * Release the buffer, marking it so that if it is grabbed
1548Sbill  * for another purpose it will be written out before being
1558Sbill  * given up (e.g. when writing a partial block where it is
1568Sbill  * assumed that another write for the same block will soon follow).
1578Sbill  * This can't be done for magtape, since writes must be done
1588Sbill  * in the same order as requested.
1598Sbill  */
1608Sbill bdwrite(bp)
1617015Smckusick 	register struct buf *bp;
1628Sbill {
1638Sbill 
1648Sbill 	if ((bp->b_flags&B_DELWRI) == 0)
1658039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
16637736Smckusick #ifdef notdef
16737736Smckusick 	/*
16837736Smckusick 	 * This does not work for buffers associated with
16937736Smckusick 	 * vnodes that are remote - they have no dev.
17037736Smckusick 	 * Besides, we don't use bio with tapes, so rather
17137736Smckusick 	 * than develop a fix, we just ifdef this out for now.
17237736Smckusick 	 */
17330749Skarels 	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
1748Sbill 		bawrite(bp);
1758Sbill 	else {
1768Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
1778Sbill 		brelse(bp);
1788Sbill 	}
17937736Smckusick #endif
18037736Smckusick 	bp->b_flags |= B_DELWRI | B_DONE;
18137736Smckusick 	brelse(bp);
1828Sbill }
1838Sbill 
1848Sbill /*
1858Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
1868Sbill  */
1878Sbill bawrite(bp)
1887015Smckusick 	register struct buf *bp;
1898Sbill {
1908Sbill 
1918Sbill 	bp->b_flags |= B_ASYNC;
19237736Smckusick 	(void) bwrite(bp);
1938Sbill }
1948Sbill 
1958Sbill /*
1967015Smckusick  * Release the buffer, with no I/O implied.
1978Sbill  */
1988Sbill brelse(bp)
1997015Smckusick 	register struct buf *bp;
2008Sbill {
2012325Swnj 	register struct buf *flist;
2028Sbill 	register s;
2038Sbill 
20437736Smckusick 	trace(TR_BRELSE,
20537736Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
2067015Smckusick 	/*
2077015Smckusick 	 * If someone's waiting for the buffer, or
2087015Smckusick 	 * is waiting for a buffer wake 'em up.
2097015Smckusick 	 */
2108Sbill 	if (bp->b_flags&B_WANTED)
2118Sbill 		wakeup((caddr_t)bp);
2122325Swnj 	if (bfreelist[0].b_flags&B_WANTED) {
2132325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2142325Swnj 		wakeup((caddr_t)bfreelist);
2158Sbill 	}
21637736Smckusick 	if (bp->b_flags & B_NOCACHE) {
21737736Smckusick 		bp->b_flags |= B_INVAL;
21837736Smckusick 	}
2192683Swnj 	if (bp->b_flags&B_ERROR)
2202683Swnj 		if (bp->b_flags & B_LOCKED)
2212683Swnj 			bp->b_flags &= ~B_ERROR;	/* try again later */
2222683Swnj 		else
22337736Smckusick 			brelvp(bp); 	 		/* no assoc */
2247015Smckusick 
2257015Smckusick 	/*
2267015Smckusick 	 * Stick the buffer back on a free list.
2277015Smckusick 	 */
22826271Skarels 	s = splbio();
2298670S 	if (bp->b_bufsize <= 0) {
2308670S 		/* block has no buffer ... put at front of unused buffer list */
2318670S 		flist = &bfreelist[BQ_EMPTY];
2328670S 		binsheadfree(bp, flist);
2338670S 	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
2342325Swnj 		/* block has no info ... put at front of most free list */
2358670S 		flist = &bfreelist[BQ_AGE];
2367015Smckusick 		binsheadfree(bp, flist);
2378Sbill 	} else {
2382325Swnj 		if (bp->b_flags & B_LOCKED)
2392325Swnj 			flist = &bfreelist[BQ_LOCKED];
2402325Swnj 		else if (bp->b_flags & B_AGE)
2412325Swnj 			flist = &bfreelist[BQ_AGE];
2422325Swnj 		else
2432325Swnj 			flist = &bfreelist[BQ_LRU];
2447015Smckusick 		binstailfree(bp, flist);
2458Sbill 	}
24637736Smckusick 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
2478Sbill 	splx(s);
2488Sbill }
2498Sbill 
2508Sbill /*
2518Sbill  * See if the block is associated with some buffer
2528Sbill  * (mainly to avoid getting hung up on a wait in breada)
2538Sbill  */
25437736Smckusick incore(vp, blkno)
25537736Smckusick 	struct vnode *vp;
2567015Smckusick 	daddr_t blkno;
2578Sbill {
2588Sbill 	register struct buf *bp;
2592325Swnj 	register struct buf *dp;
2608Sbill 
26138225Smckusick 	dp = BUFHASH(vp, blkno);
2622325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
26337736Smckusick 		if (bp->b_blkno == blkno && bp->b_vp == vp &&
2647015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
26591Sbill 			return (1);
26691Sbill 	return (0);
2678Sbill }
2688Sbill 
26937736Smckusick baddr(vp, blkno, size, bpp)
27037736Smckusick 	struct vnode *vp;
2716563Smckusic 	daddr_t blkno;
2726563Smckusic 	int size;
27337736Smckusick 	struct buf **bpp;
2748Sbill {
2758Sbill 
27637736Smckusick 	if (incore(vp, blkno))
27737736Smckusick 		return (bread(vp, blkno, size, bpp));
27837736Smckusick 	*bpp = 0;
2798Sbill 	return (0);
2808Sbill }
2818Sbill 
2828Sbill /*
2838Sbill  * Assign a buffer for the given block.  If the appropriate
2848Sbill  * block is already associated, return it; otherwise search
2858Sbill  * for the oldest non-busy buffer and reassign it.
2865424Swnj  *
28732608Smckusick  * If we find the buffer, but it is dirty (marked DELWRI) and
28832608Smckusick  * its size is changing, we must write it out first. When the
28932608Smckusick  * buffer is shrinking, the write is done by brealloc to avoid
29032608Smckusick  * losing the unwritten data. When the buffer is growing, the
29132608Smckusick  * write is done by getblk, so that bread will not read stale
29232608Smckusick  * disk data over the modified data in the buffer.
29332608Smckusick  *
2945424Swnj  * We use splx here because this routine may be called
2955424Swnj  * on the interrupt stack during a dump, and we don't
2965424Swnj  * want to lower the ipl back to 0.
2978Sbill  */
2988Sbill struct buf *
29937736Smckusick getblk(vp, blkno, size)
30037736Smckusick 	register struct vnode *vp;
3016563Smckusic 	daddr_t blkno;
3026563Smckusic 	int size;
3038Sbill {
3048670S 	register struct buf *bp, *dp;
3055424Swnj 	int s;
3068Sbill 
30725255Smckusick 	if (size > MAXBSIZE)
30825255Smckusick 		panic("getblk: size too big");
3097015Smckusick 	/*
31024730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
31124730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
31224730Smckusick 	 * to the maximum number that can be converted to a byte offset
31324730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
31424730Smckusick 	 * or whether it is still a reasonable thing to do is open to
31524730Smckusick 	 * dispute. mkm 9/85
31624730Smckusick 	 */
31724730Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
31824730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
31924730Smckusick 	/*
3207015Smckusick 	 * Search the cache for the block.  If we hit, but
3217015Smckusick 	 * the buffer is in use for i/o, then we wait until
3227015Smckusick 	 * the i/o has completed.
3237015Smckusick 	 */
32437736Smckusick 	dp = BUFHASH(vp, blkno);
3257015Smckusick loop:
3262325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
32737736Smckusick 		if (bp->b_blkno != blkno || bp->b_vp != vp ||
3282325Swnj 		    bp->b_flags&B_INVAL)
3298Sbill 			continue;
33026271Skarels 		s = splbio();
3318Sbill 		if (bp->b_flags&B_BUSY) {
3328Sbill 			bp->b_flags |= B_WANTED;
3338Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3345424Swnj 			splx(s);
3358Sbill 			goto loop;
3368Sbill 		}
3375424Swnj 		splx(s);
3388Sbill 		notavail(bp);
33932608Smckusick 		if (bp->b_bcount != size) {
34032608Smckusick 			if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
34132608Smckusick 				bp->b_flags &= ~B_ASYNC;
34237736Smckusick 				(void) bwrite(bp);
34332608Smckusick 				goto loop;
34432608Smckusick 			}
34532608Smckusick 			if (brealloc(bp, size) == 0)
34632608Smckusick 				goto loop;
34732608Smckusick 		}
34816855Smckusick 		if (bp->b_bcount != size && brealloc(bp, size) == 0)
3497188Sroot 			goto loop;
3508Sbill 		bp->b_flags |= B_CACHE;
35126271Skarels 		return (bp);
3528Sbill 	}
3538670S 	bp = getnewbuf();
3546563Smckusic 	bfree(bp);
3557015Smckusick 	bremhash(bp);
35637736Smckusick 	if (bp->b_vp)
35737736Smckusick 		brelvp(bp);
35838345Smckusick 	VREF(vp);
35937736Smckusick 	bp->b_vp = vp;
36037736Smckusick 	bp->b_dev = vp->v_rdev;
3616563Smckusic 	bp->b_blkno = blkno;
3628670S 	bp->b_error = 0;
36337736Smckusick 	bp->b_resid = 0;
36437736Smckusick 	binshash(bp, dp);
3657188Sroot 	if (brealloc(bp, size) == 0)
3667188Sroot 		goto loop;
36726271Skarels 	return (bp);
3688Sbill }
3698Sbill 
3708Sbill /*
3718Sbill  * get an empty block,
3728Sbill  * not assigned to any particular device
3738Sbill  */
3748Sbill struct buf *
3756563Smckusic geteblk(size)
3766563Smckusic 	int size;
3778Sbill {
3788670S 	register struct buf *bp, *flist;
3798Sbill 
38025255Smckusick 	if (size > MAXBSIZE)
38125255Smckusick 		panic("geteblk: size too big");
3828Sbill loop:
3838670S 	bp = getnewbuf();
3848670S 	bp->b_flags |= B_INVAL;
3857015Smckusick 	bfree(bp);
3867015Smckusick 	bremhash(bp);
3878670S 	flist = &bfreelist[BQ_AGE];
38837736Smckusick 	brelvp(bp);
38937736Smckusick 	bp->b_error = 0;
39037736Smckusick 	bp->b_resid = 0;
3918670S 	binshash(bp, flist);
3927188Sroot 	if (brealloc(bp, size) == 0)
3937188Sroot 		goto loop;
39426271Skarels 	return (bp);
3958Sbill }
3968Sbill 
3978Sbill /*
3986563Smckusic  * Allocate space associated with a buffer.
3999763Ssam  * If can't get space, buffer is released
4006563Smckusic  */
4016563Smckusic brealloc(bp, size)
4026563Smckusic 	register struct buf *bp;
4036563Smckusic 	int size;
4046563Smckusic {
4056563Smckusic 	daddr_t start, last;
4066563Smckusic 	register struct buf *ep;
4076563Smckusic 	struct buf *dp;
4086563Smckusic 	int s;
4096563Smckusic 
4106563Smckusic 	/*
41130749Skarels 	 * First need to make sure that all overlapping previous I/O
4126563Smckusic 	 * is dispatched with.
4136563Smckusic 	 */
4146563Smckusic 	if (size == bp->b_bcount)
4157188Sroot 		return (1);
4167188Sroot 	if (size < bp->b_bcount) {
4177188Sroot 		if (bp->b_flags & B_DELWRI) {
41837736Smckusick 			(void) bwrite(bp);
4197188Sroot 			return (0);
4207188Sroot 		}
4217188Sroot 		if (bp->b_flags & B_LOCKED)
4227188Sroot 			panic("brealloc");
4239763Ssam 		return (allocbuf(bp, size));
4247188Sroot 	}
4257188Sroot 	bp->b_flags &= ~B_DONE;
42637736Smckusick 	if (bp->b_vp == (struct vnode *)0)
4279763Ssam 		return (allocbuf(bp, size));
4287016Smckusick 
42937736Smckusick 	trace(TR_BREALLOC,
43037736Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno);
4317188Sroot 	/*
4327188Sroot 	 * Search cache for any buffers that overlap the one that we
4337188Sroot 	 * are trying to allocate. Overlapping buffers must be marked
4347188Sroot 	 * invalid, after being written out if they are dirty. (indicated
4357188Sroot 	 * by B_DELWRI) A disk block must be mapped by at most one buffer
4367188Sroot 	 * at any point in time. Care must be taken to avoid deadlocking
4377188Sroot 	 * when two buffer are trying to get the same set of disk blocks.
4387188Sroot 	 */
4397188Sroot 	start = bp->b_blkno;
44012644Ssam 	last = start + btodb(size) - 1;
44137736Smckusick 	dp = BUFHASH(bp->b_vp, bp->b_blkno);
4426563Smckusic loop:
4436563Smckusic 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
44437736Smckusick 		if (ep == bp || ep->b_vp != bp->b_vp ||
44537736Smckusick 		    (ep->b_flags & B_INVAL))
4466563Smckusic 			continue;
4477188Sroot 		/* look for overlap */
4487188Sroot 		if (ep->b_bcount == 0 || ep->b_blkno > last ||
44912644Ssam 		    ep->b_blkno + btodb(ep->b_bcount) <= start)
4507188Sroot 			continue;
45126271Skarels 		s = splbio();
4526563Smckusic 		if (ep->b_flags&B_BUSY) {
4536563Smckusic 			ep->b_flags |= B_WANTED;
4546563Smckusic 			sleep((caddr_t)ep, PRIBIO+1);
4558670S 			splx(s);
4566563Smckusic 			goto loop;
4576563Smckusic 		}
4588670S 		splx(s);
4597188Sroot 		notavail(ep);
4606563Smckusic 		if (ep->b_flags & B_DELWRI) {
46137736Smckusick 			(void) bwrite(ep);
4626563Smckusic 			goto loop;
4636563Smckusic 		}
4647188Sroot 		ep->b_flags |= B_INVAL;
4657188Sroot 		brelse(ep);
4666563Smckusic 	}
4679763Ssam 	return (allocbuf(bp, size));
4688670S }
4698670S 
4708670S /*
4718670S  * Find a buffer which is available for use.
4728670S  * Select something from a free list.
4738670S  * Preference is to AGE list, then LRU list.
4748670S  */
4758670S struct buf *
4768670S getnewbuf()
4778670S {
4788670S 	register struct buf *bp, *dp;
4798670S 	int s;
4808670S 
4818670S loop:
48226271Skarels 	s = splbio();
4838670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4848670S 		if (dp->av_forw != dp)
4858670S 			break;
4868670S 	if (dp == bfreelist) {		/* no free blocks */
4878670S 		dp->b_flags |= B_WANTED;
4888670S 		sleep((caddr_t)dp, PRIBIO+1);
48912170Ssam 		splx(s);
4908670S 		goto loop;
4918670S 	}
4928670S 	splx(s);
4938670S 	bp = dp->av_forw;
4948670S 	notavail(bp);
4958670S 	if (bp->b_flags & B_DELWRI) {
496*38614Smckusick 		(void) bawrite(bp);
4978670S 		goto loop;
4988670S 	}
49937736Smckusick 	trace(TR_BRELSE,
50037736Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
50137736Smckusick 	brelvp(bp);
5028670S 	bp->b_flags = B_BUSY;
5038670S 	return (bp);
5048670S }
5058670S 
5068670S /*
5078Sbill  * Wait for I/O completion on the buffer; return errors
5088Sbill  * to the user.
5098Sbill  */
5107015Smckusick biowait(bp)
5116563Smckusic 	register struct buf *bp;
5128Sbill {
5135431Sroot 	int s;
5148Sbill 
51526271Skarels 	s = splbio();
5168Sbill 	while ((bp->b_flags&B_DONE)==0)
5178Sbill 		sleep((caddr_t)bp, PRIBIO);
5185431Sroot 	splx(s);
51937736Smckusick 	/*
52037736Smckusick 	 * Pick up the device's error number and pass it to the user;
52137736Smckusick 	 * if there is an error but the number is 0 set a generalized code.
52237736Smckusick 	 */
52337736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
52437736Smckusick 		return (0);
52537736Smckusick 	if (bp->b_error)
52637736Smckusick 		return (bp->b_error);
52737736Smckusick 	return (EIO);
5288Sbill }
5298Sbill 
5308Sbill /*
53113128Ssam  * Mark I/O complete on a buffer.
53213128Ssam  * If someone should be called, e.g. the pageout
53313128Ssam  * daemon, do so.  Otherwise, wake up anyone
53413128Ssam  * waiting for it.
5358Sbill  */
5367015Smckusick biodone(bp)
5377015Smckusick 	register struct buf *bp;
5388Sbill {
5398Sbill 
540420Sbill 	if (bp->b_flags & B_DONE)
5417015Smckusick 		panic("dup biodone");
5428Sbill 	bp->b_flags |= B_DONE;
5439763Ssam 	if (bp->b_flags & B_CALL) {
5449763Ssam 		bp->b_flags &= ~B_CALL;
5459763Ssam 		(*bp->b_iodone)(bp);
5469763Ssam 		return;
5479763Ssam 	}
5488Sbill 	if (bp->b_flags&B_ASYNC)
5498Sbill 		brelse(bp);
5508Sbill 	else {
5518Sbill 		bp->b_flags &= ~B_WANTED;
5528Sbill 		wakeup((caddr_t)bp);
5538Sbill 	}
5548Sbill }
5558Sbill 
5568Sbill /*
55737736Smckusick  * Ensure that no part of a specified block is in an incore buffer.
55830749Skarels #ifdef SECSIZE
55930749Skarels  * "size" is given in device blocks (the units of b_blkno).
56030749Skarels #endif SECSIZE
5618670S  */
56237736Smckusick blkflush(vp, blkno, size)
56337736Smckusick 	struct vnode *vp;
5648670S 	daddr_t blkno;
5658670S 	long size;
5668670S {
5678670S 	register struct buf *ep;
5688670S 	struct buf *dp;
5698670S 	daddr_t start, last;
57037736Smckusick 	int s, error, allerrors = 0;
5718670S 
5728670S 	start = blkno;
57312644Ssam 	last = start + btodb(size) - 1;
57437736Smckusick 	dp = BUFHASH(vp, blkno);
5758670S loop:
5768670S 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
57737736Smckusick 		if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
5788670S 			continue;
5798670S 		/* look for overlap */
5808670S 		if (ep->b_bcount == 0 || ep->b_blkno > last ||
58112644Ssam 		    ep->b_blkno + btodb(ep->b_bcount) <= start)
5828670S 			continue;
58326271Skarels 		s = splbio();
5848670S 		if (ep->b_flags&B_BUSY) {
5858670S 			ep->b_flags |= B_WANTED;
5868670S 			sleep((caddr_t)ep, PRIBIO+1);
5878670S 			splx(s);
5888670S 			goto loop;
5898670S 		}
5908670S 		if (ep->b_flags & B_DELWRI) {
5918670S 			splx(s);
5928670S 			notavail(ep);
59337736Smckusick 			if (error = bwrite(ep))
59437736Smckusick 				allerrors = error;
5958670S 			goto loop;
5968670S 		}
5978670S 		splx(s);
5988670S 	}
59937736Smckusick 	return (allerrors);
6008670S }
6018670S 
6028670S /*
60337736Smckusick  * Make sure all write-behind blocks associated
60437736Smckusick  * with vp are flushed out (from sync).
6058Sbill  */
6068Sbill bflush(dev)
6077015Smckusick 	dev_t dev;
6088Sbill {
6098Sbill 	register struct buf *bp;
6102325Swnj 	register struct buf *flist;
6115431Sroot 	int s;
6128Sbill 
6138Sbill loop:
61426271Skarels 	s = splbio();
6158670S 	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
6162325Swnj 	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
6177015Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
6187015Smckusick 			continue;
6197015Smckusick 		if (dev == NODEV || dev == bp->b_dev) {
6208Sbill 			notavail(bp);
621*38614Smckusick 			(void) bawrite(bp);
62212173Ssam 			splx(s);
6238Sbill 			goto loop;
6248Sbill 		}
6258Sbill 	}
6265431Sroot 	splx(s);
6278Sbill }
6288Sbill 
#ifdef unused
/*
 * Invalidate blocks associated with vp which are on the freelist.
 * Make sure all write-behind blocks associated with vp are flushed out.
 * (Compiled out: currently unused.)
 */
binvalfree(vp)
	struct vnode *vp;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = splbio();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if (vp == (struct vnode *) 0 || vp == bp->b_vp) {
			if (bp->b_flags & B_DELWRI) {
				notavail(bp);
				(void) splx(s);
				(void) bawrite(bp);
			} else {
				bp->b_flags |= B_INVAL;
				brelvp(bp);
				(void) splx(s);
			}
			/* state changed; restart the scan */
			goto loop;
		}
	}
	(void) splx(s);
}
#endif /* unused */
6612299Skre 
6622299Skre /*
6632299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6642299Skre  *
665*38614Smckusick  * We walk through the buffer pool and invalidate any buffers for the
666*38614Smckusick  * indicated device. Normally this routine is preceeded by a bflush
667*38614Smckusick  * call, so that on a quiescent filesystem there will be no dirty
668*38614Smckusick  * buffers when we are done. We return the count of dirty buffers when
669*38614Smckusick  * we are finished.
6702299Skre  */
6712299Skre binval(dev)
6727015Smckusick 	dev_t dev;
6732299Skre {
6742361Skre 	register struct buf *bp;
6752361Skre 	register struct bufhd *hp;
676*38614Smckusick 	int dirty = 0;
6772361Skre #define dp ((struct buf *)hp)
6782299Skre 
679*38614Smckusick 	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) {
680*38614Smckusick 		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
681*38614Smckusick 			if (bp->b_dev != dev || (bp->b_flags & B_INVAL))
682*38614Smckusick 				continue;
683*38614Smckusick 			notavail(bp);
684*38614Smckusick 			if (bp->b_flags & B_DELWRI) {
685*38614Smckusick 				(void) bawrite(bp);
686*38614Smckusick 				dirty++;
687*38614Smckusick 				continue;
68837736Smckusick 			}
689*38614Smckusick 			bp->b_flags |= B_INVAL;
690*38614Smckusick 			brelvp(bp);
691*38614Smckusick 			brelse(bp);
692*38614Smckusick 		}
693*38614Smckusick 	}
694*38614Smckusick 	return (dirty);
6952299Skre }
69637736Smckusick 
69737736Smckusick brelvp(bp)
69837736Smckusick 	struct buf *bp;
69937736Smckusick {
70037736Smckusick 	struct vnode *vp;
70137736Smckusick 
70237736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
70337736Smckusick 		return;
70437736Smckusick 	vp = bp->b_vp;
70537736Smckusick 	bp->b_vp = (struct vnode *) 0;
70637736Smckusick 	vrele(vp);
70737736Smckusick }
708