xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 40652)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
537736Smckusick  * Redistribution and use in source and binary forms are permitted
637736Smckusick  * provided that the above copyright notice and this paragraph are
737736Smckusick  * duplicated in all such forms and that any documentation,
837736Smckusick  * advertising materials, and other materials related to such
937736Smckusick  * distribution and use acknowledge that the software was developed
1037736Smckusick  * by the University of California, Berkeley.  The name of the
1137736Smckusick  * University may not be used to endorse or promote products derived
1237736Smckusick  * from this software without specific prior written permission.
1337736Smckusick  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1437736Smckusick  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1537736Smckusick  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1637736Smckusick  *
17*40652Smckusick  *	@(#)vfs_cluster.c	7.25 (Berkeley) 03/27/90
1823395Smckusick  */
198Sbill 
2017098Sbloom #include "param.h"
2117098Sbloom #include "user.h"
2217098Sbloom #include "buf.h"
2337736Smckusick #include "vnode.h"
24*40652Smckusick #include "specdev.h"
2539668Smckusick #include "mount.h"
2617098Sbloom #include "trace.h"
2738776Smckusick #include "ucred.h"
288Sbill 
2991Sbill /*
308Sbill  * Read in (if necessary) the block and return a buffer pointer.
318Sbill  */
3238776Smckusick bread(vp, blkno, size, cred, bpp)
3337736Smckusick 	struct vnode *vp;
346563Smckusic 	daddr_t blkno;
356563Smckusic 	int size;
3638776Smckusick 	struct ucred *cred;
3737736Smckusick 	struct buf **bpp;
388Sbill {
398Sbill 	register struct buf *bp;
408Sbill 
418670S 	if (size == 0)
428670S 		panic("bread: size 0");
4337736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
4432608Smckusick 	if (bp->b_flags&(B_DONE|B_DELWRI)) {
4540341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
4637736Smckusick 		return (0);
478Sbill 	}
488Sbill 	bp->b_flags |= B_READ;
498670S 	if (bp->b_bcount > bp->b_bufsize)
508670S 		panic("bread");
5138776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
5238776Smckusick 		crhold(cred);
5338776Smckusick 		bp->b_rcred = cred;
5438776Smckusick 	}
5537736Smckusick 	VOP_STRATEGY(bp);
5640341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
578039Sroot 	u.u_ru.ru_inblock++;		/* pay for read */
5837736Smckusick 	return (biowait(bp));
598Sbill }
608Sbill 
618Sbill /*
628Sbill  * Read in the block, like bread, but also start I/O on the
638Sbill  * read-ahead block (which is not allocated to the caller)
648Sbill  */
6538776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
6637736Smckusick 	struct vnode *vp;
677114Smckusick 	daddr_t blkno; int size;
688592Sroot 	daddr_t rablkno; int rabsize;
6938776Smckusick 	struct ucred *cred;
7037736Smckusick 	struct buf **bpp;
718Sbill {
728Sbill 	register struct buf *bp, *rabp;
738Sbill 
748Sbill 	bp = NULL;
757015Smckusick 	/*
767015Smckusick 	 * If the block isn't in core, then allocate
777015Smckusick 	 * a buffer and initiate i/o (getblk checks
787015Smckusick 	 * for a cache hit).
797015Smckusick 	 */
8037736Smckusick 	if (!incore(vp, blkno)) {
8137736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
8232608Smckusick 		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
838Sbill 			bp->b_flags |= B_READ;
848670S 			if (bp->b_bcount > bp->b_bufsize)
858670S 				panic("breada");
8638776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8738776Smckusick 				crhold(cred);
8838776Smckusick 				bp->b_rcred = cred;
8938776Smckusick 			}
9037736Smckusick 			VOP_STRATEGY(bp);
9140341Smckusick 			trace(TR_BREADMISS, pack(vp, size), blkno);
928039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
937015Smckusick 		} else
9440341Smckusick 			trace(TR_BREADHIT, pack(vp, size), blkno);
958Sbill 	}
967015Smckusick 
977015Smckusick 	/*
987015Smckusick 	 * If there's a read-ahead block, start i/o
997015Smckusick 	 * on it also (as above).
1007015Smckusick 	 */
10139895Smckusick 	if (!incore(vp, rablkno)) {
10237736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
10332608Smckusick 		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
1048Sbill 			brelse(rabp);
10540341Smckusick 			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
1062045Swnj 		} else {
1078Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1088670S 			if (rabp->b_bcount > rabp->b_bufsize)
1098670S 				panic("breadrabp");
11038880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
11138776Smckusick 				crhold(cred);
11238880Smckusick 				rabp->b_rcred = cred;
11338776Smckusick 			}
11437736Smckusick 			VOP_STRATEGY(rabp);
11540341Smckusick 			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
1168039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1178Sbill 		}
1188Sbill 	}
1197015Smckusick 
1207015Smckusick 	/*
1217114Smckusick 	 * If block was in core, let bread get it.
1227114Smckusick 	 * If block wasn't in core, then the read was started
1237114Smckusick 	 * above, and just wait for it.
1247015Smckusick 	 */
1257114Smckusick 	if (bp == NULL)
12638776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
12737736Smckusick 	return (biowait(bp));
1288Sbill }
1298Sbill 
1308Sbill /*
1318Sbill  * Write the buffer, waiting for completion.
1328Sbill  * Then release the buffer.
1338Sbill  */
1348Sbill bwrite(bp)
1357015Smckusick 	register struct buf *bp;
1368Sbill {
13737736Smckusick 	register int flag;
13840226Smckusick 	int s, error;
1398Sbill 
1408Sbill 	flag = bp->b_flags;
1419857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1428Sbill 	if ((flag&B_DELWRI) == 0)
1438039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
14439882Smckusick 	else
14539882Smckusick 		reassignbuf(bp, bp->b_vp);
14640341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1478670S 	if (bp->b_bcount > bp->b_bufsize)
1488670S 		panic("bwrite");
14940226Smckusick 	s = splbio();
15039882Smckusick 	bp->b_vp->v_numoutput++;
15140226Smckusick 	splx(s);
15237736Smckusick 	VOP_STRATEGY(bp);
1537015Smckusick 
1547015Smckusick 	/*
1557015Smckusick 	 * If the write was synchronous, then await i/o completion.
1567015Smckusick 	 * If the write was "delayed", then we put the buffer on
1577015Smckusick 	 * the q of blocks awaiting i/o completion status.
1587015Smckusick 	 */
1598Sbill 	if ((flag&B_ASYNC) == 0) {
16037736Smckusick 		error = biowait(bp);
1618Sbill 		brelse(bp);
16237736Smckusick 	} else if (flag & B_DELWRI) {
1638Sbill 		bp->b_flags |= B_AGE;
16437736Smckusick 		error = 0;
16537736Smckusick 	}
16637736Smckusick 	return (error);
1678Sbill }
1688Sbill 
1698Sbill /*
1708Sbill  * Release the buffer, marking it so that if it is grabbed
1718Sbill  * for another purpose it will be written out before being
1728Sbill  * given up (e.g. when writing a partial block where it is
1738Sbill  * assumed that another write for the same block will soon follow).
1748Sbill  * This can't be done for magtape, since writes must be done
1758Sbill  * in the same order as requested.
1768Sbill  */
1778Sbill bdwrite(bp)
1787015Smckusick 	register struct buf *bp;
1798Sbill {
1808Sbill 
18139882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
18239882Smckusick 		bp->b_flags |= B_DELWRI;
18339882Smckusick 		reassignbuf(bp, bp->b_vp);
1848039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
18539882Smckusick 	}
18637736Smckusick 	/*
18739668Smckusick 	 * If this is a tape drive, the write must be initiated.
18837736Smckusick 	 */
18939668Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
1908Sbill 		bawrite(bp);
19139668Smckusick 	} else {
1928Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
1938Sbill 		brelse(bp);
1948Sbill 	}
1958Sbill }
1968Sbill 
1978Sbill /*
1988Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
1998Sbill  */
2008Sbill bawrite(bp)
2017015Smckusick 	register struct buf *bp;
2028Sbill {
2038Sbill 
2048Sbill 	bp->b_flags |= B_ASYNC;
20537736Smckusick 	(void) bwrite(bp);
2068Sbill }
2078Sbill 
2088Sbill /*
2097015Smckusick  * Release the buffer, with no I/O implied.
2108Sbill  */
2118Sbill brelse(bp)
2127015Smckusick 	register struct buf *bp;
2138Sbill {
2142325Swnj 	register struct buf *flist;
2158Sbill 	register s;
2168Sbill 
21740341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
2187015Smckusick 	/*
21939668Smckusick 	 * If a process is waiting for the buffer, or
22039668Smckusick 	 * is waiting for a free buffer, awaken it.
2217015Smckusick 	 */
2228Sbill 	if (bp->b_flags&B_WANTED)
2238Sbill 		wakeup((caddr_t)bp);
2242325Swnj 	if (bfreelist[0].b_flags&B_WANTED) {
2252325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2262325Swnj 		wakeup((caddr_t)bfreelist);
2278Sbill 	}
22839668Smckusick 	/*
22939668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
23039668Smckusick 	 */
23139668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
23239668Smckusick 		bp->b_flags &= ~B_ERROR;
23339668Smckusick 
23439668Smckusick 	/*
23539668Smckusick 	 * Disassociate buffers that are no longer valid.
23639668Smckusick 	 */
23739668Smckusick 	if (bp->b_flags & (B_NOCACHE|B_ERROR))
23837736Smckusick 		bp->b_flags |= B_INVAL;
23939668Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR|B_INVAL))) {
24039668Smckusick 		if (bp->b_vp)
24139668Smckusick 			brelvp(bp);
24239668Smckusick 		bp->b_flags &= ~B_DELWRI;
24337736Smckusick 	}
2447015Smckusick 	/*
2457015Smckusick 	 * Stick the buffer back on a free list.
2467015Smckusick 	 */
24726271Skarels 	s = splbio();
2488670S 	if (bp->b_bufsize <= 0) {
2498670S 		/* block has no buffer ... put at front of unused buffer list */
2508670S 		flist = &bfreelist[BQ_EMPTY];
2518670S 		binsheadfree(bp, flist);
2528670S 	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
2532325Swnj 		/* block has no info ... put at front of most free list */
2548670S 		flist = &bfreelist[BQ_AGE];
2557015Smckusick 		binsheadfree(bp, flist);
2568Sbill 	} else {
2572325Swnj 		if (bp->b_flags & B_LOCKED)
2582325Swnj 			flist = &bfreelist[BQ_LOCKED];
2592325Swnj 		else if (bp->b_flags & B_AGE)
2602325Swnj 			flist = &bfreelist[BQ_AGE];
2612325Swnj 		else
2622325Swnj 			flist = &bfreelist[BQ_LRU];
2637015Smckusick 		binstailfree(bp, flist);
2648Sbill 	}
26537736Smckusick 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
2668Sbill 	splx(s);
2678Sbill }
2688Sbill 
2698Sbill /*
2708Sbill  * See if the block is associated with some buffer
2718Sbill  * (mainly to avoid getting hung up on a wait in breada)
2728Sbill  */
27337736Smckusick incore(vp, blkno)
27437736Smckusick 	struct vnode *vp;
2757015Smckusick 	daddr_t blkno;
2768Sbill {
2778Sbill 	register struct buf *bp;
2782325Swnj 	register struct buf *dp;
2798Sbill 
28038225Smckusick 	dp = BUFHASH(vp, blkno);
2812325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
28239668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2837015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28491Sbill 			return (1);
28591Sbill 	return (0);
2868Sbill }
2878Sbill 
28839668Smckusick /*
28939668Smckusick  * Return a block if it is in memory.
29039668Smckusick  */
29138776Smckusick baddr(vp, blkno, size, cred, bpp)
29237736Smckusick 	struct vnode *vp;
2936563Smckusic 	daddr_t blkno;
2946563Smckusic 	int size;
29538776Smckusick 	struct ucred *cred;
29637736Smckusick 	struct buf **bpp;
2978Sbill {
2988Sbill 
29937736Smckusick 	if (incore(vp, blkno))
30038776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
30137736Smckusick 	*bpp = 0;
3028Sbill 	return (0);
3038Sbill }
3048Sbill 
3058Sbill /*
3068Sbill  * Assign a buffer for the given block.  If the appropriate
3078Sbill  * block is already associated, return it; otherwise search
3088Sbill  * for the oldest non-busy buffer and reassign it.
3095424Swnj  *
3105424Swnj  * We use splx here because this routine may be called
3115424Swnj  * on the interrupt stack during a dump, and we don't
3125424Swnj  * want to lower the ipl back to 0.
3138Sbill  */
3148Sbill struct buf *
31537736Smckusick getblk(vp, blkno, size)
31637736Smckusick 	register struct vnode *vp;
3176563Smckusic 	daddr_t blkno;
3186563Smckusic 	int size;
3198Sbill {
3208670S 	register struct buf *bp, *dp;
3215424Swnj 	int s;
3228Sbill 
32325255Smckusick 	if (size > MAXBSIZE)
32425255Smckusick 		panic("getblk: size too big");
3257015Smckusick 	/*
32624730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
32724730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
32824730Smckusick 	 * to the maximum number that can be converted to a byte offset
32924730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
33024730Smckusick 	 * or whether it is still a reasonable thing to do is open to
33124730Smckusick 	 * dispute. mkm 9/85
33239668Smckusick 	 *
33339668Smckusick 	 * Make it a panic to see if it ever really happens. mkm 11/89
33424730Smckusick 	 */
33539668Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) {
33639668Smckusick 		panic("getblk: blkno too big");
33724730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
33839668Smckusick 	}
33924730Smckusick 	/*
3407015Smckusick 	 * Search the cache for the block.  If we hit, but
3417015Smckusick 	 * the buffer is in use for i/o, then we wait until
3427015Smckusick 	 * the i/o has completed.
3437015Smckusick 	 */
34437736Smckusick 	dp = BUFHASH(vp, blkno);
3457015Smckusick loop:
3462325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
34739668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
3482325Swnj 		    bp->b_flags&B_INVAL)
3498Sbill 			continue;
35026271Skarels 		s = splbio();
3518Sbill 		if (bp->b_flags&B_BUSY) {
3528Sbill 			bp->b_flags |= B_WANTED;
3538Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3545424Swnj 			splx(s);
3558Sbill 			goto loop;
3568Sbill 		}
35739882Smckusick 		bremfree(bp);
35839882Smckusick 		bp->b_flags |= B_BUSY;
3595424Swnj 		splx(s);
36032608Smckusick 		if (bp->b_bcount != size) {
36139668Smckusick 			printf("getblk: stray size");
36239668Smckusick 			bp->b_flags |= B_INVAL;
36339668Smckusick 			bwrite(bp);
36439668Smckusick 			goto loop;
36532608Smckusick 		}
3668Sbill 		bp->b_flags |= B_CACHE;
36726271Skarels 		return (bp);
3688Sbill 	}
3698670S 	bp = getnewbuf();
3706563Smckusic 	bfree(bp);
3717015Smckusick 	bremhash(bp);
37239668Smckusick 	bgetvp(vp, bp);
37339668Smckusick 	bp->b_lblkno = blkno;
3746563Smckusic 	bp->b_blkno = blkno;
3758670S 	bp->b_error = 0;
37637736Smckusick 	bp->b_resid = 0;
37737736Smckusick 	binshash(bp, dp);
37839668Smckusick 	brealloc(bp, size);
37926271Skarels 	return (bp);
3808Sbill }
3818Sbill 
3828Sbill /*
3838Sbill  * get an empty block,
3848Sbill  * not assigned to any particular device
3858Sbill  */
3868Sbill struct buf *
3876563Smckusic geteblk(size)
3886563Smckusic 	int size;
3898Sbill {
3908670S 	register struct buf *bp, *flist;
3918Sbill 
39225255Smckusick 	if (size > MAXBSIZE)
39325255Smckusick 		panic("geteblk: size too big");
3948670S 	bp = getnewbuf();
3958670S 	bp->b_flags |= B_INVAL;
3967015Smckusick 	bfree(bp);
3977015Smckusick 	bremhash(bp);
3988670S 	flist = &bfreelist[BQ_AGE];
39937736Smckusick 	bp->b_error = 0;
40037736Smckusick 	bp->b_resid = 0;
4018670S 	binshash(bp, flist);
40239668Smckusick 	brealloc(bp, size);
40326271Skarels 	return (bp);
4048Sbill }
4058Sbill 
4068Sbill /*
4076563Smckusic  * Allocate space associated with a buffer.
4086563Smckusic  */
4096563Smckusic brealloc(bp, size)
4106563Smckusic 	register struct buf *bp;
4116563Smckusic 	int size;
4126563Smckusic {
4136563Smckusic 	daddr_t start, last;
4146563Smckusic 	register struct buf *ep;
4156563Smckusic 	struct buf *dp;
4166563Smckusic 	int s;
4176563Smckusic 
4186563Smckusic 	if (size == bp->b_bcount)
41939668Smckusick 		return;
42039668Smckusick 	allocbuf(bp, size);
4218670S }
4228670S 
4238670S /*
4248670S  * Find a buffer which is available for use.
4258670S  * Select something from a free list.
4268670S  * Preference is to AGE list, then LRU list.
4278670S  */
4288670S struct buf *
4298670S getnewbuf()
4308670S {
4318670S 	register struct buf *bp, *dp;
43238776Smckusick 	register struct ucred *cred;
4338670S 	int s;
4348670S 
4358670S loop:
43626271Skarels 	s = splbio();
4378670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4388670S 		if (dp->av_forw != dp)
4398670S 			break;
4408670S 	if (dp == bfreelist) {		/* no free blocks */
4418670S 		dp->b_flags |= B_WANTED;
4428670S 		sleep((caddr_t)dp, PRIBIO+1);
44312170Ssam 		splx(s);
4448670S 		goto loop;
4458670S 	}
44639882Smckusick 	bp = dp->av_forw;
44739882Smckusick 	bremfree(bp);
44839882Smckusick 	bp->b_flags |= B_BUSY;
4498670S 	splx(s);
4508670S 	if (bp->b_flags & B_DELWRI) {
45138614Smckusick 		(void) bawrite(bp);
4528670S 		goto loop;
4538670S 	}
45440341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
45539668Smckusick 	if (bp->b_vp)
45639668Smckusick 		brelvp(bp);
45738776Smckusick 	if (bp->b_rcred != NOCRED) {
45838776Smckusick 		cred = bp->b_rcred;
45938776Smckusick 		bp->b_rcred = NOCRED;
46038776Smckusick 		crfree(cred);
46138776Smckusick 	}
46238776Smckusick 	if (bp->b_wcred != NOCRED) {
46338776Smckusick 		cred = bp->b_wcred;
46438776Smckusick 		bp->b_wcred = NOCRED;
46538776Smckusick 		crfree(cred);
46638776Smckusick 	}
4678670S 	bp->b_flags = B_BUSY;
4688670S 	return (bp);
4698670S }
4708670S 
4718670S /*
4728Sbill  * Wait for I/O completion on the buffer; return errors
4738Sbill  * to the user.
4748Sbill  */
4757015Smckusick biowait(bp)
4766563Smckusic 	register struct buf *bp;
4778Sbill {
4785431Sroot 	int s;
4798Sbill 
48026271Skarels 	s = splbio();
48138776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
4828Sbill 		sleep((caddr_t)bp, PRIBIO);
4835431Sroot 	splx(s);
48437736Smckusick 	/*
48537736Smckusick 	 * Pick up the device's error number and pass it to the user;
48637736Smckusick 	 * if there is an error but the number is 0 set a generalized code.
48737736Smckusick 	 */
48837736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
48937736Smckusick 		return (0);
49037736Smckusick 	if (bp->b_error)
49137736Smckusick 		return (bp->b_error);
49237736Smckusick 	return (EIO);
4938Sbill }
4948Sbill 
4958Sbill /*
49613128Ssam  * Mark I/O complete on a buffer.
49713128Ssam  * If someone should be called, e.g. the pageout
49813128Ssam  * daemon, do so.  Otherwise, wake up anyone
49913128Ssam  * waiting for it.
5008Sbill  */
5017015Smckusick biodone(bp)
5027015Smckusick 	register struct buf *bp;
5038Sbill {
50439882Smckusick 	register struct vnode *vp;
5058Sbill 
506420Sbill 	if (bp->b_flags & B_DONE)
5077015Smckusick 		panic("dup biodone");
5088Sbill 	bp->b_flags |= B_DONE;
50939882Smckusick 	if ((bp->b_flags & B_READ) == 0) {
51038776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
51139882Smckusick 		if (vp = bp->b_vp) {
51239882Smckusick 			vp->v_numoutput--;
51339882Smckusick 			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
51439882Smckusick 				if (vp->v_numoutput < 0)
51539882Smckusick 					panic("biodone: neg numoutput");
51639882Smckusick 				vp->v_flag &= ~VBWAIT;
51739882Smckusick 				wakeup((caddr_t)&vp->v_numoutput);
51839882Smckusick 			}
51939882Smckusick 		}
52039882Smckusick 	}
5219763Ssam 	if (bp->b_flags & B_CALL) {
5229763Ssam 		bp->b_flags &= ~B_CALL;
5239763Ssam 		(*bp->b_iodone)(bp);
5249763Ssam 		return;
5259763Ssam 	}
5268Sbill 	if (bp->b_flags&B_ASYNC)
5278Sbill 		brelse(bp);
5288Sbill 	else {
5298Sbill 		bp->b_flags &= ~B_WANTED;
5308Sbill 		wakeup((caddr_t)bp);
5318Sbill 	}
5328Sbill }
5338Sbill 
5348Sbill /*
53537736Smckusick  * Make sure all write-behind blocks associated
53638776Smckusick  * with mount point are flushed out (from sync).
5378Sbill  */
53839668Smckusick mntflushbuf(mountp, flags)
53938776Smckusick 	struct mount *mountp;
54039668Smckusick 	int flags;
5418Sbill {
54239668Smckusick 	register struct vnode *vp;
54339764Smckusick 	struct vnode *nvp;
54439668Smckusick 
54539668Smckusick loop:
54639764Smckusick 	for (vp = mountp->m_mounth; vp; vp = nvp) {
54739764Smckusick 		nvp = vp->v_mountf;
54839668Smckusick 		if (vget(vp))
54939668Smckusick 			goto loop;
55039668Smckusick 		vflushbuf(vp, flags);
55139668Smckusick 		vput(vp);
55239668Smckusick 	}
55339668Smckusick }
55439668Smckusick 
55539668Smckusick /*
55639668Smckusick  * Flush all dirty buffers associated with a vnode.
55739668Smckusick  */
55839668Smckusick vflushbuf(vp, flags)
55939668Smckusick 	register struct vnode *vp;
56039668Smckusick 	int flags;
56139668Smckusick {
5628Sbill 	register struct buf *bp;
56339668Smckusick 	struct buf *nbp;
5645431Sroot 	int s;
5658Sbill 
5668Sbill loop:
56726271Skarels 	s = splbio();
56839882Smckusick 	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
56939668Smckusick 		nbp = bp->b_blockf;
57039668Smckusick 		if ((bp->b_flags & B_BUSY))
57139668Smckusick 			continue;
57239668Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
57339882Smckusick 			panic("vflushbuf: not dirty");
57439882Smckusick 		bremfree(bp);
57539882Smckusick 		bp->b_flags |= B_BUSY;
57639668Smckusick 		splx(s);
57739882Smckusick 		/*
57839882Smckusick 		 * Wait for I/O associated with indirect blocks to complete,
57939882Smckusick 		 * since there is no way to quickly wait for them below.
58039882Smckusick 		 * NB - This is really specific to ufs, but is done here
58139882Smckusick 		 * as it is easier and quicker.
58239882Smckusick 		 */
58339882Smckusick 		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
58439882Smckusick 			(void) bawrite(bp);
58540639Smckusick 			s = splbio();
58639882Smckusick 		} else {
58739882Smckusick 			(void) bwrite(bp);
58839882Smckusick 			goto loop;
58939882Smckusick 		}
59039668Smckusick 	}
59139738Smckusick 	splx(s);
59239668Smckusick 	if ((flags & B_SYNC) == 0)
59339668Smckusick 		return;
59439668Smckusick 	s = splbio();
59539882Smckusick 	while (vp->v_numoutput) {
59639882Smckusick 		vp->v_flag |= VBWAIT;
59739882Smckusick 		sleep((caddr_t)&vp->v_numoutput, PRIBIO+1);
59837736Smckusick 	}
59939738Smckusick 	splx(s);
60039882Smckusick 	if (vp->v_dirtyblkhd) {
60139882Smckusick 		vprint("vflushbuf: dirty", vp);
60239882Smckusick 		goto loop;
60339882Smckusick 	}
6048Sbill }
6052299Skre 
6062299Skre /*
6072299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6082299Skre  *
60939668Smckusick  * Go through the list of vnodes associated with the file system;
61039668Smckusick  * for each vnode invalidate any buffers that it holds. Normally
61139668Smckusick  * this routine is preceeded by a bflush call, so that on a quiescent
61239668Smckusick  * filesystem there will be no dirty buffers when we are done. Binval
61339668Smckusick  * returns the count of dirty buffers when it is finished.
6142299Skre  */
61539668Smckusick mntinvalbuf(mountp)
61638776Smckusick 	struct mount *mountp;
6172299Skre {
61839668Smckusick 	register struct vnode *vp;
61939764Smckusick 	struct vnode *nvp;
62039668Smckusick 	int dirty = 0;
62139668Smckusick 
62239668Smckusick loop:
62339764Smckusick 	for (vp = mountp->m_mounth; vp; vp = nvp) {
62439764Smckusick 		nvp = vp->v_mountf;
62539668Smckusick 		if (vget(vp))
62639668Smckusick 			goto loop;
62739668Smckusick 		dirty += vinvalbuf(vp, 1);
62839668Smckusick 		vput(vp);
62939668Smckusick 	}
63039668Smckusick 	return (dirty);
63139668Smckusick }
63239668Smckusick 
63339668Smckusick /*
63439668Smckusick  * Flush out and invalidate all buffers associated with a vnode.
63539668Smckusick  * Called with the underlying object locked.
63639668Smckusick  */
63739668Smckusick vinvalbuf(vp, save)
63839668Smckusick 	register struct vnode *vp;
63939668Smckusick 	int save;
64039668Smckusick {
6412361Skre 	register struct buf *bp;
64239882Smckusick 	struct buf *nbp, *blist;
64338633Smckusick 	int s, dirty = 0;
6442299Skre 
64539882Smckusick 	for (;;) {
64639882Smckusick 		if (blist = vp->v_dirtyblkhd)
64739882Smckusick 			/* void */;
64839882Smckusick 		else if (blist = vp->v_cleanblkhd)
64939882Smckusick 			/* void */;
65039882Smckusick 		else
65139882Smckusick 			break;
65239882Smckusick 		for (bp = blist; bp; bp = nbp) {
65339882Smckusick 			nbp = bp->b_blockf;
65439882Smckusick 			s = splbio();
65539882Smckusick 			if (bp->b_flags & B_BUSY) {
65639882Smckusick 				bp->b_flags |= B_WANTED;
65739882Smckusick 				sleep((caddr_t)bp, PRIBIO+1);
65839882Smckusick 				splx(s);
65939882Smckusick 				break;
66039882Smckusick 			}
66139882Smckusick 			bremfree(bp);
66239882Smckusick 			bp->b_flags |= B_BUSY;
66338808Smckusick 			splx(s);
66439882Smckusick 			if (save && (bp->b_flags & B_DELWRI)) {
66538614Smckusick 				dirty++;
66639668Smckusick 				(void) bwrite(bp);
66739882Smckusick 				break;
66837736Smckusick 			}
66940034Smckusick 			if (bp->b_vp != vp)
67040034Smckusick 				reassignbuf(bp, bp->b_vp);
67140034Smckusick 			else
67240034Smckusick 				bp->b_flags |= B_INVAL;
67339882Smckusick 			brelse(bp);
67438614Smckusick 		}
67538614Smckusick 	}
67639882Smckusick 	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
67739668Smckusick 		panic("vinvalbuf: flush failed");
67838614Smckusick 	return (dirty);
6792299Skre }
68037736Smckusick 
68139668Smckusick /*
68239668Smckusick  * Associate a buffer with a vnode.
68339668Smckusick  */
68439668Smckusick bgetvp(vp, bp)
68539668Smckusick 	register struct vnode *vp;
68639668Smckusick 	register struct buf *bp;
68739668Smckusick {
68839668Smckusick 
68939668Smckusick 	if (bp->b_vp)
69039668Smckusick 		panic("bgetvp: not free");
69139808Smckusick 	VHOLD(vp);
69239668Smckusick 	bp->b_vp = vp;
69339668Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
69439668Smckusick 		bp->b_dev = vp->v_rdev;
69539668Smckusick 	else
69639668Smckusick 		bp->b_dev = NODEV;
69739668Smckusick 	/*
69839668Smckusick 	 * Insert onto list for new vnode.
69939668Smckusick 	 */
70039882Smckusick 	if (vp->v_cleanblkhd) {
70139882Smckusick 		bp->b_blockf = vp->v_cleanblkhd;
70239882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
70339882Smckusick 		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
70439882Smckusick 		vp->v_cleanblkhd = bp;
70539668Smckusick 	} else {
70639882Smckusick 		vp->v_cleanblkhd = bp;
70739882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
70839668Smckusick 		bp->b_blockf = NULL;
70939668Smckusick 	}
71039668Smckusick }
71139668Smckusick 
71239668Smckusick /*
71339668Smckusick  * Disassociate a buffer from a vnode.
71439668Smckusick  */
71537736Smckusick brelvp(bp)
71639668Smckusick 	register struct buf *bp;
71737736Smckusick {
71839668Smckusick 	struct buf *bq;
71937736Smckusick 	struct vnode *vp;
72037736Smckusick 
72137736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
72239668Smckusick 		panic("brelvp: NULL");
72339668Smckusick 	/*
72439668Smckusick 	 * Delete from old vnode list, if on one.
72539668Smckusick 	 */
72639668Smckusick 	if (bp->b_blockb) {
72739668Smckusick 		if (bq = bp->b_blockf)
72839668Smckusick 			bq->b_blockb = bp->b_blockb;
72939668Smckusick 		*bp->b_blockb = bq;
73039668Smckusick 		bp->b_blockf = NULL;
73139668Smckusick 		bp->b_blockb = NULL;
73239668Smckusick 	}
73337736Smckusick 	vp = bp->b_vp;
73437736Smckusick 	bp->b_vp = (struct vnode *) 0;
73539808Smckusick 	HOLDRELE(vp);
73637736Smckusick }
73739668Smckusick 
73839668Smckusick /*
73939668Smckusick  * Reassign a buffer from one vnode to another.
74039668Smckusick  * Used to assign file specific control information
74139668Smckusick  * (indirect blocks) to the vnode to which they belong.
74239668Smckusick  */
74339668Smckusick reassignbuf(bp, newvp)
74439668Smckusick 	register struct buf *bp;
74539668Smckusick 	register struct vnode *newvp;
74639668Smckusick {
74739882Smckusick 	register struct buf *bq, **listheadp;
74839668Smckusick 
74939882Smckusick 	if (newvp == NULL)
75039882Smckusick 		panic("reassignbuf: NULL");
75139668Smckusick 	/*
75239668Smckusick 	 * Delete from old vnode list, if on one.
75339668Smckusick 	 */
75439668Smckusick 	if (bp->b_blockb) {
75539668Smckusick 		if (bq = bp->b_blockf)
75639668Smckusick 			bq->b_blockb = bp->b_blockb;
75739668Smckusick 		*bp->b_blockb = bq;
75839668Smckusick 	}
75939668Smckusick 	/*
76039882Smckusick 	 * If dirty, put on list of dirty buffers;
76139882Smckusick 	 * otherwise insert onto list of clean buffers.
76239668Smckusick 	 */
76339882Smckusick 	if (bp->b_flags & B_DELWRI)
76439882Smckusick 		listheadp = &newvp->v_dirtyblkhd;
76539882Smckusick 	else
76639882Smckusick 		listheadp = &newvp->v_cleanblkhd;
76739882Smckusick 	if (*listheadp) {
76839882Smckusick 		bp->b_blockf = *listheadp;
76939882Smckusick 		bp->b_blockb = listheadp;
77039882Smckusick 		bp->b_blockf->b_blockb = &bp->b_blockf;
77139882Smckusick 		*listheadp = bp;
77239668Smckusick 	} else {
77339882Smckusick 		*listheadp = bp;
77439882Smckusick 		bp->b_blockb = listheadp;
77539668Smckusick 		bp->b_blockf = NULL;
77639668Smckusick 	}
77739668Smckusick }
778