xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 46151)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
544454Sbostic  * %sccs.include.redist.c%
637736Smckusick  *
7*46151Smckusick  *	@(#)vfs_cluster.c	7.32 (Berkeley) 01/28/91
823395Smckusick  */
98Sbill 
1017098Sbloom #include "param.h"
1117098Sbloom #include "user.h"
1217098Sbloom #include "buf.h"
1337736Smckusick #include "vnode.h"
1440652Smckusick #include "specdev.h"
1539668Smckusick #include "mount.h"
1617098Sbloom #include "trace.h"
1738776Smckusick #include "ucred.h"
188Sbill 
/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 *
 * Returns 0 on success (including a cache hit), otherwise the
 * error returned by biowait().  On return *bpp points at the
 * busy buffer; the caller is responsible for releasing it.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;	/* vnode the block belongs to */
	daddr_t blkno;		/* logical block number */
	int size;		/* buffer size; must be non-zero */
	struct ucred *cred;	/* credentials charged for the read */
	struct buf **bpp;	/* out: buffer containing the data */
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	/*
	 * Cache hit: the buffer already holds valid data
	 * (completed I/O or a pending delayed write).
	 */
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	/* Record read credentials on the buffer if it has none yet. */
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
528Sbill 
/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.
 *
 * The read-ahead I/O is started asynchronously; its buffer is
 * not returned to the caller.
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;	/* vnode the blocks belong to */
	daddr_t blkno; int size;	/* primary block and its size */
	daddr_t rablkno; int rabsize;	/* read-ahead block and its size */
	struct ucred *cred;	/* credentials charged for the reads */
	struct buf **bpp;	/* out: buffer for the primary block */
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			u.u_ru.ru_inblock++;		/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there is a read-ahead block, start I/O on it too.
	 * A hit is released immediately; a miss is started B_ASYNC
	 * so the buffer is released by biodone() on completion.
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			u.u_ru.ru_inblock++;		/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
1198Sbill 
1208Sbill /*
121*46151Smckusick  * Synchronous write.
122*46151Smckusick  * Release buffer on completion.
1238Sbill  */
1248Sbill bwrite(bp)
1257015Smckusick 	register struct buf *bp;
1268Sbill {
12737736Smckusick 	register int flag;
12840226Smckusick 	int s, error;
1298Sbill 
1308Sbill 	flag = bp->b_flags;
1319857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
132*46151Smckusick 	if ((flag & B_DELWRI) == 0)
1338039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
13439882Smckusick 	else
13539882Smckusick 		reassignbuf(bp, bp->b_vp);
13640341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1378670S 	if (bp->b_bcount > bp->b_bufsize)
1388670S 		panic("bwrite");
13940226Smckusick 	s = splbio();
14039882Smckusick 	bp->b_vp->v_numoutput++;
14140226Smckusick 	splx(s);
14237736Smckusick 	VOP_STRATEGY(bp);
1437015Smckusick 
1447015Smckusick 	/*
145*46151Smckusick 	 * If the write was synchronous, then await I/O completion.
1467015Smckusick 	 * If the write was "delayed", then we put the buffer on
147*46151Smckusick 	 * the queue of blocks awaiting I/O completion status.
1487015Smckusick 	 */
149*46151Smckusick 	if ((flag & B_ASYNC) == 0) {
15037736Smckusick 		error = biowait(bp);
1518Sbill 		brelse(bp);
15237736Smckusick 	} else if (flag & B_DELWRI) {
1538Sbill 		bp->b_flags |= B_AGE;
15437736Smckusick 		error = 0;
15537736Smckusick 	}
15637736Smckusick 	return (error);
1578Sbill }
1588Sbill 
/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{

	/* First time this buffer goes dirty: account for the write
	 * and move it to the vnode's dirty list. */
	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		u.u_ru.ru_oublock++;		/* noone paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 * (The B_TAPE ioctl returns 0 for tape devices.)
	 */
	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
		bawrite(bp);
	} else {
		/* Mark valid and dirty, then return it to the cache. */
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}
1898Sbill 
/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
2068Sbill 
/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 *
 * The buffer is placed on the appropriate free list and any
 * waiters (for this buffer, or for any free buffer) are woken.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags & B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		/* Valid data: choose a queue by locked/age status and
		 * place at the tail so it is reclaimed last. */
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}
2678Sbill 
2688Sbill /*
269*46151Smckusick  * Check to see if a block is currently memory resident.
2708Sbill  */
27137736Smckusick incore(vp, blkno)
27237736Smckusick 	struct vnode *vp;
2737015Smckusick 	daddr_t blkno;
2748Sbill {
2758Sbill 	register struct buf *bp;
2762325Swnj 	register struct buf *dp;
2778Sbill 
27838225Smckusick 	dp = BUFHASH(vp, blkno);
2792325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
28039668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2817015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28291Sbill 			return (1);
28391Sbill 	return (0);
2848Sbill }
2858Sbill 
/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 *
 * The returned buffer is marked B_BUSY; the caller must
 * release it with brelse/bwrite/bdwrite.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, the we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			/* Rescan: the chain may have changed while asleep. */
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/* Size mismatch: flush the stale buffer and retry. */
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * Not in the cache: take a buffer from a free list and
	 * assign it to this (vnode, block) pair.
	 */
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
3448Sbill 
/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 *
 * The buffer is marked B_INVAL (it caches no block) and is
 * hashed on the BQ_AGE header so it can be found and reclaimed.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	allocbuf(bp, size);
	return (bp);
}
3688Sbill 
/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 *
 * Memory is moved between buffers in CLBYTES units with pagemove().
 * Always returns 1 in this revision; on return tp->b_bcount == size.
 */
allocbuf(tp, size)
	register struct buf *tp;	/* buffer to resize */
	int size;			/* desired byte count */
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		ep = bfreelist[BQ_EMPTY].av_forw;
		if (ep == &bfreelist[BQ_EMPTY])
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		/* Move the tail of tp's memory into the spare header. */
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		/* Steal `take' bytes from the end of bp's memory. */
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		/* Fully drained header goes back on the BQ_EMPTY list. */
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &bfreelist[BQ_EMPTY]);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}
4398670S 
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * Sleeps until a buffer is available.  Dirty buffers are
 * written (asynchronously) and the search restarts.  The
 * returned buffer is B_BUSY with its vnode and credentials
 * released.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	/* Scan queues from BQ_AGE downward for a non-empty list. */
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	/* Dirty buffer: push it out and look for another. */
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	/* Drop any credentials still held from previous I/O. */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	return (bp);
}
4878670S 
4888670S /*
489*46151Smckusick  * Wait for I/O to complete.
490*46151Smckusick  *
491*46151Smckusick  * Extract and return any errors associated with the I/O.
492*46151Smckusick  * If the error flag is set, but no specific error is
493*46151Smckusick  * given, return EIO.
4948Sbill  */
4957015Smckusick biowait(bp)
4966563Smckusic 	register struct buf *bp;
4978Sbill {
4985431Sroot 	int s;
4998Sbill 
50026271Skarels 	s = splbio();
50138776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5028Sbill 		sleep((caddr_t)bp, PRIBIO);
5035431Sroot 	splx(s);
50437736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
50537736Smckusick 		return (0);
50637736Smckusick 	if (bp->b_error)
50737736Smckusick 		return (bp->b_error);
50837736Smckusick 	return (EIO);
5098Sbill }
5108Sbill 
/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 *
 * For completed writes, the vnode's pending-output count is
 * decremented and anyone in vflushbuf waiting on it is woken.
 */
biodone(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0) {
		/* Write finished: buffer no longer has a dirty region. */
		bp->b_dirtyoff = bp->b_dirtyend = 0;
		if (vp = bp->b_vp) {
			vp->v_numoutput--;
			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
				if (vp->v_numoutput < 0)
					panic("biodone: neg numoutput");
				vp->v_flag &= ~VBWAIT;
				wakeup((caddr_t)&vp->v_numoutput);
			}
		}
	}
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	/* Async I/O releases the buffer itself; sync wakes the waiter. */
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
5498Sbill 
/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 *
 * The mount point must be marked busy by the caller.
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;		/* passed through to vflushbuf (e.g. B_SYNC) */
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		/* Restart if the vnode moved off this mount while unlocked. */
		if (vp->v_mount != mountp)
			goto loop;
	}
}
57239668Smckusick 
/*
 * Flush all dirty buffers associated with a vnode.
 *
 * If B_SYNC is set in flags, wait for all pending output to
 * drain and verify the dirty list is empty before returning.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
			(void) bawrite(bp);
			s = splbio();
		} else {
			/* Indirect block: write synchronously and rescan. */
			(void) bwrite(bp);
			goto loop;
		}
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	/* Wait for all writes started above to complete. */
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
	}
	splx(s);
	/* New dirty buffers may have appeared while sleeping. */
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
6232299Skre 
/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done. This
 * routine returns the count of dirty buffers when it is finished.
 *
 * The mount point must be marked busy by the caller.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		/* Restart if the vnode moved off this mount while unlocked. */
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}
65239668Smckusick 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * If `save' is non-zero, dirty buffers are written out (and
 * counted) instead of being discarded.  Returns the number of
 * dirty buffers encountered.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	/* Drain the dirty list first, then the clean list, until
	 * both are empty. */
	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				/* List may have changed; restart outer loop. */
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) bwrite(bp);
				break;
			}
			/* Buffers owned by another vnode (e.g. indirect
			 * blocks) are re-queued there, not invalidated. */
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}
70037736Smckusick 
/*
 * Associate a buffer with a vnode.
 *
 * Takes a hold on the vnode and inserts the buffer at the head
 * of the vnode's clean list.  The buffer must not already be
 * associated with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	/* Device buffers record the device number for the driver. */
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (vp->v_cleanblkhd) {
		bp->b_blockf = vp->v_cleanblkhd;
		bp->b_blockb = &vp->v_cleanblkhd;
		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
		vp->v_cleanblkhd = bp;
	} else {
		vp->v_cleanblkhd = bp;
		bp->b_blockb = &vp->v_cleanblkhd;
		bp->b_blockf = NULL;
	}
}
73139668Smckusick 
/*
 * Disassociate a buffer from a vnode.
 *
 * Removes the buffer from the vnode's block list (if on one)
 * and releases the hold taken by bgetvp().
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
75739668Smckusick 
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * The buffer is placed on the new vnode's dirty or clean list
 * according to its B_DELWRI state.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (*listheadp) {
		/* Push onto the head of the (non-empty) list. */
		bp->b_blockf = *listheadp;
		bp->b_blockb = listheadp;
		bp->b_blockf->b_blockb = &bp->b_blockf;
		*listheadp = bp;
	} else {
		*listheadp = bp;
		bp->b_blockb = listheadp;
		bp->b_blockf = NULL;
	}
}
798