xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 48360)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
544454Sbostic  * %sccs.include.redist.c%
637736Smckusick  *
7*48360Smckusick  *	@(#)vfs_cluster.c	7.37 (Berkeley) 04/19/91
823395Smckusick  */
98Sbill 
1017098Sbloom #include "param.h"
1147545Skarels #include "proc.h"
1217098Sbloom #include "buf.h"
1337736Smckusick #include "vnode.h"
1440652Smckusick #include "specdev.h"
1539668Smckusick #include "mount.h"
1617098Sbloom #include "trace.h"
1747568Skarels #include "resourcevar.h"
188Sbill 
1991Sbill /*
2046151Smckusick  * Find the block in the buffer pool.
2146151Smckusick  * If the buffer is not present, allocate a new buffer and load
2246151Smckusick  * its contents according to the filesystem fill routine.
238Sbill  */
2438776Smckusick bread(vp, blkno, size, cred, bpp)
2537736Smckusick 	struct vnode *vp;
266563Smckusic 	daddr_t blkno;
276563Smckusic 	int size;
2838776Smckusick 	struct ucred *cred;
2937736Smckusick 	struct buf **bpp;
308Sbill {
3147545Skarels 	struct proc *p = curproc;		/* XXX */
328Sbill 	register struct buf *bp;
338Sbill 
348670S 	if (size == 0)
358670S 		panic("bread: size 0");
3637736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
3746151Smckusick 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
3840341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
3937736Smckusick 		return (0);
408Sbill 	}
418Sbill 	bp->b_flags |= B_READ;
428670S 	if (bp->b_bcount > bp->b_bufsize)
438670S 		panic("bread");
4438776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
4538776Smckusick 		crhold(cred);
4638776Smckusick 		bp->b_rcred = cred;
4738776Smckusick 	}
4837736Smckusick 	VOP_STRATEGY(bp);
4940341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
5047545Skarels 	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
5137736Smckusick 	return (biowait(bp));
528Sbill }
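
/*
 * Illustrative sketch only: how a filesystem read path typically calls
 * bread() above.  The names example_read, lbn, and bsize are hypothetical;
 * cred may be NOCRED.
 */
#ifdef notdef
example_read(vp, lbn, bsize, cred)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct ucred *cred;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, cred, &bp)) {
		brelse(bp);		/* release the buffer even on error */
		return (error);
	}
	/* ... consume the data at bp->b_un.b_addr ... */
	brelse(bp);			/* give the buffer back to the cache */
	return (0);
}
#endif /* notdef */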
538Sbill 
548Sbill /*
5546151Smckusick  * Operates like bread, but also starts I/O on the specified
5646151Smckusick  * read-ahead block.
578Sbill  */
5838776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
5937736Smckusick 	struct vnode *vp;
607114Smckusick 	daddr_t blkno; int size;
618592Sroot 	daddr_t rablkno; int rabsize;
6238776Smckusick 	struct ucred *cred;
6337736Smckusick 	struct buf **bpp;
648Sbill {
6547545Skarels 	struct proc *p = curproc;		/* XXX */
668Sbill 	register struct buf *bp, *rabp;
678Sbill 
688Sbill 	bp = NULL;
697015Smckusick 	/*
7046151Smckusick 	 * If the block is not memory resident,
7146151Smckusick 	 * allocate a buffer and start I/O.
727015Smckusick 	 */
7337736Smckusick 	if (!incore(vp, blkno)) {
7437736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
7546151Smckusick 		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
768Sbill 			bp->b_flags |= B_READ;
778670S 			if (bp->b_bcount > bp->b_bufsize)
788670S 				panic("breada");
7938776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8038776Smckusick 				crhold(cred);
8138776Smckusick 				bp->b_rcred = cred;
8238776Smckusick 			}
8337736Smckusick 			VOP_STRATEGY(bp);
8440341Smckusick 			trace(TR_BREADMISS, pack(vp, size), blkno);
8547545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
867015Smckusick 		} else
8740341Smckusick 			trace(TR_BREADHIT, pack(vp, size), blkno);
888Sbill 	}
897015Smckusick 
907015Smckusick 	/*
9146151Smckusick 	 * If there is a read-ahead block, start I/O on it too.
927015Smckusick 	 */
9339895Smckusick 	if (!incore(vp, rablkno)) {
9437736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
9546151Smckusick 		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
968Sbill 			brelse(rabp);
9740341Smckusick 			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
982045Swnj 		} else {
9946151Smckusick 			rabp->b_flags |= B_ASYNC | B_READ;
1008670S 			if (rabp->b_bcount > rabp->b_bufsize)
1018670S 				panic("breadrabp");
10238880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
10338776Smckusick 				crhold(cred);
10438880Smckusick 				rabp->b_rcred = cred;
10538776Smckusick 			}
10637736Smckusick 			VOP_STRATEGY(rabp);
10740341Smckusick 			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
10847545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
1098Sbill 		}
1108Sbill 	}
1117015Smckusick 
1127015Smckusick 	/*
11346151Smckusick 	 * If block was memory resident, let bread get it.
11446151Smckusick 	 * If block was not memory resident, the read was
11546151Smckusick 	 * started above, so just wait for the read to complete.
1167015Smckusick 	 */
1177114Smckusick 	if (bp == NULL)
11838776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
11937736Smckusick 	return (biowait(bp));
1208Sbill }
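
/*
 * Illustrative sketch only: within a routine like example_read() above,
 * a sequential reader can substitute breada() to prime the next logical
 * block.  rasize is a hypothetical read-ahead size chosen by the caller.
 */
#ifdef notdef
	if (error = breada(vp, lbn, bsize, lbn + 1, rasize, cred, &bp)) {
		brelse(bp);
		return (error);
	}
#endif /* notdef */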
1218Sbill 
1228Sbill /*
12346151Smckusick  * Synchronous write.
12446151Smckusick  * Release buffer on completion.
1258Sbill  */
1268Sbill bwrite(bp)
1277015Smckusick 	register struct buf *bp;
1288Sbill {
12947545Skarels 	struct proc *p = curproc;		/* XXX */
13037736Smckusick 	register int flag;
13140226Smckusick 	int s, error = 0;
1328Sbill 
1338Sbill 	flag = bp->b_flags;
1349857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
13546151Smckusick 	if ((flag & B_DELWRI) == 0)
13647545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
13739882Smckusick 	else
13839882Smckusick 		reassignbuf(bp, bp->b_vp);
13940341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1408670S 	if (bp->b_bcount > bp->b_bufsize)
1418670S 		panic("bwrite");
14240226Smckusick 	s = splbio();
14339882Smckusick 	bp->b_vp->v_numoutput++;
14440226Smckusick 	splx(s);
14537736Smckusick 	VOP_STRATEGY(bp);
1467015Smckusick 
1477015Smckusick 	/*
14846151Smckusick 	 * If the write was synchronous, then await I/O completion.
1497015Smckusick 	 * If the write was asynchronous and the buffer was marked for
15046151Smckusick 	 * delayed write, age it so that it is reclaimed soon after completion.
1517015Smckusick 	 */
15246151Smckusick 	if ((flag & B_ASYNC) == 0) {
15337736Smckusick 		error = biowait(bp);
1548Sbill 		brelse(bp);
15537736Smckusick 	} else if (flag & B_DELWRI) {
1568Sbill 		bp->b_flags |= B_AGE;
15737736Smckusick 		error = 0;
15837736Smckusick 	}
15937736Smckusick 	return (error);
1608Sbill }
1618Sbill 
1628Sbill /*
16346151Smckusick  * Delayed write.
16446151Smckusick  *
16546151Smckusick  * The buffer is marked dirty, but is not queued for I/O.
16646151Smckusick  * This routine should be used when the buffer is expected
16746151Smckusick  * to be modified again soon, typically a small write that
16846151Smckusick  * partially fills a buffer.
16946151Smckusick  *
17046151Smckusick  * NB: magnetic tapes cannot be delayed; they must be
17146151Smckusick  * written in the order that the writes are requested.
1728Sbill  */
1738Sbill bdwrite(bp)
1747015Smckusick 	register struct buf *bp;
1758Sbill {
17647545Skarels 	struct proc *p = curproc;		/* XXX */
1778Sbill 
17839882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
17939882Smckusick 		bp->b_flags |= B_DELWRI;
18039882Smckusick 		reassignbuf(bp, bp->b_vp);
18147545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
18239882Smckusick 	}
18337736Smckusick 	/*
18439668Smckusick 	 * If this is a tape drive, the write must be initiated.
18537736Smckusick 	 */
186*48360Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
1878Sbill 		bawrite(bp);
18839668Smckusick 	} else {
18946151Smckusick 		bp->b_flags |= (B_DONE | B_DELWRI);
1908Sbill 		brelse(bp);
1918Sbill 	}
1928Sbill }
1938Sbill 
1948Sbill /*
19546151Smckusick  * Asynchronous write.
19646151Smckusick  * Start I/O on a buffer, but do not wait for it to complete.
19746151Smckusick  * The buffer is released when the I/O completes.
1988Sbill  */
1998Sbill bawrite(bp)
2007015Smckusick 	register struct buf *bp;
2018Sbill {
2028Sbill 
20346151Smckusick 	/*
20446151Smckusick 	 * Setting the ASYNC flag causes bwrite to return
20546151Smckusick 	 * after starting the I/O.
20646151Smckusick 	 */
2078Sbill 	bp->b_flags |= B_ASYNC;
20837736Smckusick 	(void) bwrite(bp);
2098Sbill }
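
/*
 * Illustrative sketch only: how a hypothetical caller chooses among the
 * three write flavors above.  want_sync and block_is_full stand in for
 * whatever policy the caller applies (e.g. an IO_SYNC-style flag, or a
 * test that the block has been completely filled).
 */
#ifdef notdef
	if (want_sync)
		error = bwrite(bp);	/* start the I/O and wait for it */
	else if (block_is_full)
		bawrite(bp);		/* start the I/O, do not wait */
	else
		bdwrite(bp);		/* defer; block will change again soon */
#endif /* notdef */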
2108Sbill 
2118Sbill /*
21246151Smckusick  * Release a buffer.
21346151Smckusick  * Even if the buffer is dirty, no I/O is started.
2148Sbill  */
2158Sbill brelse(bp)
2167015Smckusick 	register struct buf *bp;
2178Sbill {
2182325Swnj 	register struct buf *flist;
21946151Smckusick 	int s;
2208Sbill 
22140341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
2227015Smckusick 	/*
22339668Smckusick 	 * If a process is waiting for the buffer, or
22439668Smckusick 	 * is waiting for a free buffer, awaken it.
2257015Smckusick 	 */
22646151Smckusick 	if (bp->b_flags & B_WANTED)
2278Sbill 		wakeup((caddr_t)bp);
22846151Smckusick 	if (bfreelist[0].b_flags & B_WANTED) {
2292325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2302325Swnj 		wakeup((caddr_t)bfreelist);
2318Sbill 	}
23239668Smckusick 	/*
23339668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
23439668Smckusick 	 */
23539668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
23639668Smckusick 		bp->b_flags &= ~B_ERROR;
23739668Smckusick 	/*
23839668Smckusick 	 * Disassociate buffers that are no longer valid.
23939668Smckusick 	 */
24046151Smckusick 	if (bp->b_flags & (B_NOCACHE | B_ERROR))
24137736Smckusick 		bp->b_flags |= B_INVAL;
24246151Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
24339668Smckusick 		if (bp->b_vp)
24439668Smckusick 			brelvp(bp);
24539668Smckusick 		bp->b_flags &= ~B_DELWRI;
24637736Smckusick 	}
2477015Smckusick 	/*
2487015Smckusick 	 * Stick the buffer back on a free list.
2497015Smckusick 	 */
25026271Skarels 	s = splbio();
2518670S 	if (bp->b_bufsize <= 0) {
2528670S 		/* block has no buffer ... put at front of unused buffer list */
2538670S 		flist = &bfreelist[BQ_EMPTY];
2548670S 		binsheadfree(bp, flist);
25546151Smckusick 	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
2562325Swnj 		/* block has no info ... put at front of most free list */
2578670S 		flist = &bfreelist[BQ_AGE];
2587015Smckusick 		binsheadfree(bp, flist);
2598Sbill 	} else {
2602325Swnj 		if (bp->b_flags & B_LOCKED)
2612325Swnj 			flist = &bfreelist[BQ_LOCKED];
2622325Swnj 		else if (bp->b_flags & B_AGE)
2632325Swnj 			flist = &bfreelist[BQ_AGE];
2642325Swnj 		else
2652325Swnj 			flist = &bfreelist[BQ_LRU];
2667015Smckusick 		binstailfree(bp, flist);
2678Sbill 	}
26846151Smckusick 	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
2698Sbill 	splx(s);
2708Sbill }
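
/*
 * For reference, the free-list placement done by brelse() above:
 *
 *	b_bufsize == 0			head of BQ_EMPTY
 *	B_ERROR or B_INVAL		head of BQ_AGE
 *	B_LOCKED			tail of BQ_LOCKED
 *	B_AGE				tail of BQ_AGE
 *	otherwise			tail of BQ_LRU
 */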
2718Sbill 
2728Sbill /*
27346151Smckusick  * Check to see if a block is currently memory resident.
2748Sbill  */
27537736Smckusick incore(vp, blkno)
27637736Smckusick 	struct vnode *vp;
2777015Smckusick 	daddr_t blkno;
2788Sbill {
2798Sbill 	register struct buf *bp;
2802325Swnj 	register struct buf *dp;
2818Sbill 
28238225Smckusick 	dp = BUFHASH(vp, blkno);
2832325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
28439668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2857015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28691Sbill 			return (1);
28791Sbill 	return (0);
2888Sbill }
2898Sbill 
29039668Smckusick /*
29146151Smckusick  * Check to see if a block is currently memory resident.
29246151Smckusick  * If it is resident, return it. If it is not resident,
29346151Smckusick  * allocate a new buffer and assign it to the block.
29439668Smckusick  */
2958Sbill struct buf *
29637736Smckusick getblk(vp, blkno, size)
29737736Smckusick 	register struct vnode *vp;
2986563Smckusic 	daddr_t blkno;
2996563Smckusic 	int size;
3008Sbill {
3018670S 	register struct buf *bp, *dp;
3025424Swnj 	int s;
3038Sbill 
30425255Smckusick 	if (size > MAXBSIZE)
30525255Smckusick 		panic("getblk: size too big");
3067015Smckusick 	/*
30746151Smckusick 	 * Search the cache for the block. If the buffer is found,
30846151Smckusick 	 * but it is currently locked, then we must wait for it to
30946151Smckusick 	 * become available.
3107015Smckusick 	 */
31137736Smckusick 	dp = BUFHASH(vp, blkno);
3127015Smckusick loop:
3132325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
31439668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
31546151Smckusick 		    (bp->b_flags & B_INVAL))
3168Sbill 			continue;
31726271Skarels 		s = splbio();
31846151Smckusick 		if (bp->b_flags & B_BUSY) {
3198Sbill 			bp->b_flags |= B_WANTED;
32046151Smckusick 			sleep((caddr_t)bp, PRIBIO + 1);
3215424Swnj 			splx(s);
3228Sbill 			goto loop;
3238Sbill 		}
32439882Smckusick 		bremfree(bp);
32539882Smckusick 		bp->b_flags |= B_BUSY;
3265424Swnj 		splx(s);
32732608Smckusick 		if (bp->b_bcount != size) {
32839668Smckusick 		printf("getblk: stray size\n");
32939668Smckusick 			bp->b_flags |= B_INVAL;
33039668Smckusick 			bwrite(bp);
33139668Smckusick 			goto loop;
33232608Smckusick 		}
3338Sbill 		bp->b_flags |= B_CACHE;
33426271Skarels 		return (bp);
3358Sbill 	}
3368670S 	bp = getnewbuf();
3377015Smckusick 	bremhash(bp);
33839668Smckusick 	bgetvp(vp, bp);
33945116Smckusick 	bp->b_bcount = 0;
34039668Smckusick 	bp->b_lblkno = blkno;
3416563Smckusic 	bp->b_blkno = blkno;
3428670S 	bp->b_error = 0;
34337736Smckusick 	bp->b_resid = 0;
34437736Smckusick 	binshash(bp, dp);
34545116Smckusick 	allocbuf(bp, size);
34626271Skarels 	return (bp);
3478Sbill }
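
/*
 * Illustrative sketch only: a caller that will overwrite the entire block
 * can use getblk() above directly and skip the read that bread() would do;
 * clrbuf() is assumed to be the usual macro that zeroes the buffer contents.
 */
#ifdef notdef
	bp = getblk(vp, lbn, bsize);
	clrbuf(bp);			/* old contents are not needed */
	/* ... fill bp->b_un.b_addr and bdwrite() or bwrite() it ... */
#endif /* notdef */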
3488Sbill 
3498Sbill /*
35046151Smckusick  * Allocate a buffer.
35146151Smckusick  * The caller will assign it to a block.
3528Sbill  */
3538Sbill struct buf *
3546563Smckusic geteblk(size)
3556563Smckusic 	int size;
3568Sbill {
3578670S 	register struct buf *bp, *flist;
3588Sbill 
35925255Smckusick 	if (size > MAXBSIZE)
36025255Smckusick 		panic("geteblk: size too big");
3618670S 	bp = getnewbuf();
3628670S 	bp->b_flags |= B_INVAL;
3637015Smckusick 	bremhash(bp);
3648670S 	flist = &bfreelist[BQ_AGE];
36545116Smckusick 	bp->b_bcount = 0;
36637736Smckusick 	bp->b_error = 0;
36737736Smckusick 	bp->b_resid = 0;
3688670S 	binshash(bp, flist);
36945116Smckusick 	allocbuf(bp, size);
37026271Skarels 	return (bp);
3718Sbill }
3728Sbill 
3738Sbill /*
37445116Smckusick  * Expand or contract the actual memory allocated to a buffer.
37546151Smckusick  * Memory is shifted to or from other buffers on the free lists as needed.
3766563Smckusic  */
37745116Smckusick allocbuf(tp, size)
37845116Smckusick 	register struct buf *tp;
3796563Smckusic 	int size;
3806563Smckusic {
38145116Smckusick 	register struct buf *bp, *ep;
38245116Smckusick 	int sizealloc, take, s;
3836563Smckusic 
38445116Smckusick 	sizealloc = roundup(size, CLBYTES);
38545116Smckusick 	/*
38645116Smckusick 	 * Buffer size does not change
38745116Smckusick 	 */
38845116Smckusick 	if (sizealloc == tp->b_bufsize)
38945116Smckusick 		goto out;
39045116Smckusick 	/*
39145116Smckusick 	 * Buffer size is shrinking.
39245116Smckusick 	 * Place excess space in a buffer header taken from the
39345116Smckusick 	 * BQ_EMPTY buffer list and placed on the "most free" list.
39445116Smckusick 	 * If no extra buffer headers are available, leave the
39545116Smckusick 	 * extra space in the present buffer.
39645116Smckusick 	 */
39745116Smckusick 	if (sizealloc < tp->b_bufsize) {
39845116Smckusick 		ep = bfreelist[BQ_EMPTY].av_forw;
39945116Smckusick 		if (ep == &bfreelist[BQ_EMPTY])
40045116Smckusick 			goto out;
40145116Smckusick 		s = splbio();
40245116Smckusick 		bremfree(ep);
40345116Smckusick 		ep->b_flags |= B_BUSY;
40445116Smckusick 		splx(s);
40545116Smckusick 		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
40645116Smckusick 		    (int)tp->b_bufsize - sizealloc);
40745116Smckusick 		ep->b_bufsize = tp->b_bufsize - sizealloc;
40845116Smckusick 		tp->b_bufsize = sizealloc;
40945116Smckusick 		ep->b_flags |= B_INVAL;
41045116Smckusick 		ep->b_bcount = 0;
41145116Smckusick 		brelse(ep);
41245116Smckusick 		goto out;
41345116Smckusick 	}
41445116Smckusick 	/*
41545116Smckusick 	 * More buffer space is needed. Get it out of buffers on
41645116Smckusick 	 * the "most free" list, placing the empty headers on the
41745116Smckusick 	 * BQ_EMPTY buffer header list.
41845116Smckusick 	 */
41945116Smckusick 	while (tp->b_bufsize < sizealloc) {
42045116Smckusick 		take = sizealloc - tp->b_bufsize;
42145116Smckusick 		bp = getnewbuf();
42245116Smckusick 		if (take >= bp->b_bufsize)
42345116Smckusick 			take = bp->b_bufsize;
42445116Smckusick 		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
42545116Smckusick 		    &tp->b_un.b_addr[tp->b_bufsize], take);
42645116Smckusick 		tp->b_bufsize += take;
42745116Smckusick 		bp->b_bufsize = bp->b_bufsize - take;
42845116Smckusick 		if (bp->b_bcount > bp->b_bufsize)
42945116Smckusick 			bp->b_bcount = bp->b_bufsize;
43045116Smckusick 		if (bp->b_bufsize <= 0) {
43145116Smckusick 			bremhash(bp);
43245116Smckusick 			binshash(bp, &bfreelist[BQ_EMPTY]);
43346151Smckusick 			bp->b_dev = NODEV;
43445116Smckusick 			bp->b_error = 0;
43545116Smckusick 			bp->b_flags |= B_INVAL;
43645116Smckusick 		}
43745116Smckusick 		brelse(bp);
43845116Smckusick 	}
43945116Smckusick out:
44045116Smckusick 	tp->b_bcount = size;
44145116Smckusick 	return (1);
4428670S }
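
/*
 * Worked example for allocbuf() above (hypothetical CLBYTES of 1024):
 * a request for 3000 bytes is rounded up to sizealloc = 3072, so
 * b_bufsize becomes 3072 while b_bcount is set to the requested 3000.
 */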
4438670S 
4448670S /*
4458670S  * Find a buffer which is available for use.
4468670S  * Select something from a free list.
4478670S  * Preference is to AGE list, then LRU list.
4488670S  */
4498670S struct buf *
4508670S getnewbuf()
4518670S {
4528670S 	register struct buf *bp, *dp;
45338776Smckusick 	register struct ucred *cred;
4548670S 	int s;
4558670S 
4568670S loop:
45726271Skarels 	s = splbio();
4588670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4598670S 		if (dp->av_forw != dp)
4608670S 			break;
4618670S 	if (dp == bfreelist) {		/* no free blocks */
4628670S 		dp->b_flags |= B_WANTED;
46346151Smckusick 		sleep((caddr_t)dp, PRIBIO + 1);
46412170Ssam 		splx(s);
4658670S 		goto loop;
4668670S 	}
46739882Smckusick 	bp = dp->av_forw;
46839882Smckusick 	bremfree(bp);
46939882Smckusick 	bp->b_flags |= B_BUSY;
4708670S 	splx(s);
4718670S 	if (bp->b_flags & B_DELWRI) {
47238614Smckusick 		(void) bawrite(bp);
4738670S 		goto loop;
4748670S 	}
47540341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
47639668Smckusick 	if (bp->b_vp)
47739668Smckusick 		brelvp(bp);
47838776Smckusick 	if (bp->b_rcred != NOCRED) {
47938776Smckusick 		cred = bp->b_rcred;
48038776Smckusick 		bp->b_rcred = NOCRED;
48138776Smckusick 		crfree(cred);
48238776Smckusick 	}
48338776Smckusick 	if (bp->b_wcred != NOCRED) {
48438776Smckusick 		cred = bp->b_wcred;
48538776Smckusick 		bp->b_wcred = NOCRED;
48638776Smckusick 		crfree(cred);
48738776Smckusick 	}
4888670S 	bp->b_flags = B_BUSY;
48946989Smckusick 	bp->b_dirtyoff = bp->b_dirtyend = 0;
4908670S 	return (bp);
4918670S }
4928670S 
4938670S /*
49446151Smckusick  * Wait for I/O to complete.
49546151Smckusick  *
49646151Smckusick  * Extract and return any errors associated with the I/O.
49746151Smckusick  * If the error flag is set, but no specific error is
49846151Smckusick  * given, return EIO.
4998Sbill  */
5007015Smckusick biowait(bp)
5016563Smckusic 	register struct buf *bp;
5028Sbill {
5035431Sroot 	int s;
5048Sbill 
50526271Skarels 	s = splbio();
50638776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5078Sbill 		sleep((caddr_t)bp, PRIBIO);
5085431Sroot 	splx(s);
50937736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
51037736Smckusick 		return (0);
51137736Smckusick 	if (bp->b_error)
51237736Smckusick 		return (bp->b_error);
51337736Smckusick 	return (EIO);
5148Sbill }
5158Sbill 
5168Sbill /*
51713128Ssam  * Mark I/O complete on a buffer.
51846151Smckusick  *
51946151Smckusick  * If a callback has been requested, e.g. the pageout
52046151Smckusick  * daemon, do so. Otherwise, awaken waiting processes.
5218Sbill  */
5227015Smckusick biodone(bp)
5237015Smckusick 	register struct buf *bp;
5248Sbill {
52539882Smckusick 	register struct vnode *vp;
5268Sbill 
527420Sbill 	if (bp->b_flags & B_DONE)
5287015Smckusick 		panic("dup biodone");
5298Sbill 	bp->b_flags |= B_DONE;
53039882Smckusick 	if ((bp->b_flags & B_READ) == 0) {
53138776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
53239882Smckusick 		if (vp = bp->b_vp) {
53339882Smckusick 			vp->v_numoutput--;
53439882Smckusick 			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
53539882Smckusick 				if (vp->v_numoutput < 0)
53639882Smckusick 					panic("biodone: neg numoutput");
53739882Smckusick 				vp->v_flag &= ~VBWAIT;
53839882Smckusick 				wakeup((caddr_t)&vp->v_numoutput);
53939882Smckusick 			}
54039882Smckusick 		}
54139882Smckusick 	}
5429763Ssam 	if (bp->b_flags & B_CALL) {
5439763Ssam 		bp->b_flags &= ~B_CALL;
5449763Ssam 		(*bp->b_iodone)(bp);
5459763Ssam 		return;
5469763Ssam 	}
54746151Smckusick 	if (bp->b_flags & B_ASYNC)
5488Sbill 		brelse(bp);
5498Sbill 	else {
5508Sbill 		bp->b_flags &= ~B_WANTED;
5518Sbill 		wakeup((caddr_t)bp);
5528Sbill 	}
5538Sbill }
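
/*
 * Illustrative sketch only: an asynchronous caller can have biodone()
 * above invoke a completion handler instead of waking a sleeper.
 * mydone is a hypothetical routine supplied by the caller.
 */
#ifdef notdef
	bp->b_flags |= B_ASYNC | B_CALL;
	bp->b_iodone = mydone;
	VOP_STRATEGY(bp);	/* mydone(bp) runs at I/O completion and must
				   release the buffer itself */
#endif /* notdef */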
5548Sbill 
5558Sbill /*
55637736Smckusick  * Make sure all write-behind blocks associated
55738776Smckusick  * with mount point are flushed out (from sync).
5588Sbill  */
55939668Smckusick mntflushbuf(mountp, flags)
56038776Smckusick 	struct mount *mountp;
56139668Smckusick 	int flags;
5628Sbill {
56339668Smckusick 	register struct vnode *vp;
56439668Smckusick 
56541400Smckusick 	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
56641299Smckusick 		panic("mntflushbuf: not busy");
56739668Smckusick loop:
56841421Smckusick 	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
569*48360Smckusick 		if (VOP_ISLOCKED(vp))
570*48360Smckusick 			continue;
57139668Smckusick 		if (vget(vp))
57239668Smckusick 			goto loop;
57339668Smckusick 		vflushbuf(vp, flags);
57439668Smckusick 		vput(vp);
57541421Smckusick 		if (vp->v_mount != mountp)
57641421Smckusick 			goto loop;
57739668Smckusick 	}
57839668Smckusick }
57939668Smckusick 
58039668Smckusick /*
58139668Smckusick  * Flush all dirty buffers associated with a vnode.
58239668Smckusick  */
58339668Smckusick vflushbuf(vp, flags)
58439668Smckusick 	register struct vnode *vp;
58539668Smckusick 	int flags;
58639668Smckusick {
5878Sbill 	register struct buf *bp;
58839668Smckusick 	struct buf *nbp;
5895431Sroot 	int s;
5908Sbill 
5918Sbill loop:
59226271Skarels 	s = splbio();
59339882Smckusick 	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
59439668Smckusick 		nbp = bp->b_blockf;
59539668Smckusick 		if ((bp->b_flags & B_BUSY))
59639668Smckusick 			continue;
59739668Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
59839882Smckusick 			panic("vflushbuf: not dirty");
59939882Smckusick 		bremfree(bp);
60039882Smckusick 		bp->b_flags |= B_BUSY;
60139668Smckusick 		splx(s);
60239882Smckusick 		/*
60339882Smckusick 		 * Wait for I/O associated with indirect blocks to complete,
60439882Smckusick 		 * since there is no way to quickly wait for them below.
60546151Smckusick 		 * NB: This is really specific to ufs, but is done here
60639882Smckusick 		 * as it is easier and quicker.
60739882Smckusick 		 */
60839882Smckusick 		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
60939882Smckusick 			(void) bawrite(bp);
61040639Smckusick 			s = splbio();
61139882Smckusick 		} else {
61239882Smckusick 			(void) bwrite(bp);
61339882Smckusick 			goto loop;
61439882Smckusick 		}
61539668Smckusick 	}
61639738Smckusick 	splx(s);
61739668Smckusick 	if ((flags & B_SYNC) == 0)
61839668Smckusick 		return;
61939668Smckusick 	s = splbio();
62039882Smckusick 	while (vp->v_numoutput) {
62139882Smckusick 		vp->v_flag |= VBWAIT;
62246151Smckusick 		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
62337736Smckusick 	}
62439738Smckusick 	splx(s);
62539882Smckusick 	if (vp->v_dirtyblkhd) {
62639882Smckusick 		vprint("vflushbuf: dirty", vp);
62739882Smckusick 		goto loop;
62839882Smckusick 	}
6298Sbill }
6302299Skre 
6312299Skre /*
6322299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6332299Skre  *
63439668Smckusick  * Go through the list of vnodes associated with the file system;
63539668Smckusick  * for each vnode invalidate any buffers that it holds. Normally
63639668Smckusick  * this routine is preceded by a flush call (see mntflushbuf above), so
63739668Smckusick  * that on a quiescent filesystem there will be no dirty buffers when we
63839668Smckusick  * are done. The count of dirty buffers encountered is returned.
6392299Skre  */
64039668Smckusick mntinvalbuf(mountp)
64138776Smckusick 	struct mount *mountp;
6422299Skre {
64339668Smckusick 	register struct vnode *vp;
64439668Smckusick 	int dirty = 0;
64539668Smckusick 
64641400Smckusick 	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
64741299Smckusick 		panic("mntinvalbuf: not busy");
64839668Smckusick loop:
64941421Smckusick 	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
65039668Smckusick 		if (vget(vp))
65139668Smckusick 			goto loop;
65239668Smckusick 		dirty += vinvalbuf(vp, 1);
65339668Smckusick 		vput(vp);
65441421Smckusick 		if (vp->v_mount != mountp)
65541421Smckusick 			goto loop;
65639668Smckusick 	}
65739668Smckusick 	return (dirty);
65839668Smckusick }
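
/*
 * Illustrative sketch only: the flush-then-invalidate order described in
 * the comment above, as an unmount-style path might use it.  The flags
 * value and the EBUSY handling are hypothetical.
 */
#ifdef notdef
	mntflushbuf(mountp, 0);		/* push out all dirty buffers */
	if (mntinvalbuf(mountp))	/* toss them; any still dirty? */
		return (EBUSY);
#endif /* notdef */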
65939668Smckusick 
66039668Smckusick /*
66139668Smckusick  * Flush out and invalidate all buffers associated with a vnode.
66239668Smckusick  * Called with the underlying object locked.
66339668Smckusick  */
66439668Smckusick vinvalbuf(vp, save)
66539668Smckusick 	register struct vnode *vp;
66639668Smckusick 	int save;
66739668Smckusick {
6682361Skre 	register struct buf *bp;
66939882Smckusick 	struct buf *nbp, *blist;
67038633Smckusick 	int s, dirty = 0;
6712299Skre 
67239882Smckusick 	for (;;) {
67339882Smckusick 		if (blist = vp->v_dirtyblkhd)
67439882Smckusick 			/* void */;
67539882Smckusick 		else if (blist = vp->v_cleanblkhd)
67639882Smckusick 			/* void */;
67739882Smckusick 		else
67839882Smckusick 			break;
67939882Smckusick 		for (bp = blist; bp; bp = nbp) {
68039882Smckusick 			nbp = bp->b_blockf;
68139882Smckusick 			s = splbio();
68239882Smckusick 			if (bp->b_flags & B_BUSY) {
68339882Smckusick 				bp->b_flags |= B_WANTED;
68446151Smckusick 				sleep((caddr_t)bp, PRIBIO + 1);
68539882Smckusick 				splx(s);
68639882Smckusick 				break;
68739882Smckusick 			}
68839882Smckusick 			bremfree(bp);
68939882Smckusick 			bp->b_flags |= B_BUSY;
69038808Smckusick 			splx(s);
69139882Smckusick 			if (save && (bp->b_flags & B_DELWRI)) {
69238614Smckusick 				dirty++;
69339668Smckusick 				(void) bwrite(bp);
69439882Smckusick 				break;
69537736Smckusick 			}
69640034Smckusick 			if (bp->b_vp != vp)
69740034Smckusick 				reassignbuf(bp, bp->b_vp);
69840034Smckusick 			else
69940034Smckusick 				bp->b_flags |= B_INVAL;
70039882Smckusick 			brelse(bp);
70138614Smckusick 		}
70238614Smckusick 	}
70339882Smckusick 	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
70439668Smckusick 		panic("vinvalbuf: flush failed");
70538614Smckusick 	return (dirty);
7062299Skre }
70737736Smckusick 
70839668Smckusick /*
70939668Smckusick  * Associate a buffer with a vnode.
71039668Smckusick  */
71139668Smckusick bgetvp(vp, bp)
71239668Smckusick 	register struct vnode *vp;
71339668Smckusick 	register struct buf *bp;
71439668Smckusick {
71539668Smckusick 
71639668Smckusick 	if (bp->b_vp)
71739668Smckusick 		panic("bgetvp: not free");
71839808Smckusick 	VHOLD(vp);
71939668Smckusick 	bp->b_vp = vp;
72039668Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
72139668Smckusick 		bp->b_dev = vp->v_rdev;
72239668Smckusick 	else
72339668Smckusick 		bp->b_dev = NODEV;
72439668Smckusick 	/*
72539668Smckusick 	 * Insert onto list for new vnode.
72639668Smckusick 	 */
72739882Smckusick 	if (vp->v_cleanblkhd) {
72839882Smckusick 		bp->b_blockf = vp->v_cleanblkhd;
72939882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
73039882Smckusick 		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
73139882Smckusick 		vp->v_cleanblkhd = bp;
73239668Smckusick 	} else {
73339882Smckusick 		vp->v_cleanblkhd = bp;
73439882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
73539668Smckusick 		bp->b_blockf = NULL;
73639668Smckusick 	}
73739668Smckusick }
73839668Smckusick 
73939668Smckusick /*
74039668Smckusick  * Disassociate a buffer from a vnode.
74139668Smckusick  */
74237736Smckusick brelvp(bp)
74339668Smckusick 	register struct buf *bp;
74437736Smckusick {
74539668Smckusick 	struct buf *bq;
74637736Smckusick 	struct vnode *vp;
74737736Smckusick 
74837736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
74939668Smckusick 		panic("brelvp: NULL");
75039668Smckusick 	/*
75139668Smckusick 	 * Delete from old vnode list, if on one.
75239668Smckusick 	 */
75339668Smckusick 	if (bp->b_blockb) {
75439668Smckusick 		if (bq = bp->b_blockf)
75539668Smckusick 			bq->b_blockb = bp->b_blockb;
75639668Smckusick 		*bp->b_blockb = bq;
75739668Smckusick 		bp->b_blockf = NULL;
75839668Smckusick 		bp->b_blockb = NULL;
75939668Smckusick 	}
76037736Smckusick 	vp = bp->b_vp;
76137736Smckusick 	bp->b_vp = (struct vnode *) 0;
76239808Smckusick 	HOLDRELE(vp);
76337736Smckusick }
76439668Smckusick 
76539668Smckusick /*
76639668Smckusick  * Reassign a buffer from one vnode to another.
76739668Smckusick  * Used to assign file specific control information
76839668Smckusick  * (indirect blocks) to the vnode to which they belong.
76939668Smckusick  */
77039668Smckusick reassignbuf(bp, newvp)
77139668Smckusick 	register struct buf *bp;
77239668Smckusick 	register struct vnode *newvp;
77339668Smckusick {
77439882Smckusick 	register struct buf *bq, **listheadp;
77539668Smckusick 
77639882Smckusick 	if (newvp == NULL)
77739882Smckusick 		panic("reassignbuf: NULL");
77839668Smckusick 	/*
77939668Smckusick 	 * Delete from old vnode list, if on one.
78039668Smckusick 	 */
78139668Smckusick 	if (bp->b_blockb) {
78239668Smckusick 		if (bq = bp->b_blockf)
78339668Smckusick 			bq->b_blockb = bp->b_blockb;
78439668Smckusick 		*bp->b_blockb = bq;
78539668Smckusick 	}
78639668Smckusick 	/*
78739882Smckusick 	 * If dirty, put on list of dirty buffers;
78839882Smckusick 	 * otherwise insert onto list of clean buffers.
78939668Smckusick 	 */
79039882Smckusick 	if (bp->b_flags & B_DELWRI)
79139882Smckusick 		listheadp = &newvp->v_dirtyblkhd;
79239882Smckusick 	else
79339882Smckusick 		listheadp = &newvp->v_cleanblkhd;
79439882Smckusick 	if (*listheadp) {
79539882Smckusick 		bp->b_blockf = *listheadp;
79639882Smckusick 		bp->b_blockb = listheadp;
79739882Smckusick 		bp->b_blockf->b_blockb = &bp->b_blockf;
79839882Smckusick 		*listheadp = bp;
79939668Smckusick 	} else {
80039882Smckusick 		*listheadp = bp;
80139882Smckusick 		bp->b_blockb = listheadp;
80239668Smckusick 		bp->b_blockf = NULL;
80339668Smckusick 	}
80439668Smckusick }
805