xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 47545)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
544454Sbostic  * %sccs.include.redist.c%
637736Smckusick  *
7*47545Skarels  *	@(#)vfs_cluster.c	7.34 (Berkeley) 03/17/91
823395Smckusick  */
98Sbill 
1017098Sbloom #include "param.h"
1117098Sbloom #include "user.h"
12*47545Skarels #include "proc.h"
1317098Sbloom #include "buf.h"
1437736Smckusick #include "vnode.h"
1540652Smckusick #include "specdev.h"
1639668Smckusick #include "mount.h"
1717098Sbloom #include "trace.h"
1838776Smckusick #include "ucred.h"
198Sbill 
2091Sbill /*
2146151Smckusick  * Find the block in the buffer pool.
2246151Smckusick  * If the buffer is not present, allocate a new buffer and load
2346151Smckusick  * its contents according to the filesystem fill routine.
248Sbill  */
2538776Smckusick bread(vp, blkno, size, cred, bpp)
2637736Smckusick 	struct vnode *vp;
276563Smckusic 	daddr_t blkno;
286563Smckusic 	int size;
2938776Smckusick 	struct ucred *cred;
3037736Smckusick 	struct buf **bpp;
318Sbill {
32*47545Skarels 	struct proc *p = curproc;		/* XXX */
338Sbill 	register struct buf *bp;
348Sbill 
358670S 	if (size == 0)
368670S 		panic("bread: size 0");
3737736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
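	/*
	 * If the buffer already holds valid data, either from a completed
	 * read (B_DONE) or from a delayed write not yet flushed (B_DELWRI),
	 * the request is satisfied directly from the cache.
	 */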
3846151Smckusick 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
3940341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
4037736Smckusick 		return (0);
418Sbill 	}
428Sbill 	bp->b_flags |= B_READ;
438670S 	if (bp->b_bcount > bp->b_bufsize)
448670S 		panic("bread");
4538776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
4638776Smckusick 		crhold(cred);
4738776Smckusick 		bp->b_rcred = cred;
4838776Smckusick 	}
4937736Smckusick 	VOP_STRATEGY(bp);
5040341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
51*47545Skarels 	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
5237736Smckusick 	return (biowait(bp));
538Sbill }
548Sbill 
558Sbill /*
5646151Smckusick  * Operates like bread, but also starts I/O on the specified
5746151Smckusick  * read-ahead block.
588Sbill  */
5938776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
6037736Smckusick 	struct vnode *vp;
617114Smckusick 	daddr_t blkno; int size;
628592Sroot 	daddr_t rablkno; int rabsize;
6338776Smckusick 	struct ucred *cred;
6437736Smckusick 	struct buf **bpp;
658Sbill {
66*47545Skarels 	struct proc *p = curproc;		/* XXX */
678Sbill 	register struct buf *bp, *rabp;
688Sbill 
698Sbill 	bp = NULL;
707015Smckusick 	/*
7146151Smckusick 	 * If the block is not memory resident,
7246151Smckusick 	 * allocate a buffer and start I/O.
737015Smckusick 	 */
7437736Smckusick 	if (!incore(vp, blkno)) {
7537736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
7646151Smckusick 		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
778Sbill 			bp->b_flags |= B_READ;
788670S 			if (bp->b_bcount > bp->b_bufsize)
798670S 				panic("breada");
8038776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8138776Smckusick 				crhold(cred);
8238776Smckusick 				bp->b_rcred = cred;
8338776Smckusick 			}
8437736Smckusick 			VOP_STRATEGY(bp);
8540341Smckusick 			trace(TR_BREADMISS, pack(vp, size), blkno);
86*47545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
877015Smckusick 		} else
8840341Smckusick 			trace(TR_BREADHIT, pack(vp, size), blkno);
898Sbill 	}
907015Smckusick 
917015Smckusick 	/*
9246151Smckusick 	 * If there is a read-ahead block, start I/O on it too.
937015Smckusick 	 */
9439895Smckusick 	if (!incore(vp, rablkno)) {
9537736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
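		/*
		 * If the read-ahead block turns out to be valid already,
		 * there is no I/O to start; release it immediately, since
		 * the caller is only handed the block named by blkno.
		 */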
9646151Smckusick 		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
978Sbill 			brelse(rabp);
9840341Smckusick 			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
992045Swnj 		} else {
10046151Smckusick 			rabp->b_flags |= B_ASYNC | B_READ;
1018670S 			if (rabp->b_bcount > rabp->b_bufsize)
1028670S 				panic("breadrabp");
10338880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
10438776Smckusick 				crhold(cred);
10538880Smckusick 				rabp->b_rcred = cred;
10638776Smckusick 			}
10737736Smckusick 			VOP_STRATEGY(rabp);
10840341Smckusick 			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
109*47545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
1108Sbill 		}
1118Sbill 	}
1127015Smckusick 
1137015Smckusick 	/*
11446151Smckusick 	 * If the block was memory resident, let bread get it.
11546151Smckusick 	 * If the block was not memory resident, the read was
11646151Smckusick 	 * started above, so just wait for the read to complete.
1177015Smckusick 	 */
1187114Smckusick 	if (bp == NULL)
11938776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
12037736Smckusick 	return (biowait(bp));
1218Sbill }
1228Sbill 
1238Sbill /*
12446151Smckusick  * Synchronous write.
12546151Smckusick  * Release buffer on completion.
1268Sbill  */
1278Sbill bwrite(bp)
1287015Smckusick 	register struct buf *bp;
1298Sbill {
130*47545Skarels 	struct proc *p = curproc;		/* XXX */
13137736Smckusick 	register int flag;
13240226Smckusick 	int s, error = 0;
1338Sbill 
1348Sbill 	flag = bp->b_flags;
1359857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
13646151Smckusick 	if ((flag & B_DELWRI) == 0)
137*47545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
13839882Smckusick 	else
13939882Smckusick 		reassignbuf(bp, bp->b_vp);
14040341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1418670S 	if (bp->b_bcount > bp->b_bufsize)
1428670S 		panic("bwrite");
14340226Smckusick 	s = splbio();
14439882Smckusick 	bp->b_vp->v_numoutput++;
14540226Smckusick 	splx(s);
14637736Smckusick 	VOP_STRATEGY(bp);
1477015Smckusick 
1487015Smckusick 	/*
14946151Smckusick 	 * If the write was synchronous, then await I/O completion.
1507015Smckusick 	 * If the write was "delayed", then we put the buffer on
15146151Smckusick 	 * the queue of blocks awaiting I/O completion status.
1527015Smckusick 	 */
15346151Smckusick 	if ((flag & B_ASYNC) == 0) {
15437736Smckusick 		error = biowait(bp);
1558Sbill 		brelse(bp);
15637736Smckusick 	} else if (flag & B_DELWRI) {
1578Sbill 		bp->b_flags |= B_AGE;
15837736Smckusick 		error = 0;
15937736Smckusick 	}
16037736Smckusick 	return (error);
1618Sbill }
1628Sbill 
1638Sbill /*
16446151Smckusick  * Delayed write.
16546151Smckusick  *
16646151Smckusick  * The buffer is marked dirty, but is not queued for I/O.
16746151Smckusick  * This routine should be used when the buffer is expected
16846151Smckusick  * to be modified again soon, typically a small write that
16946151Smckusick  * partially fills a buffer.
17046151Smckusick  *
17146151Smckusick  * NB: magnetic tapes cannot be delayed; they must be
17246151Smckusick  * written in the order that the writes are requested.
1738Sbill  */
1748Sbill bdwrite(bp)
1757015Smckusick 	register struct buf *bp;
1768Sbill {
177*47545Skarels 	struct proc *p = curproc;		/* XXX */
1788Sbill 
17939882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
18039882Smckusick 		bp->b_flags |= B_DELWRI;
18139882Smckusick 		reassignbuf(bp, bp->b_vp);
182*47545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
18339882Smckusick 	}
18437736Smckusick 	/*
18539668Smckusick 	 * If this is a tape drive, the write must be initiated.
18637736Smckusick 	 */
18739668Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
1888Sbill 		bawrite(bp);
18939668Smckusick 	} else {
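		/*
		 * Otherwise just mark the buffer dirty and valid and hand
		 * it back to the cache; the data reaches disk later, when
		 * the buffer is reclaimed by getnewbuf or the vnode is
		 * flushed by vflushbuf.
		 */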
19046151Smckusick 		bp->b_flags |= (B_DONE | B_DELWRI);
1918Sbill 		brelse(bp);
1928Sbill 	}
1938Sbill }
1948Sbill 
1958Sbill /*
19646151Smckusick  * Asynchronous write.
19746151Smckusick  * Start I/O on a buffer, but do not wait for it to complete.
19846151Smckusick  * The buffer is released when the I/O completes.
1998Sbill  */
2008Sbill bawrite(bp)
2017015Smckusick 	register struct buf *bp;
2028Sbill {
2038Sbill 
20446151Smckusick 	/*
20546151Smckusick 	 * Setting the ASYNC flag causes bwrite to return
20646151Smckusick 	 * after starting the I/O.
20746151Smckusick 	 */
2088Sbill 	bp->b_flags |= B_ASYNC;
20937736Smckusick 	(void) bwrite(bp);
2108Sbill }
2118Sbill 
2128Sbill /*
21346151Smckusick  * Release a buffer.
21446151Smckusick  * Even if the buffer is dirty, no I/O is started.
2158Sbill  */
2168Sbill brelse(bp)
2177015Smckusick 	register struct buf *bp;
2188Sbill {
2192325Swnj 	register struct buf *flist;
22046151Smckusick 	int s;
2218Sbill 
22240341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
2237015Smckusick 	/*
22439668Smckusick 	 * If a process is waiting for the buffer, or
22539668Smckusick 	 * is waiting for a free buffer, awaken it.
2267015Smckusick 	 */
22746151Smckusick 	if (bp->b_flags & B_WANTED)
2288Sbill 		wakeup((caddr_t)bp);
22946151Smckusick 	if (bfreelist[0].b_flags & B_WANTED) {
2302325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2312325Swnj 		wakeup((caddr_t)bfreelist);
2328Sbill 	}
23339668Smckusick 	/*
23439668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
23539668Smckusick 	 */
23639668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
23739668Smckusick 		bp->b_flags &= ~B_ERROR;
23839668Smckusick 	/*
23939668Smckusick 	 * Disassociate buffers that are no longer valid.
24039668Smckusick 	 */
24146151Smckusick 	if (bp->b_flags & (B_NOCACHE | B_ERROR))
24237736Smckusick 		bp->b_flags |= B_INVAL;
24346151Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
24439668Smckusick 		if (bp->b_vp)
24539668Smckusick 			brelvp(bp);
24639668Smckusick 		bp->b_flags &= ~B_DELWRI;
24737736Smckusick 	}
2487015Smckusick 	/*
2497015Smckusick 	 * Stick the buffer back on a free list.
2507015Smckusick 	 */
25126271Skarels 	s = splbio();
2528670S 	if (bp->b_bufsize <= 0) {
2538670S 		/* block has no buffer ... put at front of unused buffer list */
2548670S 		flist = &bfreelist[BQ_EMPTY];
2558670S 		binsheadfree(bp, flist);
25646151Smckusick 	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
2572325Swnj 		/* block has no info ... put at front of most free list */
2588670S 		flist = &bfreelist[BQ_AGE];
2597015Smckusick 		binsheadfree(bp, flist);
2608Sbill 	} else {
2612325Swnj 		if (bp->b_flags & B_LOCKED)
2622325Swnj 			flist = &bfreelist[BQ_LOCKED];
2632325Swnj 		else if (bp->b_flags & B_AGE)
2642325Swnj 			flist = &bfreelist[BQ_AGE];
2652325Swnj 		else
2662325Swnj 			flist = &bfreelist[BQ_LRU];
2677015Smckusick 		binstailfree(bp, flist);
2688Sbill 	}
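	/*
	 * Only the transient flags are cleared; B_DELWRI, B_LOCKED and
	 * the buffer's identity on its hash chain and vnode list are
	 * preserved so it can still be found and flushed.
	 */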
26946151Smckusick 	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
2708Sbill 	splx(s);
2718Sbill }
2728Sbill 
2738Sbill /*
27446151Smckusick  * Check to see if a block is currently memory resident.
2758Sbill  */
27637736Smckusick incore(vp, blkno)
27737736Smckusick 	struct vnode *vp;
2787015Smckusick 	daddr_t blkno;
2798Sbill {
2808Sbill 	register struct buf *bp;
2812325Swnj 	register struct buf *dp;
2828Sbill 
28338225Smckusick 	dp = BUFHASH(vp, blkno);
2842325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
28539668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2867015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28791Sbill 			return (1);
28891Sbill 	return (0);
2898Sbill }
2908Sbill 
29139668Smckusick /*
29246151Smckusick  * Check to see if a block is currently memory resident.
29346151Smckusick  * If it is resident, return it. If it is not resident,
29446151Smckusick  * allocate a new buffer and assign it to the block.
29539668Smckusick  */
2968Sbill struct buf *
29737736Smckusick getblk(vp, blkno, size)
29837736Smckusick 	register struct vnode *vp;
2996563Smckusic 	daddr_t blkno;
3006563Smckusic 	int size;
3018Sbill {
3028670S 	register struct buf *bp, *dp;
3035424Swnj 	int s;
3048Sbill 
30525255Smckusick 	if (size > MAXBSIZE)
30625255Smckusick 		panic("getblk: size too big");
3077015Smckusick 	/*
30846151Smckusick 	 * Search the cache for the block. If the buffer is found,
30946151Smckusick 	 * but it is currently locked, we must wait for it to
31046151Smckusick 	 * become available.
3117015Smckusick 	 */
31237736Smckusick 	dp = BUFHASH(vp, blkno);
3137015Smckusick loop:
3142325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
31539668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
31646151Smckusick 		    (bp->b_flags & B_INVAL))
3178Sbill 			continue;
31826271Skarels 		s = splbio();
31946151Smckusick 		if (bp->b_flags & B_BUSY) {
3208Sbill 			bp->b_flags |= B_WANTED;
32146151Smckusick 			sleep((caddr_t)bp, PRIBIO + 1);
3225424Swnj 			splx(s);
3238Sbill 			goto loop;
3248Sbill 		}
32539882Smckusick 		bremfree(bp);
32639882Smckusick 		bp->b_flags |= B_BUSY;
3275424Swnj 		splx(s);
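		/*
		 * A cached buffer of the wrong size cannot be handed back
		 * as is; mark it invalid, write it out (preserving any
		 * delayed-write data), and retry the lookup so a correctly
		 * sized buffer is built.
		 */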
32832608Smckusick 		if (bp->b_bcount != size) {
32939668Smckusick 			printf("getblk: stray size");
33039668Smckusick 			bp->b_flags |= B_INVAL;
33139668Smckusick 			bwrite(bp);
33239668Smckusick 			goto loop;
33332608Smckusick 		}
3348Sbill 		bp->b_flags |= B_CACHE;
33526271Skarels 		return (bp);
3368Sbill 	}
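	/*
	 * Not in the cache: take a buffer from the free lists, bind it
	 * to this vnode and logical block, enter it on the hash chain,
	 * and size its memory with allocbuf.
	 */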
3378670S 	bp = getnewbuf();
3387015Smckusick 	bremhash(bp);
33939668Smckusick 	bgetvp(vp, bp);
34045116Smckusick 	bp->b_bcount = 0;
34139668Smckusick 	bp->b_lblkno = blkno;
3426563Smckusic 	bp->b_blkno = blkno;
3438670S 	bp->b_error = 0;
34437736Smckusick 	bp->b_resid = 0;
34537736Smckusick 	binshash(bp, dp);
34645116Smckusick 	allocbuf(bp, size);
34726271Skarels 	return (bp);
3488Sbill }
3498Sbill 
3508Sbill /*
35146151Smckusick  * Allocate a buffer.
35246151Smckusick  * The caller will assign it to a block.
3538Sbill  */
3548Sbill struct buf *
3556563Smckusic geteblk(size)
3566563Smckusic 	int size;
3578Sbill {
3588670S 	register struct buf *bp, *flist;
3598Sbill 
36025255Smckusick 	if (size > MAXBSIZE)
36125255Smckusick 		panic("geteblk: size too big");
3628670S 	bp = getnewbuf();
3638670S 	bp->b_flags |= B_INVAL;
3647015Smckusick 	bremhash(bp);
3658670S 	flist = &bfreelist[BQ_AGE];
36645116Smckusick 	bp->b_bcount = 0;
36737736Smckusick 	bp->b_error = 0;
36837736Smckusick 	bp->b_resid = 0;
3698670S 	binshash(bp, flist);
37045116Smckusick 	allocbuf(bp, size);
37126271Skarels 	return (bp);
3728Sbill }
3738Sbill 
3748Sbill /*
37545116Smckusick  * Expand or contract the actual memory allocated to a buffer.
37646151Smckusick  * Memory is taken from, or given up to, other buffers as needed.
3776563Smckusic  */
37845116Smckusick allocbuf(tp, size)
37945116Smckusick 	register struct buf *tp;
3806563Smckusic 	int size;
3816563Smckusic {
38245116Smckusick 	register struct buf *bp, *ep;
38345116Smckusick 	int sizealloc, take, s;
3846563Smckusic 
38545116Smckusick 	sizealloc = roundup(size, CLBYTES);
38645116Smckusick 	/*
38745116Smckusick 	 * Buffer size does not change.
38845116Smckusick 	 */
38945116Smckusick 	if (sizealloc == tp->b_bufsize)
39045116Smckusick 		goto out;
39145116Smckusick 	/*
39245116Smckusick 	 * Buffer size is shrinking.
39345116Smckusick 	 * Place excess space in a buffer header taken from the
39445116Smckusick 	 * BQ_EMPTY buffer list and placed on the "most free" list.
39545116Smckusick 	 * If no extra buffer headers are available, leave the
39645116Smckusick 	 * extra space in the present buffer.
39745116Smckusick 	 */
39845116Smckusick 	if (sizealloc < tp->b_bufsize) {
39945116Smckusick 		ep = bfreelist[BQ_EMPTY].av_forw;
40045116Smckusick 		if (ep == &bfreelist[BQ_EMPTY])
40145116Smckusick 			goto out;
40245116Smckusick 		s = splbio();
40345116Smckusick 		bremfree(ep);
40445116Smckusick 		ep->b_flags |= B_BUSY;
40545116Smckusick 		splx(s);
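		/*
		 * pagemove remaps the surplus pages from this buffer's
		 * address space into the spare header's, rather than
		 * copying the data.
		 */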
40645116Smckusick 		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
40745116Smckusick 		    (int)tp->b_bufsize - sizealloc);
40845116Smckusick 		ep->b_bufsize = tp->b_bufsize - sizealloc;
40945116Smckusick 		tp->b_bufsize = sizealloc;
41045116Smckusick 		ep->b_flags |= B_INVAL;
41145116Smckusick 		ep->b_bcount = 0;
41245116Smckusick 		brelse(ep);
41345116Smckusick 		goto out;
41445116Smckusick 	}
41545116Smckusick 	/*
41645116Smckusick 	 * More buffer space is needed. Get it out of buffers on
41745116Smckusick 	 * the "most free" list, placing the empty headers on the
41845116Smckusick 	 * BQ_EMPTY buffer header list.
41945116Smckusick 	 */
42045116Smckusick 	while (tp->b_bufsize < sizealloc) {
42145116Smckusick 		take = sizealloc - tp->b_bufsize;
42245116Smckusick 		bp = getnewbuf();
42345116Smckusick 		if (take >= bp->b_bufsize)
42445116Smckusick 			take = bp->b_bufsize;
42545116Smckusick 		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
42645116Smckusick 		    &tp->b_un.b_addr[tp->b_bufsize], take);
42745116Smckusick 		tp->b_bufsize += take;
42845116Smckusick 		bp->b_bufsize = bp->b_bufsize - take;
42945116Smckusick 		if (bp->b_bcount > bp->b_bufsize)
43045116Smckusick 			bp->b_bcount = bp->b_bufsize;
43145116Smckusick 		if (bp->b_bufsize <= 0) {
43245116Smckusick 			bremhash(bp);
43345116Smckusick 			binshash(bp, &bfreelist[BQ_EMPTY]);
43446151Smckusick 			bp->b_dev = NODEV;
43545116Smckusick 			bp->b_error = 0;
43645116Smckusick 			bp->b_flags |= B_INVAL;
43745116Smckusick 		}
43845116Smckusick 		brelse(bp);
43945116Smckusick 	}
44045116Smckusick out:
44145116Smckusick 	tp->b_bcount = size;
44245116Smckusick 	return (1);
4438670S }
4448670S 
4458670S /*
4468670S  * Find a buffer which is available for use.
4478670S  * Select something from a free list.
4488670S  * Preference is to AGE list, then LRU list.
4498670S  */
4508670S struct buf *
4518670S getnewbuf()
4528670S {
4538670S 	register struct buf *bp, *dp;
45438776Smckusick 	register struct ucred *cred;
4558670S 	int s;
4568670S 
4578670S loop:
45826271Skarels 	s = splbio();
4598670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4608670S 		if (dp->av_forw != dp)
4618670S 			break;
4628670S 	if (dp == bfreelist) {		/* no free blocks */
4638670S 		dp->b_flags |= B_WANTED;
46446151Smckusick 		sleep((caddr_t)dp, PRIBIO + 1);
46512170Ssam 		splx(s);
4668670S 		goto loop;
4678670S 	}
46839882Smckusick 	bp = dp->av_forw;
46939882Smckusick 	bremfree(bp);
47039882Smckusick 	bp->b_flags |= B_BUSY;
4718670S 	splx(s);
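	/*
	 * A buffer taken from the free lists may still hold delayed-write
	 * data; push it out asynchronously and look for another buffer
	 * instead of waiting for this one.
	 */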
4728670S 	if (bp->b_flags & B_DELWRI) {
47338614Smckusick 		(void) bawrite(bp);
4748670S 		goto loop;
4758670S 	}
47640341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
47739668Smckusick 	if (bp->b_vp)
47839668Smckusick 		brelvp(bp);
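	/*
	 * Drop any credentials cached from the buffer's previous use.
	 */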
47938776Smckusick 	if (bp->b_rcred != NOCRED) {
48038776Smckusick 		cred = bp->b_rcred;
48138776Smckusick 		bp->b_rcred = NOCRED;
48238776Smckusick 		crfree(cred);
48338776Smckusick 	}
48438776Smckusick 	if (bp->b_wcred != NOCRED) {
48538776Smckusick 		cred = bp->b_wcred;
48638776Smckusick 		bp->b_wcred = NOCRED;
48738776Smckusick 		crfree(cred);
48838776Smckusick 	}
4898670S 	bp->b_flags = B_BUSY;
49046989Smckusick 	bp->b_dirtyoff = bp->b_dirtyend = 0;
4918670S 	return (bp);
4928670S }
4938670S 
4948670S /*
49546151Smckusick  * Wait for I/O to complete.
49646151Smckusick  *
49746151Smckusick  * Extract and return any errors associated with the I/O.
49846151Smckusick  * If the error flag is set, but no specific error is
49946151Smckusick  * given, return EIO.
5008Sbill  */
5017015Smckusick biowait(bp)
5026563Smckusic 	register struct buf *bp;
5038Sbill {
5045431Sroot 	int s;
5058Sbill 
50626271Skarels 	s = splbio();
50738776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5088Sbill 		sleep((caddr_t)bp, PRIBIO);
5095431Sroot 	splx(s);
51037736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
51137736Smckusick 		return (0);
51237736Smckusick 	if (bp->b_error)
51337736Smckusick 		return (bp->b_error);
51437736Smckusick 	return (EIO);
5158Sbill }
5168Sbill 
5178Sbill /*
51813128Ssam  * Mark I/O complete on a buffer.
51946151Smckusick  *
52046151Smckusick  * If a callback has been requested, e.g. the pageout
52146151Smckusick  * daemon, do so. Otherwise, awaken waiting processes.
5228Sbill  */
5237015Smckusick biodone(bp)
5247015Smckusick 	register struct buf *bp;
5258Sbill {
52639882Smckusick 	register struct vnode *vp;
5278Sbill 
528420Sbill 	if (bp->b_flags & B_DONE)
5297015Smckusick 		panic("dup biodone");
5308Sbill 	bp->b_flags |= B_DONE;
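	/*
	 * For a completed write, clear the dirty-region bounds and
	 * decrement the vnode's count of writes in progress, waking any
	 * process in vflushbuf that is waiting for the count to drain.
	 */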
53139882Smckusick 	if ((bp->b_flags & B_READ) == 0) {
53238776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
53339882Smckusick 		if (vp = bp->b_vp) {
53439882Smckusick 			vp->v_numoutput--;
53539882Smckusick 			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
53639882Smckusick 				if (vp->v_numoutput < 0)
53739882Smckusick 					panic("biodone: neg numoutput");
53839882Smckusick 				vp->v_flag &= ~VBWAIT;
53939882Smckusick 				wakeup((caddr_t)&vp->v_numoutput);
54039882Smckusick 			}
54139882Smckusick 		}
54239882Smckusick 	}
5439763Ssam 	if (bp->b_flags & B_CALL) {
5449763Ssam 		bp->b_flags &= ~B_CALL;
5459763Ssam 		(*bp->b_iodone)(bp);
5469763Ssam 		return;
5479763Ssam 	}
54846151Smckusick 	if (bp->b_flags & B_ASYNC)
5498Sbill 		brelse(bp);
5508Sbill 	else {
5518Sbill 		bp->b_flags &= ~B_WANTED;
5528Sbill 		wakeup((caddr_t)bp);
5538Sbill 	}
5548Sbill }
5558Sbill 
5568Sbill /*
55737736Smckusick  * Make sure all write-behind blocks associated
55838776Smckusick  * with the mount point are flushed out (from sync).
5598Sbill  */
56039668Smckusick mntflushbuf(mountp, flags)
56138776Smckusick 	struct mount *mountp;
56239668Smckusick 	int flags;
5638Sbill {
56439668Smckusick 	register struct vnode *vp;
56539668Smckusick 
56641400Smckusick 	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
56741299Smckusick 		panic("mntflushbuf: not busy");
56839668Smckusick loop:
56941421Smckusick 	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
57039668Smckusick 		if (vget(vp))
57139668Smckusick 			goto loop;
57239668Smckusick 		vflushbuf(vp, flags);
57339668Smckusick 		vput(vp);
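		/*
		 * If the vnode was recycled to another mount point while it
		 * was being flushed, its position in this mount's vnode list
		 * is no longer trustworthy; restart the scan.
		 */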
57441421Smckusick 		if (vp->v_mount != mountp)
57541421Smckusick 			goto loop;
57639668Smckusick 	}
57739668Smckusick }
57839668Smckusick 
57939668Smckusick /*
58039668Smckusick  * Flush all dirty buffers associated with a vnode.
58139668Smckusick  */
58239668Smckusick vflushbuf(vp, flags)
58339668Smckusick 	register struct vnode *vp;
58439668Smckusick 	int flags;
58539668Smckusick {
5868Sbill 	register struct buf *bp;
58739668Smckusick 	struct buf *nbp;
5885431Sroot 	int s;
5898Sbill 
5908Sbill loop:
59126271Skarels 	s = splbio();
59239882Smckusick 	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
59339668Smckusick 		nbp = bp->b_blockf;
59439668Smckusick 		if ((bp->b_flags & B_BUSY))
59539668Smckusick 			continue;
59639668Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
59739882Smckusick 			panic("vflushbuf: not dirty");
59839882Smckusick 		bremfree(bp);
59939882Smckusick 		bp->b_flags |= B_BUSY;
60039668Smckusick 		splx(s);
60139882Smckusick 		/*
60239882Smckusick 		 * Wait for I/O associated with indirect blocks to complete,
60339882Smckusick 		 * since there is no way to quickly wait for them below.
60446151Smckusick 		 * NB: This is really specific to ufs, but is done here
60539882Smckusick 		 * as it is easier and quicker.
60639882Smckusick 		 */
60739882Smckusick 		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
60839882Smckusick 			(void) bawrite(bp);
60940639Smckusick 			s = splbio();
61039882Smckusick 		} else {
61139882Smckusick 			(void) bwrite(bp);
61239882Smckusick 			goto loop;
61339882Smckusick 		}
61439668Smckusick 	}
61539738Smckusick 	splx(s);
61639668Smckusick 	if ((flags & B_SYNC) == 0)
61739668Smckusick 		return;
61839668Smckusick 	s = splbio();
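	/*
	 * For a synchronous flush, wait for the writes started above to
	 * drain; biodone wakes us as v_numoutput falls to zero.
	 */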
61939882Smckusick 	while (vp->v_numoutput) {
62039882Smckusick 		vp->v_flag |= VBWAIT;
62146151Smckusick 		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
62237736Smckusick 	}
62339738Smckusick 	splx(s);
62439882Smckusick 	if (vp->v_dirtyblkhd) {
62539882Smckusick 		vprint("vflushbuf: dirty", vp);
62639882Smckusick 		goto loop;
62739882Smckusick 	}
6288Sbill }
6292299Skre 
6302299Skre /*
6312299Skre  * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
6322299Skre  *
63339668Smckusick  * Go through the list of vnodes associated with the file system;
63439668Smckusick  * for each vnode invalidate any buffers that it holds. Normally
63539668Smckusick  * this routine is preceded by a mntflushbuf call, so that on a quiescent
63639668Smckusick  * filesystem there will be no dirty buffers when we are done. Mntinvalbuf
63739668Smckusick  * returns the count of dirty buffers when it is finished.
6382299Skre  */
63939668Smckusick mntinvalbuf(mountp)
64038776Smckusick 	struct mount *mountp;
6412299Skre {
64239668Smckusick 	register struct vnode *vp;
64339668Smckusick 	int dirty = 0;
64439668Smckusick 
64541400Smckusick 	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
64641299Smckusick 		panic("mntinvalbuf: not busy");
64739668Smckusick loop:
64841421Smckusick 	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
64939668Smckusick 		if (vget(vp))
65039668Smckusick 			goto loop;
65139668Smckusick 		dirty += vinvalbuf(vp, 1);
65239668Smckusick 		vput(vp);
65341421Smckusick 		if (vp->v_mount != mountp)
65441421Smckusick 			goto loop;
65539668Smckusick 	}
65639668Smckusick 	return (dirty);
65739668Smckusick }
65839668Smckusick 
65939668Smckusick /*
66039668Smckusick  * Flush out and invalidate all buffers associated with a vnode.
66139668Smckusick  * Called with the underlying object locked.
66239668Smckusick  */
66339668Smckusick vinvalbuf(vp, save)
66439668Smckusick 	register struct vnode *vp;
66539668Smckusick 	int save;
66639668Smckusick {
6672361Skre 	register struct buf *bp;
66839882Smckusick 	struct buf *nbp, *blist;
66938633Smckusick 	int s, dirty = 0;
6702299Skre 
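	/*
	 * Alternate between the head of the dirty list and the head of
	 * the clean list until both are empty.  The scan restarts from a
	 * list head whenever we sleep or write, since either list can
	 * change while the buffer lock is awaited or the write proceeds.
	 */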
67139882Smckusick 	for (;;) {
67239882Smckusick 		if (blist = vp->v_dirtyblkhd)
67339882Smckusick 			/* void */;
67439882Smckusick 		else if (blist = vp->v_cleanblkhd)
67539882Smckusick 			/* void */;
67639882Smckusick 		else
67739882Smckusick 			break;
67839882Smckusick 		for (bp = blist; bp; bp = nbp) {
67939882Smckusick 			nbp = bp->b_blockf;
68039882Smckusick 			s = splbio();
68139882Smckusick 			if (bp->b_flags & B_BUSY) {
68239882Smckusick 				bp->b_flags |= B_WANTED;
68346151Smckusick 				sleep((caddr_t)bp, PRIBIO + 1);
68439882Smckusick 				splx(s);
68539882Smckusick 				break;
68639882Smckusick 			}
68739882Smckusick 			bremfree(bp);
68839882Smckusick 			bp->b_flags |= B_BUSY;
68938808Smckusick 			splx(s);
69039882Smckusick 			if (save && (bp->b_flags & B_DELWRI)) {
69138614Smckusick 				dirty++;
69239668Smckusick 				(void) bwrite(bp);
69339882Smckusick 				break;
69437736Smckusick 			}
69540034Smckusick 			if (bp->b_vp != vp)
69640034Smckusick 				reassignbuf(bp, bp->b_vp);
69740034Smckusick 			else
69840034Smckusick 				bp->b_flags |= B_INVAL;
69939882Smckusick 			brelse(bp);
70038614Smckusick 		}
70138614Smckusick 	}
70239882Smckusick 	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
70339668Smckusick 		panic("vinvalbuf: flush failed");
70438614Smckusick 	return (dirty);
7052299Skre }
70637736Smckusick 
70739668Smckusick /*
70839668Smckusick  * Associate a buffer with a vnode.
70939668Smckusick  */
71039668Smckusick bgetvp(vp, bp)
71139668Smckusick 	register struct vnode *vp;
71239668Smckusick 	register struct buf *bp;
71339668Smckusick {
71439668Smckusick 
71539668Smckusick 	if (bp->b_vp)
71639668Smckusick 		panic("bgetvp: not free");
71739808Smckusick 	VHOLD(vp);
71839668Smckusick 	bp->b_vp = vp;
71939668Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
72039668Smckusick 		bp->b_dev = vp->v_rdev;
72139668Smckusick 	else
72239668Smckusick 		bp->b_dev = NODEV;
72339668Smckusick 	/*
72439668Smckusick 	 * Insert onto list for new vnode.
72539668Smckusick 	 */
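	/*
	 * b_blockb points at the forward link of the previous element
	 * (or at the list head itself), which lets brelvp and
	 * reassignbuf unlink the buffer without walking the list.
	 */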
72639882Smckusick 	if (vp->v_cleanblkhd) {
72739882Smckusick 		bp->b_blockf = vp->v_cleanblkhd;
72839882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
72939882Smckusick 		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
73039882Smckusick 		vp->v_cleanblkhd = bp;
73139668Smckusick 	} else {
73239882Smckusick 		vp->v_cleanblkhd = bp;
73339882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
73439668Smckusick 		bp->b_blockf = NULL;
73539668Smckusick 	}
73639668Smckusick }
73739668Smckusick 
73839668Smckusick /*
73939668Smckusick  * Disassociate a buffer from a vnode.
74039668Smckusick  */
74137736Smckusick brelvp(bp)
74239668Smckusick 	register struct buf *bp;
74337736Smckusick {
74439668Smckusick 	struct buf *bq;
74537736Smckusick 	struct vnode *vp;
74637736Smckusick 
74737736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
74839668Smckusick 		panic("brelvp: NULL");
74939668Smckusick 	/*
75039668Smckusick 	 * Delete from old vnode list, if on one.
75139668Smckusick 	 */
75239668Smckusick 	if (bp->b_blockb) {
75339668Smckusick 		if (bq = bp->b_blockf)
75439668Smckusick 			bq->b_blockb = bp->b_blockb;
75539668Smckusick 		*bp->b_blockb = bq;
75639668Smckusick 		bp->b_blockf = NULL;
75739668Smckusick 		bp->b_blockb = NULL;
75839668Smckusick 	}
75937736Smckusick 	vp = bp->b_vp;
76037736Smckusick 	bp->b_vp = (struct vnode *) 0;
76139808Smckusick 	HOLDRELE(vp);
76237736Smckusick }
76339668Smckusick 
76439668Smckusick /*
76539668Smckusick  * Reassign a buffer from one vnode to another.
76639668Smckusick  * Used to assign file specific control information
76739668Smckusick  * (indirect blocks) to the vnode to which they belong.
76839668Smckusick  */
76939668Smckusick reassignbuf(bp, newvp)
77039668Smckusick 	register struct buf *bp;
77139668Smckusick 	register struct vnode *newvp;
77239668Smckusick {
77339882Smckusick 	register struct buf *bq, **listheadp;
77439668Smckusick 
77539882Smckusick 	if (newvp == NULL)
77639882Smckusick 		panic("reassignbuf: NULL");
77739668Smckusick 	/*
77839668Smckusick 	 * Delete from old vnode list, if on one.
77939668Smckusick 	 */
78039668Smckusick 	if (bp->b_blockb) {
78139668Smckusick 		if (bq = bp->b_blockf)
78239668Smckusick 			bq->b_blockb = bp->b_blockb;
78339668Smckusick 		*bp->b_blockb = bq;
78439668Smckusick 	}
78539668Smckusick 	/*
78639882Smckusick 	 * If dirty, put on list of dirty buffers;
78739882Smckusick 	 * otherwise insert onto list of clean buffers.
78839668Smckusick 	 */
78939882Smckusick 	if (bp->b_flags & B_DELWRI)
79039882Smckusick 		listheadp = &newvp->v_dirtyblkhd;
79139882Smckusick 	else
79239882Smckusick 		listheadp = &newvp->v_cleanblkhd;
79339882Smckusick 	if (*listheadp) {
79439882Smckusick 		bp->b_blockf = *listheadp;
79539882Smckusick 		bp->b_blockb = listheadp;
79639882Smckusick 		bp->b_blockf->b_blockb = &bp->b_blockf;
79739882Smckusick 		*listheadp = bp;
79839668Smckusick 	} else {
79939882Smckusick 		*listheadp = bp;
80039882Smckusick 		bp->b_blockb = listheadp;
80139668Smckusick 		bp->b_blockf = NULL;
80239668Smckusick 	}
80339668Smckusick }
804