xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 45116)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
544454Sbostic  * %sccs.include.redist.c%
637736Smckusick  *
7*45116Smckusick  *	@(#)vfs_cluster.c	7.31 (Berkeley) 08/24/90
823395Smckusick  */
98Sbill 
1017098Sbloom #include "param.h"
1117098Sbloom #include "user.h"
1217098Sbloom #include "buf.h"
1337736Smckusick #include "vnode.h"
1440652Smckusick #include "specdev.h"
1539668Smckusick #include "mount.h"
1617098Sbloom #include "trace.h"
1738776Smckusick #include "ucred.h"
188Sbill 
/*
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * vp/blkno/size identify the block; cred is the credential to be
 * charged with (and cached for) the read; the busy buffer is
 * returned through *bpp.  Returns 0 on a cache hit or successful
 * read, otherwise the error from biowait().
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	/* Cache hit: buffer contents are already valid (done or dirty). */
	if (bp->b_flags&(B_DONE|B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	/* Cache the read credential on the buffer for later use. */
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
508Sbill 
/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 *
 * rablkno/rabsize name the read-ahead block; its I/O is started
 * asynchronously and the buffer is not returned to the caller.
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			u.u_ru.ru_inblock++;		/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).  The read-ahead buffer is
	 * released immediately: either it was already valid
	 * (brelse below) or it completes asynchronously and
	 * biodone releases it (B_ASYNC).
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			u.u_ru.ru_inblock++;		/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
1198Sbill 
1208Sbill /*
1218Sbill  * Write the buffer, waiting for completion.
1228Sbill  * Then release the buffer.
1238Sbill  */
1248Sbill bwrite(bp)
1257015Smckusick 	register struct buf *bp;
1268Sbill {
12737736Smckusick 	register int flag;
12840226Smckusick 	int s, error;
1298Sbill 
1308Sbill 	flag = bp->b_flags;
1319857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1328Sbill 	if ((flag&B_DELWRI) == 0)
1338039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
13439882Smckusick 	else
13539882Smckusick 		reassignbuf(bp, bp->b_vp);
13640341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1378670S 	if (bp->b_bcount > bp->b_bufsize)
1388670S 		panic("bwrite");
13940226Smckusick 	s = splbio();
14039882Smckusick 	bp->b_vp->v_numoutput++;
14140226Smckusick 	splx(s);
14237736Smckusick 	VOP_STRATEGY(bp);
1437015Smckusick 
1447015Smckusick 	/*
1457015Smckusick 	 * If the write was synchronous, then await i/o completion.
1467015Smckusick 	 * If the write was "delayed", then we put the buffer on
1477015Smckusick 	 * the q of blocks awaiting i/o completion status.
1487015Smckusick 	 */
1498Sbill 	if ((flag&B_ASYNC) == 0) {
15037736Smckusick 		error = biowait(bp);
1518Sbill 		brelse(bp);
15237736Smckusick 	} else if (flag & B_DELWRI) {
1538Sbill 		bp->b_flags |= B_AGE;
15437736Smckusick 		error = 0;
15537736Smckusick 	}
15637736Smckusick 	return (error);
1578Sbill }
1588Sbill 
/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{

	/* First delay of this buffer: account for it and move it
	 * onto the vnode's dirty list. */
	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		u.u_ru.ru_oublock++;		/* noone paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 * (The B_TAPE ioctl returns 0 when the underlying device
	 * is a tape -- presumably; confirm against the driver.)
	 */
	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
1868Sbill 
/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 * Thin wrapper: marks the buffer B_ASYNC and hands it to bwrite.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
1978Sbill 
/*
 * Release the buffer, with no I/O implied.
 * Wakes any waiters, invalidates errored/uncached buffers,
 * and returns the buffer to the appropriate free list.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;

	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE|B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR|B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 * Queue choice: EMPTY for headers with no memory, AGE for
	 * invalid/aged contents, LOCKED for locked buffers, LRU otherwise.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
	splx(s);
}
2588Sbill 
2598Sbill /*
2608Sbill  * See if the block is associated with some buffer
2618Sbill  * (mainly to avoid getting hung up on a wait in breada)
2628Sbill  */
26337736Smckusick incore(vp, blkno)
26437736Smckusick 	struct vnode *vp;
2657015Smckusick 	daddr_t blkno;
2668Sbill {
2678Sbill 	register struct buf *bp;
2682325Swnj 	register struct buf *dp;
2698Sbill 
27038225Smckusick 	dp = BUFHASH(vp, blkno);
2712325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
27239668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2737015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
27491Sbill 			return (1);
27591Sbill 	return (0);
2768Sbill }
2778Sbill 
27839668Smckusick /*
27939668Smckusick  * Return a block if it is in memory.
28039668Smckusick  */
28138776Smckusick baddr(vp, blkno, size, cred, bpp)
28237736Smckusick 	struct vnode *vp;
2836563Smckusic 	daddr_t blkno;
2846563Smckusic 	int size;
28538776Smckusick 	struct ucred *cred;
28637736Smckusick 	struct buf **bpp;
2878Sbill {
2888Sbill 
28937736Smckusick 	if (incore(vp, blkno))
29038776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
29137736Smckusick 	*bpp = 0;
2928Sbill 	return (0);
2938Sbill }
2948Sbill 
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    bp->b_flags&B_INVAL)
			continue;
		s = splbio();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			/* The chain may have changed while asleep; rescan. */
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * A cached buffer of the wrong size cannot be used:
		 * flush it out (invalidated) and retry the lookup.
		 */
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/* Cache miss: claim a free buffer and re-key it for (vp, blkno). */
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
3578Sbill 
/*
 * Get an empty block,
 * not assigned to any particular device.
 * The buffer is marked B_INVAL and hashed on the AGE free-list
 * header so it will never match a cache lookup.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	allocbuf(bp, size);
	return (bp);
}
3818Sbill 
3828Sbill /*
383*45116Smckusick  * Expand or contract the actual memory allocated to a buffer.
384*45116Smckusick  * If no memory is available, release buffer and take error exit
3856563Smckusic  */
386*45116Smckusick allocbuf(tp, size)
387*45116Smckusick 	register struct buf *tp;
3886563Smckusic 	int size;
3896563Smckusic {
390*45116Smckusick 	register struct buf *bp, *ep;
391*45116Smckusick 	int sizealloc, take, s;
3926563Smckusic 
393*45116Smckusick 	sizealloc = roundup(size, CLBYTES);
394*45116Smckusick 	/*
395*45116Smckusick 	 * Buffer size does not change
396*45116Smckusick 	 */
397*45116Smckusick 	if (sizealloc == tp->b_bufsize)
398*45116Smckusick 		goto out;
399*45116Smckusick 	/*
400*45116Smckusick 	 * Buffer size is shrinking.
401*45116Smckusick 	 * Place excess space in a buffer header taken from the
402*45116Smckusick 	 * BQ_EMPTY buffer list and placed on the "most free" list.
403*45116Smckusick 	 * If no extra buffer headers are available, leave the
404*45116Smckusick 	 * extra space in the present buffer.
405*45116Smckusick 	 */
406*45116Smckusick 	if (sizealloc < tp->b_bufsize) {
407*45116Smckusick 		ep = bfreelist[BQ_EMPTY].av_forw;
408*45116Smckusick 		if (ep == &bfreelist[BQ_EMPTY])
409*45116Smckusick 			goto out;
410*45116Smckusick 		s = splbio();
411*45116Smckusick 		bremfree(ep);
412*45116Smckusick 		ep->b_flags |= B_BUSY;
413*45116Smckusick 		splx(s);
414*45116Smckusick 		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
415*45116Smckusick 		    (int)tp->b_bufsize - sizealloc);
416*45116Smckusick 		ep->b_bufsize = tp->b_bufsize - sizealloc;
417*45116Smckusick 		tp->b_bufsize = sizealloc;
418*45116Smckusick 		ep->b_flags |= B_INVAL;
419*45116Smckusick 		ep->b_bcount = 0;
420*45116Smckusick 		brelse(ep);
421*45116Smckusick 		goto out;
422*45116Smckusick 	}
423*45116Smckusick 	/*
424*45116Smckusick 	 * More buffer space is needed. Get it out of buffers on
425*45116Smckusick 	 * the "most free" list, placing the empty headers on the
426*45116Smckusick 	 * BQ_EMPTY buffer header list.
427*45116Smckusick 	 */
428*45116Smckusick 	while (tp->b_bufsize < sizealloc) {
429*45116Smckusick 		take = sizealloc - tp->b_bufsize;
430*45116Smckusick 		bp = getnewbuf();
431*45116Smckusick 		if (take >= bp->b_bufsize)
432*45116Smckusick 			take = bp->b_bufsize;
433*45116Smckusick 		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
434*45116Smckusick 		    &tp->b_un.b_addr[tp->b_bufsize], take);
435*45116Smckusick 		tp->b_bufsize += take;
436*45116Smckusick 		bp->b_bufsize = bp->b_bufsize - take;
437*45116Smckusick 		if (bp->b_bcount > bp->b_bufsize)
438*45116Smckusick 			bp->b_bcount = bp->b_bufsize;
439*45116Smckusick 		if (bp->b_bufsize <= 0) {
440*45116Smckusick 			bremhash(bp);
441*45116Smckusick 			binshash(bp, &bfreelist[BQ_EMPTY]);
442*45116Smckusick 			bp->b_dev = (dev_t)NODEV;
443*45116Smckusick 			bp->b_error = 0;
444*45116Smckusick 			bp->b_flags |= B_INVAL;
445*45116Smckusick 		}
446*45116Smckusick 		brelse(bp);
447*45116Smckusick 	}
448*45116Smckusick out:
449*45116Smckusick 	tp->b_bcount = size;
450*45116Smckusick 	return (1);
4518670S }
4528670S 
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * Returns the buffer marked B_BUSY, disassociated from any
 * vnode and with its cached credentials released.  Sleeps
 * when all free lists are empty; writes out (and retries on)
 * delayed-write buffers rather than handing them out dirty.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		/* Dirty: start the write (bawrite releases bp) and rescan. */
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	/* Drop any cached read/write credentials from the old identity. */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	return (bp);
}
5008670S 
/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	/*
	 * Pick up the device's error number and pass it to the user;
	 * if there is an error but the number is 0 set a generalized code.
	 */
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}
5248Sbill 
5258Sbill /*
52613128Ssam  * Mark I/O complete on a buffer.
52713128Ssam  * If someone should be called, e.g. the pageout
52813128Ssam  * daemon, do so.  Otherwise, wake up anyone
52913128Ssam  * waiting for it.
5308Sbill  */
5317015Smckusick biodone(bp)
5327015Smckusick 	register struct buf *bp;
5338Sbill {
53439882Smckusick 	register struct vnode *vp;
5358Sbill 
536420Sbill 	if (bp->b_flags & B_DONE)
5377015Smckusick 		panic("dup biodone");
5388Sbill 	bp->b_flags |= B_DONE;
53939882Smckusick 	if ((bp->b_flags & B_READ) == 0) {
54038776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
54139882Smckusick 		if (vp = bp->b_vp) {
54239882Smckusick 			vp->v_numoutput--;
54339882Smckusick 			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
54439882Smckusick 				if (vp->v_numoutput < 0)
54539882Smckusick 					panic("biodone: neg numoutput");
54639882Smckusick 				vp->v_flag &= ~VBWAIT;
54739882Smckusick 				wakeup((caddr_t)&vp->v_numoutput);
54839882Smckusick 			}
54939882Smckusick 		}
55039882Smckusick 	}
5519763Ssam 	if (bp->b_flags & B_CALL) {
5529763Ssam 		bp->b_flags &= ~B_CALL;
5539763Ssam 		(*bp->b_iodone)(bp);
5549763Ssam 		return;
5559763Ssam 	}
5568Sbill 	if (bp->b_flags&B_ASYNC)
5578Sbill 		brelse(bp);
5588Sbill 	else {
5598Sbill 		bp->b_flags &= ~B_WANTED;
5608Sbill 		wakeup((caddr_t)bp);
5618Sbill 	}
5628Sbill }
5638Sbill 
5648Sbill /*
56537736Smckusick  * Make sure all write-behind blocks associated
56638776Smckusick  * with mount point are flushed out (from sync).
5678Sbill  */
56839668Smckusick mntflushbuf(mountp, flags)
56938776Smckusick 	struct mount *mountp;
57039668Smckusick 	int flags;
5718Sbill {
57239668Smckusick 	register struct vnode *vp;
57339668Smckusick 
57441400Smckusick 	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
57541299Smckusick 		panic("mntflushbuf: not busy");
57639668Smckusick loop:
57741421Smckusick 	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
57839668Smckusick 		if (vget(vp))
57939668Smckusick 			goto loop;
58039668Smckusick 		vflushbuf(vp, flags);
58139668Smckusick 		vput(vp);
58241421Smckusick 		if (vp->v_mount != mountp)
58341421Smckusick 			goto loop;
58439668Smckusick 	}
58539668Smckusick }
58639668Smckusick 
/*
 * Flush all dirty buffers associated with a vnode.
 *
 * If B_SYNC is set in flags, waits for all pending output on the
 * vnode to drain (via v_numoutput/VBWAIT) and retries until the
 * dirty list is empty.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB - This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
			(void) bawrite(bp);
			s = splbio();
		} else {
			/* Synchronous write invalidates nbp; restart scan. */
			(void) bwrite(bp);
			goto loop;
		}
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	/* Wait for all writes started above (and earlier) to finish. */
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO+1);
	}
	splx(s);
	/* New dirty buffers may have appeared while sleeping; retry. */
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
6372299Skre 
/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceeded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done. Binval
 * returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		/* vp may have been moved to another mount; if so, rescan. */
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}
66639668Smckusick 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * If 'save' is nonzero, dirty buffers are written out (and counted)
 * rather than discarded.  Returns the number of dirty buffers
 * encountered.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	/* Drain the dirty list first, then the clean list, restarting
	 * the inner scan whenever a sleep or write may have changed
	 * the list underneath us. */
	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO+1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) bwrite(bp);
				break;
			}
			/* Indirect blocks may be on this vnode's list but
			 * belong to another vnode; put them back there. */
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}
71437736Smckusick 
/*
 * Associate a buffer with a vnode.
 * Takes a hold on the vnode, records the device (for block and
 * character specials), and inserts the buffer at the head of the
 * vnode's clean list.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (vp->v_cleanblkhd) {
		bp->b_blockf = vp->v_cleanblkhd;
		bp->b_blockb = &vp->v_cleanblkhd;
		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
		vp->v_cleanblkhd = bp;
	} else {
		vp->v_cleanblkhd = bp;
		bp->b_blockb = &vp->v_cleanblkhd;
		bp->b_blockf = NULL;
	}
}
74539668Smckusick 
/*
 * Disassociate a buffer from a vnode.
 * Removes the buffer from the vnode's block list (if on one)
 * and releases the hold taken in bgetvp.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 * (b_blockb points at the forward link of the previous
	 * element, or at the list head.)
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
77139668Smckusick 
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 * Also moves a buffer between a vnode's clean and dirty
 * lists when its B_DELWRI state changes (see bwrite/bdwrite).
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (*listheadp) {
		bp->b_blockf = *listheadp;
		bp->b_blockb = listheadp;
		bp->b_blockf->b_blockb = &bp->b_blockf;
		*listheadp = bp;
	} else {
		*listheadp = bp;
		bp->b_blockb = listheadp;
		bp->b_blockf = NULL;
	}
}
812