xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 40226)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
537736Smckusick  * Redistribution and use in source and binary forms are permitted
637736Smckusick  * provided that the above copyright notice and this paragraph are
737736Smckusick  * duplicated in all such forms and that any documentation,
837736Smckusick  * advertising materials, and other materials related to such
937736Smckusick  * distribution and use acknowledge that the software was developed
1037736Smckusick  * by the University of California, Berkeley.  The name of the
1137736Smckusick  * University may not be used to endorse or promote products derived
1237736Smckusick  * from this software without specific prior written permission.
1337736Smckusick  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1437736Smckusick  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1537736Smckusick  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1637736Smckusick  *
17*40226Smckusick  *	@(#)vfs_cluster.c	7.22 (Berkeley) 02/25/90
1823395Smckusick  */
198Sbill 
2017098Sbloom #include "param.h"
2117098Sbloom #include "user.h"
2217098Sbloom #include "buf.h"
2337736Smckusick #include "vnode.h"
2439668Smckusick #include "mount.h"
2517098Sbloom #include "trace.h"
2638776Smckusick #include "ucred.h"
278Sbill 
2891Sbill /*
298Sbill  * Read in (if necessary) the block and return a buffer pointer.
308Sbill  */
3138776Smckusick bread(vp, blkno, size, cred, bpp)
3237736Smckusick 	struct vnode *vp;
336563Smckusic 	daddr_t blkno;
346563Smckusic 	int size;
3538776Smckusick 	struct ucred *cred;
3637736Smckusick 	struct buf **bpp;
378Sbill {
388Sbill 	register struct buf *bp;
398Sbill 
408670S 	if (size == 0)
418670S 		panic("bread: size 0");
4237736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
4332608Smckusick 	if (bp->b_flags&(B_DONE|B_DELWRI)) {
4437736Smckusick 		trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
4537736Smckusick 		return (0);
468Sbill 	}
478Sbill 	bp->b_flags |= B_READ;
488670S 	if (bp->b_bcount > bp->b_bufsize)
498670S 		panic("bread");
5038776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
5138776Smckusick 		crhold(cred);
5238776Smckusick 		bp->b_rcred = cred;
5338776Smckusick 	}
5437736Smckusick 	VOP_STRATEGY(bp);
5537736Smckusick 	trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
568039Sroot 	u.u_ru.ru_inblock++;		/* pay for read */
5737736Smckusick 	return (biowait(bp));
588Sbill }
598Sbill 
608Sbill /*
618Sbill  * Read in the block, like bread, but also start I/O on the
628Sbill  * read-ahead block (which is not allocated to the caller)
638Sbill  */
6438776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
6537736Smckusick 	struct vnode *vp;
667114Smckusick 	daddr_t blkno; int size;
678592Sroot 	daddr_t rablkno; int rabsize;
6838776Smckusick 	struct ucred *cred;
6937736Smckusick 	struct buf **bpp;
708Sbill {
718Sbill 	register struct buf *bp, *rabp;
728Sbill 
738Sbill 	bp = NULL;
747015Smckusick 	/*
757015Smckusick 	 * If the block isn't in core, then allocate
767015Smckusick 	 * a buffer and initiate i/o (getblk checks
777015Smckusick 	 * for a cache hit).
787015Smckusick 	 */
7937736Smckusick 	if (!incore(vp, blkno)) {
8037736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
8132608Smckusick 		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
828Sbill 			bp->b_flags |= B_READ;
838670S 			if (bp->b_bcount > bp->b_bufsize)
848670S 				panic("breada");
8538776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8638776Smckusick 				crhold(cred);
8738776Smckusick 				bp->b_rcred = cred;
8838776Smckusick 			}
8937736Smckusick 			VOP_STRATEGY(bp);
9037736Smckusick 			trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
9137736Smckusick 			    blkno);
928039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
937015Smckusick 		} else
9437736Smckusick 			trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
9537736Smckusick 			    blkno);
968Sbill 	}
977015Smckusick 
987015Smckusick 	/*
997015Smckusick 	 * If there's a read-ahead block, start i/o
1007015Smckusick 	 * on it also (as above).
1017015Smckusick 	 */
10239895Smckusick 	if (!incore(vp, rablkno)) {
10337736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
10432608Smckusick 		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
1058Sbill 			brelse(rabp);
10637736Smckusick 			trace(TR_BREADHITRA,
10738880Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
1082045Swnj 		} else {
1098Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1108670S 			if (rabp->b_bcount > rabp->b_bufsize)
1118670S 				panic("breadrabp");
11238880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
11338776Smckusick 				crhold(cred);
11438880Smckusick 				rabp->b_rcred = cred;
11538776Smckusick 			}
11637736Smckusick 			VOP_STRATEGY(rabp);
11737736Smckusick 			trace(TR_BREADMISSRA,
11838880Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
1198039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1208Sbill 		}
1218Sbill 	}
1227015Smckusick 
1237015Smckusick 	/*
1247114Smckusick 	 * If block was in core, let bread get it.
1257114Smckusick 	 * If block wasn't in core, then the read was started
1267114Smckusick 	 * above, and just wait for it.
1277015Smckusick 	 */
1287114Smckusick 	if (bp == NULL)
12938776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
13037736Smckusick 	return (biowait(bp));
1318Sbill }
1328Sbill 
1338Sbill /*
1348Sbill  * Write the buffer, waiting for completion.
1358Sbill  * Then release the buffer.
1368Sbill  */
1378Sbill bwrite(bp)
1387015Smckusick 	register struct buf *bp;
1398Sbill {
14037736Smckusick 	register int flag;
141*40226Smckusick 	int s, error;
1428Sbill 
1438Sbill 	flag = bp->b_flags;
1449857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1458Sbill 	if ((flag&B_DELWRI) == 0)
1468039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
14739882Smckusick 	else
14839882Smckusick 		reassignbuf(bp, bp->b_vp);
14937736Smckusick 	trace(TR_BWRITE,
15039668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_lblkno);
1518670S 	if (bp->b_bcount > bp->b_bufsize)
1528670S 		panic("bwrite");
153*40226Smckusick 	s = splbio();
15439882Smckusick 	bp->b_vp->v_numoutput++;
155*40226Smckusick 	splx(s);
15637736Smckusick 	VOP_STRATEGY(bp);
1577015Smckusick 
1587015Smckusick 	/*
1597015Smckusick 	 * If the write was synchronous, then await i/o completion.
1607015Smckusick 	 * If the write was "delayed", then we put the buffer on
1617015Smckusick 	 * the q of blocks awaiting i/o completion status.
1627015Smckusick 	 */
1638Sbill 	if ((flag&B_ASYNC) == 0) {
16437736Smckusick 		error = biowait(bp);
1658Sbill 		brelse(bp);
16637736Smckusick 	} else if (flag & B_DELWRI) {
1678Sbill 		bp->b_flags |= B_AGE;
16837736Smckusick 		error = 0;
16937736Smckusick 	}
17037736Smckusick 	return (error);
1718Sbill }
1728Sbill 
1738Sbill /*
1748Sbill  * Release the buffer, marking it so that if it is grabbed
1758Sbill  * for another purpose it will be written out before being
1768Sbill  * given up (e.g. when writing a partial block where it is
1778Sbill  * assumed that another write for the same block will soon follow).
1788Sbill  * This can't be done for magtape, since writes must be done
1798Sbill  * in the same order as requested.
1808Sbill  */
1818Sbill bdwrite(bp)
1827015Smckusick 	register struct buf *bp;
1838Sbill {
1848Sbill 
18539882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
18639882Smckusick 		bp->b_flags |= B_DELWRI;
18739882Smckusick 		reassignbuf(bp, bp->b_vp);
1888039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
18939882Smckusick 	}
19037736Smckusick 	/*
19139668Smckusick 	 * If this is a tape drive, the write must be initiated.
19237736Smckusick 	 */
19339668Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
1948Sbill 		bawrite(bp);
19539668Smckusick 	} else {
1968Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
1978Sbill 		brelse(bp);
1988Sbill 	}
1998Sbill }
2008Sbill 
2018Sbill /*
2028Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
2038Sbill  */
2048Sbill bawrite(bp)
2057015Smckusick 	register struct buf *bp;
2068Sbill {
2078Sbill 
2088Sbill 	bp->b_flags |= B_ASYNC;
20937736Smckusick 	(void) bwrite(bp);
2108Sbill }
2118Sbill 
2128Sbill /*
2137015Smckusick  * Release the buffer, with no I/O implied.
2148Sbill  */
2158Sbill brelse(bp)
2167015Smckusick 	register struct buf *bp;
2178Sbill {
2182325Swnj 	register struct buf *flist;
2198Sbill 	register s;
2208Sbill 
22137736Smckusick 	trace(TR_BRELSE,
22239668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
2237015Smckusick 	/*
22439668Smckusick 	 * If a process is waiting for the buffer, or
22539668Smckusick 	 * is waiting for a free buffer, awaken it.
2267015Smckusick 	 */
2278Sbill 	if (bp->b_flags&B_WANTED)
2288Sbill 		wakeup((caddr_t)bp);
2292325Swnj 	if (bfreelist[0].b_flags&B_WANTED) {
2302325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2312325Swnj 		wakeup((caddr_t)bfreelist);
2328Sbill 	}
23339668Smckusick 	/*
23439668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
23539668Smckusick 	 */
23639668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
23739668Smckusick 		bp->b_flags &= ~B_ERROR;
23839668Smckusick 
23939668Smckusick 	/*
24039668Smckusick 	 * Disassociate buffers that are no longer valid.
24139668Smckusick 	 */
24239668Smckusick 	if (bp->b_flags & (B_NOCACHE|B_ERROR))
24337736Smckusick 		bp->b_flags |= B_INVAL;
24439668Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR|B_INVAL))) {
24539668Smckusick 		if (bp->b_vp)
24639668Smckusick 			brelvp(bp);
24739668Smckusick 		bp->b_flags &= ~B_DELWRI;
24837736Smckusick 	}
2497015Smckusick 	/*
2507015Smckusick 	 * Stick the buffer back on a free list.
2517015Smckusick 	 */
25226271Skarels 	s = splbio();
2538670S 	if (bp->b_bufsize <= 0) {
2548670S 		/* block has no buffer ... put at front of unused buffer list */
2558670S 		flist = &bfreelist[BQ_EMPTY];
2568670S 		binsheadfree(bp, flist);
2578670S 	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
2582325Swnj 		/* block has no info ... put at front of most free list */
2598670S 		flist = &bfreelist[BQ_AGE];
2607015Smckusick 		binsheadfree(bp, flist);
2618Sbill 	} else {
2622325Swnj 		if (bp->b_flags & B_LOCKED)
2632325Swnj 			flist = &bfreelist[BQ_LOCKED];
2642325Swnj 		else if (bp->b_flags & B_AGE)
2652325Swnj 			flist = &bfreelist[BQ_AGE];
2662325Swnj 		else
2672325Swnj 			flist = &bfreelist[BQ_LRU];
2687015Smckusick 		binstailfree(bp, flist);
2698Sbill 	}
27037736Smckusick 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
2718Sbill 	splx(s);
2728Sbill }
2738Sbill 
2748Sbill /*
2758Sbill  * See if the block is associated with some buffer
2768Sbill  * (mainly to avoid getting hung up on a wait in breada)
2778Sbill  */
27837736Smckusick incore(vp, blkno)
27937736Smckusick 	struct vnode *vp;
2807015Smckusick 	daddr_t blkno;
2818Sbill {
2828Sbill 	register struct buf *bp;
2832325Swnj 	register struct buf *dp;
2848Sbill 
28538225Smckusick 	dp = BUFHASH(vp, blkno);
2862325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
28739668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2887015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28991Sbill 			return (1);
29091Sbill 	return (0);
2918Sbill }
2928Sbill 
29339668Smckusick /*
29439668Smckusick  * Return a block if it is in memory.
29539668Smckusick  */
29638776Smckusick baddr(vp, blkno, size, cred, bpp)
29737736Smckusick 	struct vnode *vp;
2986563Smckusic 	daddr_t blkno;
2996563Smckusic 	int size;
30038776Smckusick 	struct ucred *cred;
30137736Smckusick 	struct buf **bpp;
3028Sbill {
3038Sbill 
30437736Smckusick 	if (incore(vp, blkno))
30538776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
30637736Smckusick 	*bpp = 0;
3078Sbill 	return (0);
3088Sbill }
3098Sbill 
3108Sbill /*
3118Sbill  * Assign a buffer for the given block.  If the appropriate
3128Sbill  * block is already associated, return it; otherwise search
3138Sbill  * for the oldest non-busy buffer and reassign it.
3145424Swnj  *
3155424Swnj  * We use splx here because this routine may be called
3165424Swnj  * on the interrupt stack during a dump, and we don't
3175424Swnj  * want to lower the ipl back to 0.
3188Sbill  */
3198Sbill struct buf *
32037736Smckusick getblk(vp, blkno, size)
32137736Smckusick 	register struct vnode *vp;
3226563Smckusic 	daddr_t blkno;
3236563Smckusic 	int size;
3248Sbill {
3258670S 	register struct buf *bp, *dp;
3265424Swnj 	int s;
3278Sbill 
32825255Smckusick 	if (size > MAXBSIZE)
32925255Smckusick 		panic("getblk: size too big");
3307015Smckusick 	/*
33124730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
33224730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
33324730Smckusick 	 * to the maximum number that can be converted to a byte offset
33424730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
33524730Smckusick 	 * or whether it is still a reasonable thing to do is open to
33624730Smckusick 	 * dispute. mkm 9/85
33739668Smckusick 	 *
33839668Smckusick 	 * Make it a panic to see if it ever really happens. mkm 11/89
33924730Smckusick 	 */
34039668Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) {
34139668Smckusick 		panic("getblk: blkno too big");
34224730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
34339668Smckusick 	}
34424730Smckusick 	/*
3457015Smckusick 	 * Search the cache for the block.  If we hit, but
3467015Smckusick 	 * the buffer is in use for i/o, then we wait until
3477015Smckusick 	 * the i/o has completed.
3487015Smckusick 	 */
34937736Smckusick 	dp = BUFHASH(vp, blkno);
3507015Smckusick loop:
3512325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
35239668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
3532325Swnj 		    bp->b_flags&B_INVAL)
3548Sbill 			continue;
35526271Skarels 		s = splbio();
3568Sbill 		if (bp->b_flags&B_BUSY) {
3578Sbill 			bp->b_flags |= B_WANTED;
3588Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3595424Swnj 			splx(s);
3608Sbill 			goto loop;
3618Sbill 		}
36239882Smckusick 		bremfree(bp);
36339882Smckusick 		bp->b_flags |= B_BUSY;
3645424Swnj 		splx(s);
36532608Smckusick 		if (bp->b_bcount != size) {
36639668Smckusick 			printf("getblk: stray size");
36739668Smckusick 			bp->b_flags |= B_INVAL;
36839668Smckusick 			bwrite(bp);
36939668Smckusick 			goto loop;
37032608Smckusick 		}
3718Sbill 		bp->b_flags |= B_CACHE;
37226271Skarels 		return (bp);
3738Sbill 	}
3748670S 	bp = getnewbuf();
3756563Smckusic 	bfree(bp);
3767015Smckusick 	bremhash(bp);
37739668Smckusick 	bgetvp(vp, bp);
37839668Smckusick 	bp->b_lblkno = blkno;
3796563Smckusic 	bp->b_blkno = blkno;
3808670S 	bp->b_error = 0;
38137736Smckusick 	bp->b_resid = 0;
38237736Smckusick 	binshash(bp, dp);
38339668Smckusick 	brealloc(bp, size);
38426271Skarels 	return (bp);
3858Sbill }
3868Sbill 
3878Sbill /*
3888Sbill  * get an empty block,
3898Sbill  * not assigned to any particular device
3908Sbill  */
3918Sbill struct buf *
3926563Smckusic geteblk(size)
3936563Smckusic 	int size;
3948Sbill {
3958670S 	register struct buf *bp, *flist;
3968Sbill 
39725255Smckusick 	if (size > MAXBSIZE)
39825255Smckusick 		panic("geteblk: size too big");
3998670S 	bp = getnewbuf();
4008670S 	bp->b_flags |= B_INVAL;
4017015Smckusick 	bfree(bp);
4027015Smckusick 	bremhash(bp);
4038670S 	flist = &bfreelist[BQ_AGE];
40437736Smckusick 	bp->b_error = 0;
40537736Smckusick 	bp->b_resid = 0;
4068670S 	binshash(bp, flist);
40739668Smckusick 	brealloc(bp, size);
40826271Skarels 	return (bp);
4098Sbill }
4108Sbill 
4118Sbill /*
4126563Smckusic  * Allocate space associated with a buffer.
4136563Smckusic  */
4146563Smckusic brealloc(bp, size)
4156563Smckusic 	register struct buf *bp;
4166563Smckusic 	int size;
4176563Smckusic {
4186563Smckusic 	daddr_t start, last;
4196563Smckusic 	register struct buf *ep;
4206563Smckusic 	struct buf *dp;
4216563Smckusic 	int s;
4226563Smckusic 
4236563Smckusic 	if (size == bp->b_bcount)
42439668Smckusick 		return;
42539668Smckusick 	allocbuf(bp, size);
4268670S }
4278670S 
4288670S /*
4298670S  * Find a buffer which is available for use.
4308670S  * Select something from a free list.
4318670S  * Preference is to AGE list, then LRU list.
4328670S  */
4338670S struct buf *
4348670S getnewbuf()
4358670S {
4368670S 	register struct buf *bp, *dp;
43738776Smckusick 	register struct ucred *cred;
4388670S 	int s;
4398670S 
4408670S loop:
44126271Skarels 	s = splbio();
4428670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4438670S 		if (dp->av_forw != dp)
4448670S 			break;
4458670S 	if (dp == bfreelist) {		/* no free blocks */
4468670S 		dp->b_flags |= B_WANTED;
4478670S 		sleep((caddr_t)dp, PRIBIO+1);
44812170Ssam 		splx(s);
4498670S 		goto loop;
4508670S 	}
45139882Smckusick 	bp = dp->av_forw;
45239882Smckusick 	bremfree(bp);
45339882Smckusick 	bp->b_flags |= B_BUSY;
4548670S 	splx(s);
4558670S 	if (bp->b_flags & B_DELWRI) {
45638614Smckusick 		(void) bawrite(bp);
4578670S 		goto loop;
4588670S 	}
45937736Smckusick 	trace(TR_BRELSE,
46039668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
46139668Smckusick 	if (bp->b_vp)
46239668Smckusick 		brelvp(bp);
46338776Smckusick 	if (bp->b_rcred != NOCRED) {
46438776Smckusick 		cred = bp->b_rcred;
46538776Smckusick 		bp->b_rcred = NOCRED;
46638776Smckusick 		crfree(cred);
46738776Smckusick 	}
46838776Smckusick 	if (bp->b_wcred != NOCRED) {
46938776Smckusick 		cred = bp->b_wcred;
47038776Smckusick 		bp->b_wcred = NOCRED;
47138776Smckusick 		crfree(cred);
47238776Smckusick 	}
4738670S 	bp->b_flags = B_BUSY;
4748670S 	return (bp);
4758670S }
4768670S 
4778670S /*
4788Sbill  * Wait for I/O completion on the buffer; return errors
4798Sbill  * to the user.
4808Sbill  */
4817015Smckusick biowait(bp)
4826563Smckusic 	register struct buf *bp;
4838Sbill {
4845431Sroot 	int s;
4858Sbill 
48626271Skarels 	s = splbio();
48738776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
4888Sbill 		sleep((caddr_t)bp, PRIBIO);
4895431Sroot 	splx(s);
49037736Smckusick 	/*
49137736Smckusick 	 * Pick up the device's error number and pass it to the user;
49237736Smckusick 	 * if there is an error but the number is 0 set a generalized code.
49337736Smckusick 	 */
49437736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
49537736Smckusick 		return (0);
49637736Smckusick 	if (bp->b_error)
49737736Smckusick 		return (bp->b_error);
49837736Smckusick 	return (EIO);
4998Sbill }
5008Sbill 
5018Sbill /*
50213128Ssam  * Mark I/O complete on a buffer.
50313128Ssam  * If someone should be called, e.g. the pageout
50413128Ssam  * daemon, do so.  Otherwise, wake up anyone
50513128Ssam  * waiting for it.
5068Sbill  */
5077015Smckusick biodone(bp)
5087015Smckusick 	register struct buf *bp;
5098Sbill {
51039882Smckusick 	register struct vnode *vp;
5118Sbill 
512420Sbill 	if (bp->b_flags & B_DONE)
5137015Smckusick 		panic("dup biodone");
5148Sbill 	bp->b_flags |= B_DONE;
51539882Smckusick 	if ((bp->b_flags & B_READ) == 0) {
51638776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
51739882Smckusick 		if (vp = bp->b_vp) {
51839882Smckusick 			vp->v_numoutput--;
51939882Smckusick 			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
52039882Smckusick 				if (vp->v_numoutput < 0)
52139882Smckusick 					panic("biodone: neg numoutput");
52239882Smckusick 				vp->v_flag &= ~VBWAIT;
52339882Smckusick 				wakeup((caddr_t)&vp->v_numoutput);
52439882Smckusick 			}
52539882Smckusick 		}
52639882Smckusick 	}
5279763Ssam 	if (bp->b_flags & B_CALL) {
5289763Ssam 		bp->b_flags &= ~B_CALL;
5299763Ssam 		(*bp->b_iodone)(bp);
5309763Ssam 		return;
5319763Ssam 	}
5328Sbill 	if (bp->b_flags&B_ASYNC)
5338Sbill 		brelse(bp);
5348Sbill 	else {
5358Sbill 		bp->b_flags &= ~B_WANTED;
5368Sbill 		wakeup((caddr_t)bp);
5378Sbill 	}
5388Sbill }
5398Sbill 
5408Sbill /*
54137736Smckusick  * Make sure all write-behind blocks associated
54238776Smckusick  * with mount point are flushed out (from sync).
5438Sbill  */
54439668Smckusick mntflushbuf(mountp, flags)
54538776Smckusick 	struct mount *mountp;
54639668Smckusick 	int flags;
5478Sbill {
54839668Smckusick 	register struct vnode *vp;
54939764Smckusick 	struct vnode *nvp;
55039668Smckusick 
55139668Smckusick loop:
55239764Smckusick 	for (vp = mountp->m_mounth; vp; vp = nvp) {
55339764Smckusick 		nvp = vp->v_mountf;
55439668Smckusick 		if (vget(vp))
55539668Smckusick 			goto loop;
55639668Smckusick 		vflushbuf(vp, flags);
55739668Smckusick 		vput(vp);
55839668Smckusick 	}
55939668Smckusick }
56039668Smckusick 
56139668Smckusick /*
56239668Smckusick  * Flush all dirty buffers associated with a vnode.
56339668Smckusick  */
56439668Smckusick vflushbuf(vp, flags)
56539668Smckusick 	register struct vnode *vp;
56639668Smckusick 	int flags;
56739668Smckusick {
5688Sbill 	register struct buf *bp;
56939668Smckusick 	struct buf *nbp;
5705431Sroot 	int s;
5718Sbill 
5728Sbill loop:
57326271Skarels 	s = splbio();
57439882Smckusick 	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
57539668Smckusick 		nbp = bp->b_blockf;
57639668Smckusick 		if ((bp->b_flags & B_BUSY))
57739668Smckusick 			continue;
57839668Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
57939882Smckusick 			panic("vflushbuf: not dirty");
58039882Smckusick 		bremfree(bp);
58139882Smckusick 		bp->b_flags |= B_BUSY;
58239668Smckusick 		splx(s);
58339882Smckusick 		/*
58439882Smckusick 		 * Wait for I/O associated with indirect blocks to complete,
58539882Smckusick 		 * since there is no way to quickly wait for them below.
58639882Smckusick 		 * NB - This is really specific to ufs, but is done here
58739882Smckusick 		 * as it is easier and quicker.
58839882Smckusick 		 */
58939882Smckusick 		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
59039882Smckusick 			(void) bawrite(bp);
59139882Smckusick 		} else {
59239882Smckusick 			(void) bwrite(bp);
59339882Smckusick 			goto loop;
59439882Smckusick 		}
59539668Smckusick 	}
59639738Smckusick 	splx(s);
59739668Smckusick 	if ((flags & B_SYNC) == 0)
59839668Smckusick 		return;
59939668Smckusick 	s = splbio();
60039882Smckusick 	while (vp->v_numoutput) {
60139882Smckusick 		vp->v_flag |= VBWAIT;
60239882Smckusick 		sleep((caddr_t)&vp->v_numoutput, PRIBIO+1);
60337736Smckusick 	}
60439738Smckusick 	splx(s);
60539882Smckusick 	if (vp->v_dirtyblkhd) {
60639882Smckusick 		vprint("vflushbuf: dirty", vp);
60739882Smckusick 		goto loop;
60839882Smckusick 	}
6098Sbill }
6102299Skre 
6112299Skre /*
6122299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6132299Skre  *
61439668Smckusick  * Go through the list of vnodes associated with the file system;
61539668Smckusick  * for each vnode invalidate any buffers that it holds. Normally
61639668Smckusick  * this routine is preceeded by a bflush call, so that on a quiescent
61739668Smckusick  * filesystem there will be no dirty buffers when we are done. Binval
61839668Smckusick  * returns the count of dirty buffers when it is finished.
6192299Skre  */
62039668Smckusick mntinvalbuf(mountp)
62138776Smckusick 	struct mount *mountp;
6222299Skre {
62339668Smckusick 	register struct vnode *vp;
62439764Smckusick 	struct vnode *nvp;
62539668Smckusick 	int dirty = 0;
62639668Smckusick 
62739668Smckusick loop:
62839764Smckusick 	for (vp = mountp->m_mounth; vp; vp = nvp) {
62939764Smckusick 		nvp = vp->v_mountf;
63039668Smckusick 		if (vget(vp))
63139668Smckusick 			goto loop;
63239668Smckusick 		dirty += vinvalbuf(vp, 1);
63339668Smckusick 		vput(vp);
63439668Smckusick 	}
63539668Smckusick 	return (dirty);
63639668Smckusick }
63739668Smckusick 
63839668Smckusick /*
63939668Smckusick  * Flush out and invalidate all buffers associated with a vnode.
64039668Smckusick  * Called with the underlying object locked.
64139668Smckusick  */
64239668Smckusick vinvalbuf(vp, save)
64339668Smckusick 	register struct vnode *vp;
64439668Smckusick 	int save;
64539668Smckusick {
6462361Skre 	register struct buf *bp;
64739882Smckusick 	struct buf *nbp, *blist;
64838633Smckusick 	int s, dirty = 0;
6492299Skre 
65039882Smckusick 	for (;;) {
65139882Smckusick 		if (blist = vp->v_dirtyblkhd)
65239882Smckusick 			/* void */;
65339882Smckusick 		else if (blist = vp->v_cleanblkhd)
65439882Smckusick 			/* void */;
65539882Smckusick 		else
65639882Smckusick 			break;
65739882Smckusick 		for (bp = blist; bp; bp = nbp) {
65839882Smckusick 			nbp = bp->b_blockf;
65939882Smckusick 			s = splbio();
66039882Smckusick 			if (bp->b_flags & B_BUSY) {
66139882Smckusick 				bp->b_flags |= B_WANTED;
66239882Smckusick 				sleep((caddr_t)bp, PRIBIO+1);
66339882Smckusick 				splx(s);
66439882Smckusick 				break;
66539882Smckusick 			}
66639882Smckusick 			bremfree(bp);
66739882Smckusick 			bp->b_flags |= B_BUSY;
66838808Smckusick 			splx(s);
66939882Smckusick 			if (save && (bp->b_flags & B_DELWRI)) {
67038614Smckusick 				dirty++;
67139668Smckusick 				(void) bwrite(bp);
67239882Smckusick 				break;
67337736Smckusick 			}
67440034Smckusick 			if (bp->b_vp != vp)
67540034Smckusick 				reassignbuf(bp, bp->b_vp);
67640034Smckusick 			else
67740034Smckusick 				bp->b_flags |= B_INVAL;
67839882Smckusick 			brelse(bp);
67938614Smckusick 		}
68038614Smckusick 	}
68139882Smckusick 	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
68239668Smckusick 		panic("vinvalbuf: flush failed");
68338614Smckusick 	return (dirty);
6842299Skre }
68537736Smckusick 
68639668Smckusick /*
68739668Smckusick  * Associate a buffer with a vnode.
68839668Smckusick  */
68939668Smckusick bgetvp(vp, bp)
69039668Smckusick 	register struct vnode *vp;
69139668Smckusick 	register struct buf *bp;
69239668Smckusick {
69339668Smckusick 
69439668Smckusick 	if (bp->b_vp)
69539668Smckusick 		panic("bgetvp: not free");
69639808Smckusick 	VHOLD(vp);
69739668Smckusick 	bp->b_vp = vp;
69839668Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
69939668Smckusick 		bp->b_dev = vp->v_rdev;
70039668Smckusick 	else
70139668Smckusick 		bp->b_dev = NODEV;
70239668Smckusick 	/*
70339668Smckusick 	 * Insert onto list for new vnode.
70439668Smckusick 	 */
70539882Smckusick 	if (vp->v_cleanblkhd) {
70639882Smckusick 		bp->b_blockf = vp->v_cleanblkhd;
70739882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
70839882Smckusick 		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
70939882Smckusick 		vp->v_cleanblkhd = bp;
71039668Smckusick 	} else {
71139882Smckusick 		vp->v_cleanblkhd = bp;
71239882Smckusick 		bp->b_blockb = &vp->v_cleanblkhd;
71339668Smckusick 		bp->b_blockf = NULL;
71439668Smckusick 	}
71539668Smckusick }
71639668Smckusick 
71739668Smckusick /*
71839668Smckusick  * Disassociate a buffer from a vnode.
71939668Smckusick  */
72037736Smckusick brelvp(bp)
72139668Smckusick 	register struct buf *bp;
72237736Smckusick {
72339668Smckusick 	struct buf *bq;
72437736Smckusick 	struct vnode *vp;
72537736Smckusick 
72637736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
72739668Smckusick 		panic("brelvp: NULL");
72839668Smckusick 	/*
72939668Smckusick 	 * Delete from old vnode list, if on one.
73039668Smckusick 	 */
73139668Smckusick 	if (bp->b_blockb) {
73239668Smckusick 		if (bq = bp->b_blockf)
73339668Smckusick 			bq->b_blockb = bp->b_blockb;
73439668Smckusick 		*bp->b_blockb = bq;
73539668Smckusick 		bp->b_blockf = NULL;
73639668Smckusick 		bp->b_blockb = NULL;
73739668Smckusick 	}
73837736Smckusick 	vp = bp->b_vp;
73937736Smckusick 	bp->b_vp = (struct vnode *) 0;
74039808Smckusick 	HOLDRELE(vp);
74137736Smckusick }
74239668Smckusick 
74339668Smckusick /*
74439668Smckusick  * Reassign a buffer from one vnode to another.
74539668Smckusick  * Used to assign file specific control information
74639668Smckusick  * (indirect blocks) to the vnode to which they belong.
74739668Smckusick  */
74839668Smckusick reassignbuf(bp, newvp)
74939668Smckusick 	register struct buf *bp;
75039668Smckusick 	register struct vnode *newvp;
75139668Smckusick {
75239882Smckusick 	register struct buf *bq, **listheadp;
75339668Smckusick 
75439882Smckusick 	if (newvp == NULL)
75539882Smckusick 		panic("reassignbuf: NULL");
75639668Smckusick 	/*
75739668Smckusick 	 * Delete from old vnode list, if on one.
75839668Smckusick 	 */
75939668Smckusick 	if (bp->b_blockb) {
76039668Smckusick 		if (bq = bp->b_blockf)
76139668Smckusick 			bq->b_blockb = bp->b_blockb;
76239668Smckusick 		*bp->b_blockb = bq;
76339668Smckusick 	}
76439668Smckusick 	/*
76539882Smckusick 	 * If dirty, put on list of dirty buffers;
76639882Smckusick 	 * otherwise insert onto list of clean buffers.
76739668Smckusick 	 */
76839882Smckusick 	if (bp->b_flags & B_DELWRI)
76939882Smckusick 		listheadp = &newvp->v_dirtyblkhd;
77039882Smckusick 	else
77139882Smckusick 		listheadp = &newvp->v_cleanblkhd;
77239882Smckusick 	if (*listheadp) {
77339882Smckusick 		bp->b_blockf = *listheadp;
77439882Smckusick 		bp->b_blockb = listheadp;
77539882Smckusick 		bp->b_blockf->b_blockb = &bp->b_blockf;
77639882Smckusick 		*listheadp = bp;
77739668Smckusick 	} else {
77839882Smckusick 		*listheadp = bp;
77939882Smckusick 		bp->b_blockb = listheadp;
78039668Smckusick 		bp->b_blockf = NULL;
78139668Smckusick 	}
78239668Smckusick }
783