xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 39668)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
537736Smckusick  * Redistribution and use in source and binary forms are permitted
637736Smckusick  * provided that the above copyright notice and this paragraph are
737736Smckusick  * duplicated in all such forms and that any documentation,
837736Smckusick  * advertising materials, and other materials related to such
937736Smckusick  * distribution and use acknowledge that the software was developed
1037736Smckusick  * by the University of California, Berkeley.  The name of the
1137736Smckusick  * University may not be used to endorse or promote products derived
1237736Smckusick  * from this software without specific prior written permission.
1337736Smckusick  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1437736Smckusick  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1537736Smckusick  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1637736Smckusick  *
17*39668Smckusick  *	@(#)vfs_cluster.c	7.15 (Berkeley) 11/30/89
1823395Smckusick  */
198Sbill 
2017098Sbloom #include "param.h"
2117098Sbloom #include "user.h"
2217098Sbloom #include "buf.h"
2337736Smckusick #include "vnode.h"
24*39668Smckusick #include "mount.h"
2517098Sbloom #include "trace.h"
2638776Smckusick #include "ucred.h"
278Sbill 
2991Sbill /*
298Sbill  * Read in (if necessary) the block and return a buffer pointer.
 * On a cache hit (B_DONE or B_DELWRI set) returns 0 immediately;
 * otherwise starts the read via VOP_STRATEGY and waits for it.
 * Returns 0 on success or the error status from biowait().
308Sbill  */
3138776Smckusick bread(vp, blkno, size, cred, bpp)
3237736Smckusick 	struct vnode *vp;
336563Smckusic 	daddr_t blkno;
346563Smckusic 	int size;
3538776Smckusick 	struct ucred *cred;
3637736Smckusick 	struct buf **bpp;
378Sbill {
388Sbill 	register struct buf *bp;
398Sbill 
408670S 	if (size == 0)
418670S 		panic("bread: size 0");
4237736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
4332608Smckusick 	if (bp->b_flags&(B_DONE|B_DELWRI)) {
4437736Smckusick 		trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
4537736Smckusick 		return (0);
468Sbill 	}
478Sbill 	bp->b_flags |= B_READ;
488670S 	if (bp->b_bcount > bp->b_bufsize)
498670S 		panic("bread");
	/* attach the caller's credentials for reading, if none held yet */
5038776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
5138776Smckusick 		crhold(cred);
5238776Smckusick 		bp->b_rcred = cred;
5338776Smckusick 	}
5437736Smckusick 	VOP_STRATEGY(bp);
5537736Smckusick 	trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
568039Sroot 	u.u_ru.ru_inblock++;		/* pay for read */
5737736Smckusick 	return (biowait(bp));
588Sbill }
598Sbill 
608Sbill /*
618Sbill  * Read in the block, like bread, but also start I/O on the
628Sbill  * read-ahead block (which is not allocated to the caller)
 * The read-ahead buffer, if started, is released to complete
 * asynchronously (B_ASYNC); only the requested block is returned.
638Sbill  */
6438776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
6537736Smckusick 	struct vnode *vp;
667114Smckusick 	daddr_t blkno; int size;
678592Sroot 	daddr_t rablkno; int rabsize;
6838776Smckusick 	struct ucred *cred;
6937736Smckusick 	struct buf **bpp;
708Sbill {
718Sbill 	register struct buf *bp, *rabp;
728Sbill 
738Sbill 	bp = NULL;
747015Smckusick 	/*
757015Smckusick 	 * If the block isn't in core, then allocate
767015Smckusick 	 * a buffer and initiate i/o (getblk checks
777015Smckusick 	 * for a cache hit).
787015Smckusick 	 */
7937736Smckusick 	if (!incore(vp, blkno)) {
8037736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
8132608Smckusick 		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
828Sbill 			bp->b_flags |= B_READ;
838670S 			if (bp->b_bcount > bp->b_bufsize)
848670S 				panic("breada");
8538776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8638776Smckusick 				crhold(cred);
8738776Smckusick 				bp->b_rcred = cred;
8838776Smckusick 			}
8937736Smckusick 			VOP_STRATEGY(bp);
9037736Smckusick 			trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
9137736Smckusick 			    blkno);
928039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
937015Smckusick 		} else
9437736Smckusick 			trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
9537736Smckusick 			    blkno);
968Sbill 	}
977015Smckusick 
987015Smckusick 	/*
997015Smckusick 	 * If there's a read-ahead block, start i/o
1007015Smckusick 	 * on it also (as above).
1017015Smckusick 	 */
10237736Smckusick 	if (rablkno && !incore(vp, rablkno)) {
10337736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
10432608Smckusick 		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
			/* already cached -- nobody is waiting for it, let it go */
1058Sbill 			brelse(rabp);
10637736Smckusick 			trace(TR_BREADHITRA,
10738880Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
1082045Swnj 		} else {
1098Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1108670S 			if (rabp->b_bcount > rabp->b_bufsize)
1118670S 				panic("breadrabp");
11238880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
11338776Smckusick 				crhold(cred);
11438880Smckusick 				rabp->b_rcred = cred;
11538776Smckusick 			}
11637736Smckusick 			VOP_STRATEGY(rabp);
11737736Smckusick 			trace(TR_BREADMISSRA,
11838880Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
1198039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1208Sbill 		}
1218Sbill 	}
1227015Smckusick 
1237015Smckusick 	/*
1247114Smckusick 	 * If block was in core, let bread get it.
1257114Smckusick 	 * If block wasn't in core, then the read was started
1267114Smckusick 	 * above, and just wait for it.
1277015Smckusick 	 */
1287114Smckusick 	if (bp == NULL)
12938776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
13037736Smckusick 	return (biowait(bp));
1318Sbill }
1328Sbill 
1338Sbill /*
1348Sbill  * Write the buffer, waiting for completion.
1358Sbill  * Then release the buffer.
1368Sbill  */
1378Sbill bwrite(bp)
1387015Smckusick 	register struct buf *bp;
1398Sbill {
14037736Smckusick 	register int flag;
14137736Smckusick 	int error;
1428Sbill 
1438Sbill 	flag = bp->b_flags;
1449857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1458Sbill 	if ((flag&B_DELWRI) == 0)
1468039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
14737736Smckusick 	trace(TR_BWRITE,
148*39668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_lblkno);
1498670S 	if (bp->b_bcount > bp->b_bufsize)
1508670S 		panic("bwrite");
15137736Smckusick 	VOP_STRATEGY(bp);
1527015Smckusick 
1537015Smckusick 	/*
1547015Smckusick 	 * If the write was synchronous, then await i/o completion.
1557015Smckusick 	 * If the write was "delayed", then we put the buffer on
1567015Smckusick 	 * the q of blocks awaiting i/o completion status.
1577015Smckusick 	 */
1588Sbill 	if ((flag&B_ASYNC) == 0) {
15937736Smckusick 		error = biowait(bp);
1608Sbill 		brelse(bp);
16137736Smckusick 	} else if (flag & B_DELWRI) {
1628Sbill 		bp->b_flags |= B_AGE;
16337736Smckusick 		error = 0;
16437736Smckusick 	}
16537736Smckusick 	return (error);
1668Sbill }
1678Sbill 
1688Sbill /*
1698Sbill  * Release the buffer, marking it so that if it is grabbed
1708Sbill  * for another purpose it will be written out before being
1718Sbill  * given up (e.g. when writing a partial block where it is
1728Sbill  * assumed that another write for the same block will soon follow).
1738Sbill  * This can't be done for magtape, since writes must be done
1748Sbill  * in the same order as requested.
1758Sbill  */
1768Sbill bdwrite(bp)
1777015Smckusick 	register struct buf *bp;
1788Sbill {
1798Sbill 
1808Sbill 	if ((bp->b_flags&B_DELWRI) == 0)
1818039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
18237736Smckusick 	/*
183*39668Smckusick 	 * If this is a tape drive, the write must be initiated.
	 * (The B_TAPE ioctl succeeding identifies a tape device;
	 * tapes require writes in request order, so no delaying.)
18437736Smckusick 	 */
185*39668Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
1868Sbill 		bawrite(bp);
187*39668Smckusick 	} else {
		/* mark delayed-write; the actual I/O happens later */
1888Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
1898Sbill 		brelse(bp);
1908Sbill 	}
1918Sbill }
1928Sbill 
1938Sbill /*
1948Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
 * (Simply tags the buffer B_ASYNC and hands it to bwrite, which
 * releases it from biodone rather than waiting.)
1958Sbill  */
1968Sbill bawrite(bp)
1977015Smckusick 	register struct buf *bp;
1988Sbill {
1998Sbill 
2008Sbill 	bp->b_flags |= B_ASYNC;
20137736Smckusick 	(void) bwrite(bp);
2028Sbill }
2038Sbill 
2048Sbill /*
2057015Smckusick  * Release the buffer, with no I/O implied.
 * Wakes any waiters, invalidates errored/uncacheable buffers, and
 * requeues the buffer on the appropriate free list.
2068Sbill  */
2078Sbill brelse(bp)
2087015Smckusick 	register struct buf *bp;
2098Sbill {
2102325Swnj 	register struct buf *flist;
2118Sbill 	register s;
2128Sbill 
21337736Smckusick 	trace(TR_BRELSE,
214*39668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
2157015Smckusick 	/*
216*39668Smckusick 	 * If a process is waiting for the buffer, or
217*39668Smckusick 	 * is waiting for a free buffer, awaken it.
2187015Smckusick 	 */
2198Sbill 	if (bp->b_flags&B_WANTED)
2208Sbill 		wakeup((caddr_t)bp);
2212325Swnj 	if (bfreelist[0].b_flags&B_WANTED) {
2222325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2232325Swnj 		wakeup((caddr_t)bfreelist);
2248Sbill 	}
225*39668Smckusick 	/*
226*39668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
227*39668Smckusick 	 */
228*39668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
229*39668Smckusick 		bp->b_flags &= ~B_ERROR;
230*39668Smckusick 
231*39668Smckusick 	/*
232*39668Smckusick 	 * Disassociate buffers that are no longer valid.
233*39668Smckusick 	 */
234*39668Smckusick 	if (bp->b_flags & (B_NOCACHE|B_ERROR))
23537736Smckusick 		bp->b_flags |= B_INVAL;
236*39668Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR|B_INVAL))) {
237*39668Smckusick 		if (bp->b_vp)
238*39668Smckusick 			brelvp(bp);
239*39668Smckusick 		bp->b_flags &= ~B_DELWRI;
24037736Smckusick 	}
2417015Smckusick 	/*
2427015Smckusick 	 * Stick the buffer back on a free list.
2437015Smckusick 	 */
24426271Skarels 	s = splbio();
2458670S 	if (bp->b_bufsize <= 0) {
2468670S 		/* block has no buffer ... put at front of unused buffer list */
2478670S 		flist = &bfreelist[BQ_EMPTY];
2488670S 		binsheadfree(bp, flist);
2498670S 	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
2502325Swnj 		/* block has no info ... put at front of most free list */
2518670S 		flist = &bfreelist[BQ_AGE];
2527015Smckusick 		binsheadfree(bp, flist);
2538Sbill 	} else {
2542325Swnj 		if (bp->b_flags & B_LOCKED)
2552325Swnj 			flist = &bfreelist[BQ_LOCKED];
2562325Swnj 		else if (bp->b_flags & B_AGE)
2572325Swnj 			flist = &bfreelist[BQ_AGE];
2582325Swnj 		else
2592325Swnj 			flist = &bfreelist[BQ_LRU];
2607015Smckusick 		binstailfree(bp, flist);
2618Sbill 	}
26237736Smckusick 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
2638Sbill 	splx(s);
2648Sbill }
2658Sbill 
2668Sbill /*
2678Sbill  * See if the block is associated with some buffer
2688Sbill  * (mainly to avoid getting hung up on a wait in breada)
 * Returns 1 if a valid (non-B_INVAL) buffer for (vp, blkno)
 * is on the hash chain, otherwise 0.
2698Sbill  */
27037736Smckusick incore(vp, blkno)
27137736Smckusick 	struct vnode *vp;
2727015Smckusick 	daddr_t blkno;
2738Sbill {
2748Sbill 	register struct buf *bp;
2752325Swnj 	register struct buf *dp;
2768Sbill 
27738225Smckusick 	dp = BUFHASH(vp, blkno);
2782325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
279*39668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
2807015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28191Sbill 			return (1);
28291Sbill 	return (0);
2838Sbill }
2848Sbill 
285*39668Smckusick /*
286*39668Smckusick  * Return a block if it is in memory.
 * On a cache hit, behaves exactly like bread; otherwise sets
 * *bpp to 0 and returns 0 without doing any I/O.
287*39668Smckusick  */
28838776Smckusick baddr(vp, blkno, size, cred, bpp)
28937736Smckusick 	struct vnode *vp;
2906563Smckusic 	daddr_t blkno;
2916563Smckusic 	int size;
29238776Smckusick 	struct ucred *cred;
29337736Smckusick 	struct buf **bpp;
2948Sbill {
2958Sbill 
29637736Smckusick 	if (incore(vp, blkno))
29738776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
29837736Smckusick 	*bpp = 0;
2998Sbill 	return (0);
3008Sbill }
3018Sbill 
3028Sbill /*
3038Sbill  * Assign a buffer for the given block.  If the appropriate
3048Sbill  * block is already associated, return it; otherwise search
3058Sbill  * for the oldest non-busy buffer and reassign it.
3065424Swnj  *
3075424Swnj  * We use splx here because this routine may be called
3085424Swnj  * on the interrupt stack during a dump, and we don't
3095424Swnj  * want to lower the ipl back to 0.
3108Sbill  */
3118Sbill struct buf *
31237736Smckusick getblk(vp, blkno, size)
31337736Smckusick 	register struct vnode *vp;
3146563Smckusic 	daddr_t blkno;
3156563Smckusic 	int size;
3168Sbill {
3178670S 	register struct buf *bp, *dp;
3185424Swnj 	int s;
3198Sbill 
32025255Smckusick 	if (size > MAXBSIZE)
32125255Smckusick 		panic("getblk: size too big");
3227015Smckusick 	/*
32324730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
32424730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
32524730Smckusick 	 * to the maximum number that can be converted to a byte offset
32624730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
32724730Smckusick 	 * or whether it is still a reasonable thing to do is open to
32824730Smckusick 	 * dispute. mkm 9/85
329*39668Smckusick 	 *
330*39668Smckusick 	 * Make it a panic to see if it ever really happens. mkm 11/89
33124730Smckusick 	 */
332*39668Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) {
333*39668Smckusick 		panic("getblk: blkno too big");
		/* NOTE(review): statement below is unreachable after the
		 * panic above; kept deliberately per the mkm 11/89 note */
33424730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
335*39668Smckusick 	}
33624730Smckusick 	/*
3377015Smckusick 	 * Search the cache for the block.  If we hit, but
3387015Smckusick 	 * the buffer is in use for i/o, then we wait until
3397015Smckusick 	 * the i/o has completed.
3407015Smckusick 	 */
34137736Smckusick 	dp = BUFHASH(vp, blkno);
3427015Smckusick loop:
3432325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
344*39668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
3452325Swnj 		    bp->b_flags&B_INVAL)
3468Sbill 			continue;
34726271Skarels 		s = splbio();
3488Sbill 		if (bp->b_flags&B_BUSY) {
3498Sbill 			bp->b_flags |= B_WANTED;
3508Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3515424Swnj 			splx(s);
3528Sbill 			goto loop;
3538Sbill 		}
3545424Swnj 		splx(s);
3558Sbill 		notavail(bp);
		/* size mismatch: flush the stray buffer and retry */
35632608Smckusick 		if (bp->b_bcount != size) {
357*39668Smckusick 			printf("getblk: stray size");
358*39668Smckusick 			bp->b_flags |= B_INVAL;
359*39668Smckusick 			bwrite(bp);
360*39668Smckusick 			goto loop;
36132608Smckusick 		}
3628Sbill 		bp->b_flags |= B_CACHE;
36326271Skarels 		return (bp);
3648Sbill 	}
	/* cache miss: take a free buffer and bind it to (vp, blkno) */
3658670S 	bp = getnewbuf();
3666563Smckusic 	bfree(bp);
3677015Smckusick 	bremhash(bp);
368*39668Smckusick 	bgetvp(vp, bp);
369*39668Smckusick 	bp->b_lblkno = blkno;
3706563Smckusic 	bp->b_blkno = blkno;
3718670S 	bp->b_error = 0;
37237736Smckusick 	bp->b_resid = 0;
37337736Smckusick 	binshash(bp, dp);
374*39668Smckusick 	brealloc(bp, size);
37526271Skarels 	return (bp);
3768Sbill }
3778Sbill 
3788Sbill /*
3798Sbill  * get an empty block,
3808Sbill  * not assigned to any particular device
 * The buffer is marked B_INVAL and hashed onto the BQ_AGE free-list
 * header so it is reclaimed quickly once released.
3818Sbill  */
3828Sbill struct buf *
3836563Smckusic geteblk(size)
3846563Smckusic 	int size;
3858Sbill {
3868670S 	register struct buf *bp, *flist;
3878Sbill 
38825255Smckusick 	if (size > MAXBSIZE)
38925255Smckusick 		panic("geteblk: size too big");
3908670S 	bp = getnewbuf();
3918670S 	bp->b_flags |= B_INVAL;
3927015Smckusick 	bfree(bp);
3937015Smckusick 	bremhash(bp);
3948670S 	flist = &bfreelist[BQ_AGE];
39537736Smckusick 	bp->b_error = 0;
39637736Smckusick 	bp->b_resid = 0;
3978670S 	binshash(bp, flist);
398*39668Smckusick 	brealloc(bp, size);
39926271Skarels 	return (bp);
4008Sbill }
4018Sbill 
4028Sbill /*
4036563Smckusic  * Allocate space associated with a buffer.
 * Nothing to do if the size is unchanged; otherwise allocbuf()
 * adjusts the buffer's memory.  (Removed four local variables --
 * start, last, ep, dp, s -- left unused by an earlier rewrite.)
4046563Smckusic  */
4056563Smckusic brealloc(bp, size)
4066563Smckusic 	register struct buf *bp;
4076563Smckusic 	int size;
4086563Smckusic {
4136563Smckusic 
4146563Smckusic 	if (size == bp->b_bcount)
415*39668Smckusick 		return;
416*39668Smckusick 	allocbuf(bp, size);
4178670S }
4188670S 
4198670S /*
4208670S  * Find a buffer which is available for use.
4218670S  * Select something from a free list.
4228670S  * Preference is to AGE list, then LRU list.
 * Dirty (B_DELWRI) victims are pushed to disk and the search
 * restarted; any vnode and credential references held by the old
 * identity are dropped before the buffer is returned B_BUSY.
4238670S  */
4248670S struct buf *
4258670S getnewbuf()
4268670S {
4278670S 	register struct buf *bp, *dp;
42838776Smckusick 	register struct ucred *cred;
4298670S 	int s;
4308670S 
4318670S loop:
43226271Skarels 	s = splbio();
4338670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4348670S 		if (dp->av_forw != dp)
4358670S 			break;
4368670S 	if (dp == bfreelist) {		/* no free blocks */
4378670S 		dp->b_flags |= B_WANTED;
4388670S 		sleep((caddr_t)dp, PRIBIO+1);
43912170Ssam 		splx(s);
4408670S 		goto loop;
4418670S 	}
4428670S 	splx(s);
4438670S 	bp = dp->av_forw;
4448670S 	notavail(bp);
4458670S 	if (bp->b_flags & B_DELWRI) {
44638614Smckusick 		(void) bawrite(bp);
4478670S 		goto loop;
4488670S 	}
44937736Smckusick 	trace(TR_BRELSE,
450*39668Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_lblkno);
451*39668Smckusick 	if (bp->b_vp)
452*39668Smckusick 		brelvp(bp);
	/* drop any credentials still held from the previous owner */
45338776Smckusick 	if (bp->b_rcred != NOCRED) {
45438776Smckusick 		cred = bp->b_rcred;
45538776Smckusick 		bp->b_rcred = NOCRED;
45638776Smckusick 		crfree(cred);
45738776Smckusick 	}
45838776Smckusick 	if (bp->b_wcred != NOCRED) {
45938776Smckusick 		cred = bp->b_wcred;
46038776Smckusick 		bp->b_wcred = NOCRED;
46138776Smckusick 		crfree(cred);
46238776Smckusick 	}
4638670S 	bp->b_flags = B_BUSY;
4648670S 	return (bp);
4658670S }
4668670S 
4678670S /*
4688Sbill  * Wait for I/O completion on the buffer; return errors
4698Sbill  * to the user.
 * Returns 0 on success, the buffer's b_error if set, or EIO
 * when B_ERROR is set with no specific error number.
4708Sbill  */
4717015Smckusick biowait(bp)
4726563Smckusic 	register struct buf *bp;
4738Sbill {
4745431Sroot 	int s;
4758Sbill 
47626271Skarels 	s = splbio();
47738776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
4788Sbill 		sleep((caddr_t)bp, PRIBIO);
4795431Sroot 	splx(s);
48037736Smckusick 	/*
48137736Smckusick 	 * Pick up the device's error number and pass it to the user;
48237736Smckusick 	 * if there is an error but the number is 0 set a generalized code.
48337736Smckusick 	 */
48437736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
48537736Smckusick 		return (0);
48637736Smckusick 	if (bp->b_error)
48737736Smckusick 		return (bp->b_error);
48837736Smckusick 	return (EIO);
4898Sbill }
4908Sbill 
4918Sbill /*
49213128Ssam  * Mark I/O complete on a buffer.
49313128Ssam  * If someone should be called, e.g. the pageout
49413128Ssam  * daemon, do so.  Otherwise, wake up anyone
49513128Ssam  * waiting for it.
4968Sbill  */
4977015Smckusick biodone(bp)
4987015Smckusick 	register struct buf *bp;
4998Sbill {
5008Sbill 
501420Sbill 	if (bp->b_flags & B_DONE)
5027015Smckusick 		panic("dup biodone");
5038Sbill 	bp->b_flags |= B_DONE;
	/* a completed write clears the dirty-region bookkeeping */
50438776Smckusick 	if ((bp->b_flags & B_READ) == 0)
50538776Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
5069763Ssam 	if (bp->b_flags & B_CALL) {
5079763Ssam 		bp->b_flags &= ~B_CALL;
5089763Ssam 		(*bp->b_iodone)(bp);
5099763Ssam 		return;
5109763Ssam 	}
5118Sbill 	if (bp->b_flags&B_ASYNC)
5128Sbill 		brelse(bp);
5138Sbill 	else {
5148Sbill 		bp->b_flags &= ~B_WANTED;
5158Sbill 		wakeup((caddr_t)bp);
5168Sbill 	}
5178Sbill }
5188Sbill 
5198Sbill /*
52037736Smckusick  * Ensure that no part of a specified block is in an incore buffer.
52130749Skarels #ifdef SECSIZE
522*39668Smckusick  * "size" is given in device blocks (the units of b_lblkno).
52330749Skarels #endif SECSIZE
 * Walks each hash chain the range can land on, waits for busy
 * buffers, and synchronously writes out any dirty overlaps.
 * Returns the last write error seen (0 if none).
5248670S  */
52537736Smckusick blkflush(vp, blkno, size)
52637736Smckusick 	struct vnode *vp;
5278670S 	daddr_t blkno;
5288670S 	long size;
5298670S {
5308670S 	register struct buf *ep;
5318670S 	struct buf *dp;
53239303Smckusick 	daddr_t curblk, nextblk, ecurblk, lastblk;
53337736Smckusick 	int s, error, allerrors = 0;
5348670S 
53539303Smckusick 	/*
53639303Smckusick 	 * Iterate through each possible hash chain.
53739303Smckusick 	 */
53839303Smckusick 	lastblk = blkno + btodb(size) - 1;
53939303Smckusick 	for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
54039303Smckusick #if RND & (RND-1)
54139303Smckusick 		nextblk = ((curblk / RND) + 1) * RND;
54239303Smckusick #else
54339303Smckusick 		nextblk = ((curblk & ~(RND-1)) + RND);
54439303Smckusick #endif
54539303Smckusick 		ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
54639303Smckusick 		dp = BUFHASH(vp, curblk);
5478670S loop:
54839303Smckusick 		for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
54939303Smckusick 			if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
55039303Smckusick 				continue;
55139303Smckusick 			/* look for overlap */
552*39668Smckusick 			if (ep->b_bcount == 0 || ep->b_lblkno > ecurblk ||
553*39668Smckusick 			    ep->b_lblkno + btodb(ep->b_bcount) <= curblk)
55439303Smckusick 				continue;
55539303Smckusick 			s = splbio();
55639303Smckusick 			if (ep->b_flags&B_BUSY) {
55739303Smckusick 				ep->b_flags |= B_WANTED;
55839303Smckusick 				sleep((caddr_t)ep, PRIBIO+1);
55939303Smckusick 				splx(s);
56039303Smckusick 				goto loop;
56139303Smckusick 			}
56239303Smckusick 			if (ep->b_flags & B_DELWRI) {
56339303Smckusick 				splx(s);
56439303Smckusick 				notavail(ep);
56539303Smckusick 				if (error = bwrite(ep))
56639303Smckusick 					allerrors = error;
56639303Smckusick 				goto loop;
56839303Smckusick 			}
5698670S 			splx(s);
5708670S 		}
5718670S 	}
57237736Smckusick 	return (allerrors);
5738670S }
5748670S 
5758670S /*
57637736Smckusick  * Make sure all write-behind blocks associated
57738776Smckusick  * with mount point are flushed out (from sync).
 * Restarts the vnode walk from the head whenever vget() fails,
 * since the mount list may have changed underneath us.
5788Sbill  */
579*39668Smckusick mntflushbuf(mountp, flags)
58038776Smckusick 	struct mount *mountp;
581*39668Smckusick 	int flags;
5828Sbill {
583*39668Smckusick 	register struct vnode *vp;
584*39668Smckusick 
585*39668Smckusick loop:
586*39668Smckusick 	for (vp = mountp->m_mounth; vp; vp = vp->v_mountf) {
587*39668Smckusick 		if (vget(vp))
588*39668Smckusick 			goto loop;
589*39668Smckusick 		vflushbuf(vp, flags);
590*39668Smckusick 		vput(vp);
591*39668Smckusick 	}
592*39668Smckusick }
593*39668Smckusick 
594*39668Smckusick /*
595*39668Smckusick  * Flush all dirty buffers associated with a vnode.
 * Starts writes on every dirty, non-busy buffer on the vnode's
 * block list; if B_SYNC is set in flags, additionally waits for
 * busy buffers and re-flushes any that turn up dirty again.
596*39668Smckusick  */
597*39668Smckusick vflushbuf(vp, flags)
598*39668Smckusick 	register struct vnode *vp;
599*39668Smckusick 	int flags;
600*39668Smckusick {
6018Sbill 	register struct buf *bp;
602*39668Smckusick 	struct buf *nbp;
6035431Sroot 	int s;
6048Sbill 
6058Sbill loop:
60626271Skarels 	s = splbio();
607*39668Smckusick 	for (bp = vp->v_blockh; bp; bp = nbp) {
608*39668Smckusick 		nbp = bp->b_blockf;
609*39668Smckusick 		if ((bp->b_flags & B_BUSY))
610*39668Smckusick 			continue;
611*39668Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
612*39668Smckusick 			continue;
613*39668Smckusick 		splx(s);
614*39668Smckusick 		notavail(bp);
615*39668Smckusick 		(void) bawrite(bp);
616*39668Smckusick 		goto loop;
617*39668Smckusick 	}
	/* NOTE(review): when the scan loop above (or wloop below) runs
	 * to completion, there is no splx(s) before returning -- the ipl
	 * appears to be left raised; confirm against the splbio/splx
	 * pairing used elsewhere in this file. */
618*39668Smckusick 	if ((flags & B_SYNC) == 0)
619*39668Smckusick 		return;
620*39668Smckusick wloop:
621*39668Smckusick 	s = splbio();
622*39668Smckusick 	for (bp = vp->v_blockh; bp; bp = nbp) {
623*39668Smckusick 		nbp = bp->b_blockf;
624*39668Smckusick 		if (bp->b_flags & B_BUSY) {
625*39668Smckusick 			bp->b_flags |= B_WANTED;
626*39668Smckusick 			sleep((caddr_t)bp, PRIBIO+1);
627*39668Smckusick 			splx(s);
628*39668Smckusick 			goto wloop;
62937736Smckusick 		}
630*39668Smckusick 		if ((bp->b_flags & B_DELWRI))
631*39668Smckusick 			goto loop;
63237736Smckusick 	}
6338Sbill }
6342299Skre 
6352299Skre /*
6362299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6372299Skre  *
638*39668Smckusick  * Go through the list of vnodes associated with the file system;
639*39668Smckusick  * for each vnode invalidate any buffers that it holds. Normally
640*39668Smckusick  * this routine is preceeded by a bflush call, so that on a quiescent
641*39668Smckusick  * filesystem there will be no dirty buffers when we are done. Binval
642*39668Smckusick  * returns the count of dirty buffers when it is finished.
6432299Skre  */
644*39668Smckusick mntinvalbuf(mountp)
64538776Smckusick 	struct mount *mountp;
6462299Skre {
647*39668Smckusick 	register struct vnode *vp;
648*39668Smckusick 	int dirty = 0;
649*39668Smckusick 
650*39668Smckusick loop:
651*39668Smckusick 	for (vp = mountp->m_mounth; vp; vp = vp->v_mountf) {
652*39668Smckusick 		if (vget(vp))
653*39668Smckusick 			goto loop;
654*39668Smckusick 		dirty += vinvalbuf(vp, 1);
655*39668Smckusick 		vput(vp);
656*39668Smckusick 	}
657*39668Smckusick 	return (dirty);
658*39668Smckusick }
659*39668Smckusick 
660*39668Smckusick /*
661*39668Smckusick  * Flush out and invalidate all buffers associated with a vnode.
662*39668Smckusick  * Called with the underlying object locked.
 * If "save" is nonzero, dirty buffers are written out (and counted)
 * before invalidation; returns the count of dirty buffers seen.
663*39668Smckusick  */
664*39668Smckusick vinvalbuf(vp, save)
665*39668Smckusick 	register struct vnode *vp;
666*39668Smckusick 	int save;
667*39668Smckusick {
6682361Skre 	register struct buf *bp;
669*39668Smckusick 	struct buf *nbp;
67038633Smckusick 	int s, dirty = 0;
6712299Skre 
67238776Smckusick loop:
673*39668Smckusick 	for (bp = vp->v_blockh; bp; bp = nbp) {
674*39668Smckusick 		nbp = bp->b_blockf;
675*39668Smckusick 		s = splbio();
676*39668Smckusick 		if (bp->b_flags & B_BUSY) {
677*39668Smckusick 			bp->b_flags |= B_WANTED;
678*39668Smckusick 			sleep((caddr_t)bp, PRIBIO+1);
67938808Smckusick 			splx(s);
680*39668Smckusick 			goto loop;
681*39668Smckusick 		}
682*39668Smckusick 		splx(s);
683*39668Smckusick 		notavail(bp);
684*39668Smckusick 		if (save) {
68538614Smckusick 			if (bp->b_flags & B_DELWRI) {
68638614Smckusick 				dirty++;
687*39668Smckusick 				(void) bwrite(bp);
688*39668Smckusick 				goto loop;
68937736Smckusick 			}
69038614Smckusick 		}
691*39668Smckusick 		bp->b_flags |= B_INVAL;
692*39668Smckusick 		brelse(bp);
69338614Smckusick 	}
694*39668Smckusick 	if (vp->v_blockh != 0)
695*39668Smckusick 		panic("vinvalbuf: flush failed");
69638614Smckusick 	return (dirty);
6972299Skre }
69837736Smckusick 
699*39668Smckusick /*
700*39668Smckusick  * Associate a buffer with a vnode.
 * Takes a reference on the vnode, records the device (for block and
 * character specials), and links the buffer at the head of the
 * vnode's buffer list.  Panics if the buffer already has a vnode.
701*39668Smckusick  */
702*39668Smckusick bgetvp(vp, bp)
703*39668Smckusick 	register struct vnode *vp;
704*39668Smckusick 	register struct buf *bp;
705*39668Smckusick {
706*39668Smckusick 
707*39668Smckusick 	if (bp->b_vp)
708*39668Smckusick 		panic("bgetvp: not free");
709*39668Smckusick 	VREF(vp);
710*39668Smckusick 	bp->b_vp = vp;
711*39668Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
712*39668Smckusick 		bp->b_dev = vp->v_rdev;
713*39668Smckusick 	else
714*39668Smckusick 		bp->b_dev = NODEV;
715*39668Smckusick 	/*
716*39668Smckusick 	 * Insert onto list for new vnode.
717*39668Smckusick 	 */
718*39668Smckusick 	if (vp->v_blockh) {
719*39668Smckusick 		bp->b_blockf = vp->v_blockh;
720*39668Smckusick 		bp->b_blockb = &vp->v_blockh;
721*39668Smckusick 		vp->v_blockh->b_blockb = &bp->b_blockf;
722*39668Smckusick 		vp->v_blockh = bp;
723*39668Smckusick 	} else {
724*39668Smckusick 		vp->v_blockh = bp;
725*39668Smckusick 		bp->b_blockb = &vp->v_blockh;
726*39668Smckusick 		bp->b_blockf = NULL;
727*39668Smckusick 	}
728*39668Smckusick }
729*39668Smckusick 
730*39668Smckusick /*
731*39668Smckusick  * Disassociate a buffer from a vnode.
 * Unlinks the buffer from the vnode's buffer list (if on one) and
 * drops the vnode reference taken by bgetvp.  Panics on a buffer
 * with no vnode.
732*39668Smckusick  */
73337736Smckusick brelvp(bp)
734*39668Smckusick 	register struct buf *bp;
73537736Smckusick {
736*39668Smckusick 	struct buf *bq;
73737736Smckusick 	struct vnode *vp;
73837736Smckusick 
73937736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
740*39668Smckusick 		panic("brelvp: NULL");
741*39668Smckusick 	/*
742*39668Smckusick 	 * Delete from old vnode list, if on one.
743*39668Smckusick 	 */
744*39668Smckusick 	if (bp->b_blockb) {
745*39668Smckusick 		if (bq = bp->b_blockf)
746*39668Smckusick 			bq->b_blockb = bp->b_blockb;
747*39668Smckusick 		*bp->b_blockb = bq;
748*39668Smckusick 		bp->b_blockf = NULL;
749*39668Smckusick 		bp->b_blockb = NULL;
750*39668Smckusick 	}
75137736Smckusick 	vp = bp->b_vp;
75237736Smckusick 	bp->b_vp = (struct vnode *) 0;
75337736Smckusick 	vrele(vp);
75437736Smckusick }
755*39668Smckusick 
756*39668Smckusick /*
757*39668Smckusick  * Reassign a buffer from one vnode to another.
758*39668Smckusick  * Used to assign file specific control information
759*39668Smckusick  * (indirect blocks) to the vnode to which they belong.
 * Note: unlike brelvp, this moves only the list linkage; b_vp and
 * the vnode reference counts are not touched here.
760*39668Smckusick  */
761*39668Smckusick reassignbuf(bp, newvp)
762*39668Smckusick 	register struct buf *bp;
763*39668Smckusick 	register struct vnode *newvp;
764*39668Smckusick {
765*39668Smckusick 	register struct buf *bq;
766*39668Smckusick 
767*39668Smckusick 	/*
768*39668Smckusick 	 * Delete from old vnode list, if on one.
769*39668Smckusick 	 */
770*39668Smckusick 	if (bp->b_blockb) {
771*39668Smckusick 		if (bq = bp->b_blockf)
772*39668Smckusick 			bq->b_blockb = bp->b_blockb;
773*39668Smckusick 		*bp->b_blockb = bq;
774*39668Smckusick 	}
775*39668Smckusick 	/*
776*39668Smckusick 	 * Insert onto list for new vnode.
777*39668Smckusick 	 */
778*39668Smckusick 	if (newvp->v_blockh) {
779*39668Smckusick 		bp->b_blockf = newvp->v_blockh;
780*39668Smckusick 		bp->b_blockb = &newvp->v_blockh;
781*39668Smckusick 		newvp->v_blockh->b_blockb = &bp->b_blockf;
782*39668Smckusick 		newvp->v_blockh = bp;
783*39668Smckusick 	} else {
784*39668Smckusick 		newvp->v_blockh = bp;
785*39668Smckusick 		bp->b_blockb = &newvp->v_blockh;
786*39668Smckusick 		bp->b_blockf = NULL;
787*39668Smckusick 	}
788*39668Smckusick }
788*39668Smckusick }
789