xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 38776)
123395Smckusick /*
237736Smckusick  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
537736Smckusick  * Redistribution and use in source and binary forms are permitted
637736Smckusick  * provided that the above copyright notice and this paragraph are
737736Smckusick  * duplicated in all such forms and that any documentation,
837736Smckusick  * advertising materials, and other materials related to such
937736Smckusick  * distribution and use acknowledge that the software was developed
1037736Smckusick  * by the University of California, Berkeley.  The name of the
1137736Smckusick  * University may not be used to endorse or promote products derived
1237736Smckusick  * from this software without specific prior written permission.
1337736Smckusick  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1437736Smckusick  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1537736Smckusick  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1637736Smckusick  *
17*38776Smckusick  *	@(#)vfs_cluster.c	7.10 (Berkeley) 08/26/89
1823395Smckusick  */
198Sbill 
2017098Sbloom #include "param.h"
2117098Sbloom #include "user.h"
2217098Sbloom #include "buf.h"
2337736Smckusick #include "vnode.h"
2417098Sbloom #include "trace.h"
25*38776Smckusick #include "ucred.h"
268Sbill 
/*
 * Read in (if necessary) the block and return a buffer pointer.
 *
 * On a cache hit (B_DONE or B_DELWRI already set by a previous I/O)
 * no device I/O is done and 0 is returned.  On a miss, the read
 * credential is attached to the buffer (with a reference hold) if the
 * buffer has none, the read is started through the vnode's strategy
 * routine, and the caller is charged for one input block.
 * Returns 0 on success or an errno from biowait().
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	if (bp->b_flags&(B_DONE|B_DELWRI)) {
		/* cache hit: buffer contents are already valid */
		trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	/* remember the credential used for this read, if none is held yet */
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
588Sbill 
598Sbill /*
608Sbill  * Read in the block, like bread, but also start I/O on the
618Sbill  * read-ahead block (which is not allocated to the caller)
628Sbill  */
63*38776Smckusick breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
6437736Smckusick 	struct vnode *vp;
657114Smckusick 	daddr_t blkno; int size;
668592Sroot 	daddr_t rablkno; int rabsize;
67*38776Smckusick 	struct ucred *cred;
6837736Smckusick 	struct buf **bpp;
698Sbill {
708Sbill 	register struct buf *bp, *rabp;
718Sbill 
728Sbill 	bp = NULL;
737015Smckusick 	/*
747015Smckusick 	 * If the block isn't in core, then allocate
757015Smckusick 	 * a buffer and initiate i/o (getblk checks
767015Smckusick 	 * for a cache hit).
777015Smckusick 	 */
7837736Smckusick 	if (!incore(vp, blkno)) {
7937736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
8032608Smckusick 		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
818Sbill 			bp->b_flags |= B_READ;
828670S 			if (bp->b_bcount > bp->b_bufsize)
838670S 				panic("breada");
84*38776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
85*38776Smckusick 				crhold(cred);
86*38776Smckusick 				bp->b_rcred = cred;
87*38776Smckusick 			}
8837736Smckusick 			VOP_STRATEGY(bp);
8937736Smckusick 			trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
9037736Smckusick 			    blkno);
918039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
927015Smckusick 		} else
9337736Smckusick 			trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
9437736Smckusick 			    blkno);
958Sbill 	}
967015Smckusick 
977015Smckusick 	/*
987015Smckusick 	 * If there's a read-ahead block, start i/o
997015Smckusick 	 * on it also (as above).
1007015Smckusick 	 */
10137736Smckusick 	if (rablkno && !incore(vp, rablkno)) {
10237736Smckusick 		rabp = getblk(vp, rablkno, rabsize);
10332608Smckusick 		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
1048Sbill 			brelse(rabp);
10537736Smckusick 			trace(TR_BREADHITRA,
10637736Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), blkno);
1072045Swnj 		} else {
1088Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1098670S 			if (rabp->b_bcount > rabp->b_bufsize)
1108670S 				panic("breadrabp");
111*38776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
112*38776Smckusick 				crhold(cred);
113*38776Smckusick 				bp->b_rcred = cred;
114*38776Smckusick 			}
11537736Smckusick 			VOP_STRATEGY(rabp);
11637736Smckusick 			trace(TR_BREADMISSRA,
11737736Smckusick 			    pack(vp->v_mount->m_fsid[0], rabsize), rablock);
1188039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1198Sbill 		}
1208Sbill 	}
1217015Smckusick 
1227015Smckusick 	/*
1237114Smckusick 	 * If block was in core, let bread get it.
1247114Smckusick 	 * If block wasn't in core, then the read was started
1257114Smckusick 	 * above, and just wait for it.
1267015Smckusick 	 */
1277114Smckusick 	if (bp == NULL)
128*38776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
12937736Smckusick 	return (biowait(bp));
1308Sbill }
1318Sbill 
1328Sbill /*
1338Sbill  * Write the buffer, waiting for completion.
1348Sbill  * Then release the buffer.
1358Sbill  */
1368Sbill bwrite(bp)
1377015Smckusick 	register struct buf *bp;
1388Sbill {
13937736Smckusick 	register int flag;
14037736Smckusick 	int error;
1418Sbill 
1428Sbill 	flag = bp->b_flags;
1439857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1448Sbill 	if ((flag&B_DELWRI) == 0)
1458039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
14637736Smckusick 	trace(TR_BWRITE,
14737736Smckusick 	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno);
1488670S 	if (bp->b_bcount > bp->b_bufsize)
1498670S 		panic("bwrite");
15037736Smckusick 	VOP_STRATEGY(bp);
1517015Smckusick 
1527015Smckusick 	/*
1537015Smckusick 	 * If the write was synchronous, then await i/o completion.
1547015Smckusick 	 * If the write was "delayed", then we put the buffer on
1557015Smckusick 	 * the q of blocks awaiting i/o completion status.
1567015Smckusick 	 */
1578Sbill 	if ((flag&B_ASYNC) == 0) {
15837736Smckusick 		error = biowait(bp);
1598Sbill 		brelse(bp);
16037736Smckusick 	} else if (flag & B_DELWRI) {
1618Sbill 		bp->b_flags |= B_AGE;
16237736Smckusick 		error = 0;
16337736Smckusick 	}
16437736Smckusick 	return (error);
1658Sbill }
1668Sbill 
/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* noone paid yet */
#ifdef notdef
	/*
	 * This does not work for buffers associated with
	 * vnodes that are remote - they have no dev.
	 * Besides, we don't use bio with tapes, so rather
	 * than develop a fix, we just ifdef this out for now.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
#endif
	/* mark delayed-write and release; the real write happens later */
	bp->b_flags |= B_DELWRI | B_DONE;
	brelse(bp);
}
1988Sbill 
/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 * B_ASYNC makes bwrite return immediately; the buffer is released
 * by biodone when the I/O finishes.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
2098Sbill 
/*
 * Release the buffer, with no I/O implied.
 * Wake any waiters, clear per-use flags, and return the buffer
 * to the appropriate free list.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE,
	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/* B_NOCACHE requests that the contents not be kept for reuse */
	if (bp->b_flags & B_NOCACHE) {
		bp->b_flags |= B_INVAL;
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			brelvp(bp); 	 		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		/* valid data: queue at the tail so it survives longest */
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	/* clear per-use state; B_DELWRI, B_LOCKED, etc. persist */
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
	splx(s);
}
2648Sbill 
2658Sbill /*
2668Sbill  * See if the block is associated with some buffer
2678Sbill  * (mainly to avoid getting hung up on a wait in breada)
2688Sbill  */
26937736Smckusick incore(vp, blkno)
27037736Smckusick 	struct vnode *vp;
2717015Smckusick 	daddr_t blkno;
2728Sbill {
2738Sbill 	register struct buf *bp;
2742325Swnj 	register struct buf *dp;
2758Sbill 
27638225Smckusick 	dp = BUFHASH(vp, blkno);
2772325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
27837736Smckusick 		if (bp->b_blkno == blkno && bp->b_vp == vp &&
2797015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
28091Sbill 			return (1);
28191Sbill 	return (0);
2828Sbill }
2838Sbill 
284*38776Smckusick baddr(vp, blkno, size, cred, bpp)
28537736Smckusick 	struct vnode *vp;
2866563Smckusic 	daddr_t blkno;
2876563Smckusic 	int size;
288*38776Smckusick 	struct ucred *cred;
28937736Smckusick 	struct buf **bpp;
2908Sbill {
2918Sbill 
29237736Smckusick 	if (incore(vp, blkno))
293*38776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
29437736Smckusick 	*bpp = 0;
2958Sbill 	return (0);
2968Sbill }
2978Sbill 
2988Sbill /*
2998Sbill  * Assign a buffer for the given block.  If the appropriate
3008Sbill  * block is already associated, return it; otherwise search
3018Sbill  * for the oldest non-busy buffer and reassign it.
3025424Swnj  *
30332608Smckusick  * If we find the buffer, but it is dirty (marked DELWRI) and
30432608Smckusick  * its size is changing, we must write it out first. When the
30532608Smckusick  * buffer is shrinking, the write is done by brealloc to avoid
30632608Smckusick  * losing the unwritten data. When the buffer is growing, the
30732608Smckusick  * write is done by getblk, so that bread will not read stale
30832608Smckusick  * disk data over the modified data in the buffer.
30932608Smckusick  *
3105424Swnj  * We use splx here because this routine may be called
3115424Swnj  * on the interrupt stack during a dump, and we don't
3125424Swnj  * want to lower the ipl back to 0.
3138Sbill  */
3148Sbill struct buf *
31537736Smckusick getblk(vp, blkno, size)
31637736Smckusick 	register struct vnode *vp;
3176563Smckusic 	daddr_t blkno;
3186563Smckusic 	int size;
3198Sbill {
3208670S 	register struct buf *bp, *dp;
3215424Swnj 	int s;
3228Sbill 
32325255Smckusick 	if (size > MAXBSIZE)
32425255Smckusick 		panic("getblk: size too big");
3257015Smckusick 	/*
32624730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
32724730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
32824730Smckusick 	 * to the maximum number that can be converted to a byte offset
32924730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
33024730Smckusick 	 * or whether it is still a reasonable thing to do is open to
33124730Smckusick 	 * dispute. mkm 9/85
33224730Smckusick 	 */
33324730Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
33424730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
33524730Smckusick 	/*
3367015Smckusick 	 * Search the cache for the block.  If we hit, but
3377015Smckusick 	 * the buffer is in use for i/o, then we wait until
3387015Smckusick 	 * the i/o has completed.
3397015Smckusick 	 */
34037736Smckusick 	dp = BUFHASH(vp, blkno);
3417015Smckusick loop:
3422325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
34337736Smckusick 		if (bp->b_blkno != blkno || bp->b_vp != vp ||
3442325Swnj 		    bp->b_flags&B_INVAL)
3458Sbill 			continue;
34626271Skarels 		s = splbio();
3478Sbill 		if (bp->b_flags&B_BUSY) {
3488Sbill 			bp->b_flags |= B_WANTED;
3498Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3505424Swnj 			splx(s);
3518Sbill 			goto loop;
3528Sbill 		}
3535424Swnj 		splx(s);
3548Sbill 		notavail(bp);
35532608Smckusick 		if (bp->b_bcount != size) {
35632608Smckusick 			if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
35732608Smckusick 				bp->b_flags &= ~B_ASYNC;
35837736Smckusick 				(void) bwrite(bp);
35932608Smckusick 				goto loop;
36032608Smckusick 			}
36132608Smckusick 			if (brealloc(bp, size) == 0)
36232608Smckusick 				goto loop;
36332608Smckusick 		}
36416855Smckusick 		if (bp->b_bcount != size && brealloc(bp, size) == 0)
3657188Sroot 			goto loop;
3668Sbill 		bp->b_flags |= B_CACHE;
36726271Skarels 		return (bp);
3688Sbill 	}
3698670S 	bp = getnewbuf();
3706563Smckusic 	bfree(bp);
3717015Smckusick 	bremhash(bp);
37237736Smckusick 	if (bp->b_vp)
37337736Smckusick 		brelvp(bp);
37438345Smckusick 	VREF(vp);
37537736Smckusick 	bp->b_vp = vp;
37637736Smckusick 	bp->b_dev = vp->v_rdev;
3776563Smckusic 	bp->b_blkno = blkno;
3788670S 	bp->b_error = 0;
37937736Smckusick 	bp->b_resid = 0;
38037736Smckusick 	binshash(bp, dp);
3817188Sroot 	if (brealloc(bp, size) == 0)
3827188Sroot 		goto loop;
38326271Skarels 	return (bp);
3848Sbill }
3858Sbill 
/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;		/* contents are not valid cached data */
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	brelvp(bp);			/* no vnode association */
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);		/* park on the age-list hash head */
	if (brealloc(bp, size) == 0)
		goto loop;		/* could not get space; start over */
	return (bp);
}
4128Sbill 
/*
 * Allocate space associated with a buffer.
 * If can't get space, buffer is released.
 * Returns 1 on success, 0 if the caller must retry (the buffer was
 * written or the space could not be obtained).
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is dispatched with.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		/*
		 * Shrinking a dirty buffer: write it out first so the
		 * data beyond the new size is not silently discarded.
		 */
		if (bp->b_flags & B_DELWRI) {
			(void) bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_vp == (struct vnode *)0)
		return (allocbuf(bp, size));

	trace(TR_BREALLOC,
	    pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno);
	/*
	 * Search cache for any buffers that overlap the one that we
	 * are trying to allocate. Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty. (indicated
	 * by B_DELWRI) A disk block must be mapped by at most one buffer
	 * at any point in time. Care must be taken to avoid deadlocking
	 * when two buffer are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(bp->b_vp, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_vp != bp->b_vp ||
		    (ep->b_flags & B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = splbio();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;	/* hash chain may have changed */
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			/* flush the dirty overlapping buffer, then rescan */
			(void) bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}
4858670S 
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 * The buffer is returned marked B_BUSY, stripped of its previous
 * vnode association and credentials.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	/* scan free lists from AGE down toward LRU for a non-empty one */
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		/* dirty: start the write asynchronously and look again */
		(void) bawrite(bp);
		goto loop;
	}
	/*
	 * NOTE(review): the trace below dereferences bp->b_vp without a
	 * null check; buffers can reach the free lists with no vnode
	 * (e.g. via brelvp in brelse) — confirm b_vp is valid here.
	 */
	trace(TR_BRELSE,
	    pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
	brelvp(bp);
	/* drop credentials left over from the buffer's previous identity */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	return (bp);
}
5328670S 
5338670S /*
5348Sbill  * Wait for I/O completion on the buffer; return errors
5358Sbill  * to the user.
5368Sbill  */
5377015Smckusick biowait(bp)
5386563Smckusic 	register struct buf *bp;
5398Sbill {
5405431Sroot 	int s;
5418Sbill 
54226271Skarels 	s = splbio();
543*38776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5448Sbill 		sleep((caddr_t)bp, PRIBIO);
5455431Sroot 	splx(s);
54637736Smckusick 	/*
54737736Smckusick 	 * Pick up the device's error number and pass it to the user;
54837736Smckusick 	 * if there is an error but the number is 0 set a generalized code.
54937736Smckusick 	 */
55037736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
55137736Smckusick 		return (0);
55237736Smckusick 	if (bp->b_error)
55337736Smckusick 		return (bp->b_error);
55437736Smckusick 	return (EIO);
5558Sbill }
5568Sbill 
/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	/* a completed write leaves no dirty region in the buffer */
	if ((bp->b_flags & B_READ) == 0)
		bp->b_dirtyoff = bp->b_dirtyend = 0;
	if (bp->b_flags & B_CALL) {
		/* caller requested a completion callback instead of wakeup */
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
5848Sbill 
5858Sbill /*
58637736Smckusick  * Ensure that no part of a specified block is in an incore buffer.
58730749Skarels #ifdef SECSIZE
58830749Skarels  * "size" is given in device blocks (the units of b_blkno).
58930749Skarels #endif SECSIZE
5908670S  */
59137736Smckusick blkflush(vp, blkno, size)
59237736Smckusick 	struct vnode *vp;
5938670S 	daddr_t blkno;
5948670S 	long size;
5958670S {
5968670S 	register struct buf *ep;
5978670S 	struct buf *dp;
5988670S 	daddr_t start, last;
59937736Smckusick 	int s, error, allerrors = 0;
6008670S 
6018670S 	start = blkno;
60212644Ssam 	last = start + btodb(size) - 1;
60337736Smckusick 	dp = BUFHASH(vp, blkno);
6048670S loop:
6058670S 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
60637736Smckusick 		if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
6078670S 			continue;
6088670S 		/* look for overlap */
6098670S 		if (ep->b_bcount == 0 || ep->b_blkno > last ||
61012644Ssam 		    ep->b_blkno + btodb(ep->b_bcount) <= start)
6118670S 			continue;
61226271Skarels 		s = splbio();
6138670S 		if (ep->b_flags&B_BUSY) {
6148670S 			ep->b_flags |= B_WANTED;
6158670S 			sleep((caddr_t)ep, PRIBIO+1);
6168670S 			splx(s);
6178670S 			goto loop;
6188670S 		}
6198670S 		if (ep->b_flags & B_DELWRI) {
6208670S 			splx(s);
6218670S 			notavail(ep);
62237736Smckusick 			if (error = bwrite(ep))
62337736Smckusick 				allerrors = error;
6248670S 			goto loop;
6258670S 		}
6268670S 		splx(s);
6278670S 	}
62837736Smckusick 	return (allerrors);
6298670S }
6308670S 
/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 */
bflush(mountp)
	struct mount *mountp;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = splbio();
	/* walk every free list looking for delayed-write buffers */
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) {
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_BUSY))
				continue;
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (bp->b_vp && bp->b_vp->v_mount == mountp) {
				notavail(bp);
				(void) bawrite(bp);
				/* lists changed under us; restart the scan */
				splx(s);
				goto loop;
			}
		}
	}
	splx(s);
}
6602299Skre 
/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * We walk through the buffer pool and invalidate any buffers for the
 * indicated mount point. Normally this routine is preceeded by a bflush
 * call, so that on a quiescent filesystem there will be no dirty
 * buffers when we are done. We return the count of dirty buffers when
 * we are finished.
 */
binval(mountp)
	struct mount *mountp;
{
	register struct buf *bp;
	register struct bufhd *hp;
	int s, dirty = 0;
#define dp ((struct buf *)hp)	/* treat the hash head as a buf for links */

loop:
	s = splbio();
	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) {
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_vp == NULL || bp->b_vp->v_mount != mountp)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO+1);
				splx(s);
				goto loop;	/* chains may have changed */
			}
			notavail(bp);
			if (bp->b_flags & B_DELWRI) {
				/* still dirty: start the write and count it */
				(void) bawrite(bp);
				dirty++;
				continue;
			}
			bp->b_flags |= B_INVAL;
			brelvp(bp);
			brelse(bp);
		}
	}
	return (dirty);
}
70337736Smckusick 
70437736Smckusick brelvp(bp)
70537736Smckusick 	struct buf *bp;
70637736Smckusick {
70737736Smckusick 	struct vnode *vp;
70837736Smckusick 
70937736Smckusick 	if (bp->b_vp == (struct vnode *) 0)
71037736Smckusick 		return;
71137736Smckusick 	vp = bp->b_vp;
71237736Smckusick 	bp->b_vp = (struct vnode *) 0;
71337736Smckusick 	vrele(vp);
71437736Smckusick }
715