xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 56356)
149589Sbostic /*-
249589Sbostic  * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
549618Smckusick  * This module is believed to contain source code proprietary to AT&T.
649618Smckusick  * Use and redistribution is subject to the Berkeley Software License
749618Smckusick  * Agreement and your Software Agreement with AT&T (Western Electric).
837736Smckusick  *
9*56356Smckusick  *	@(#)vfs_cluster.c	7.53 (Berkeley) 09/30/92
1023395Smckusick  */
118Sbill 
1251455Sbostic #include <sys/param.h>
1351455Sbostic #include <sys/proc.h>
1451455Sbostic #include <sys/buf.h>
1551455Sbostic #include <sys/vnode.h>
1651455Sbostic #include <sys/mount.h>
1751455Sbostic #include <sys/trace.h>
1851455Sbostic #include <sys/resourcevar.h>
198Sbill 
2091Sbill /*
2149280Skarels  * Initialize buffers and hash links for buffers.
2249280Skarels  */
2351455Sbostic void
2449280Skarels bufinit()
2549280Skarels {
2649280Skarels 	register int i;
2749280Skarels 	register struct buf *bp, *dp;
2849280Skarels 	register struct bufhd *hp;
2949280Skarels 	int base, residual;
3049280Skarels 
3149280Skarels 	for (hp = bufhash, i = 0; i < BUFHSZ; i++, hp++)
3249280Skarels 		hp->b_forw = hp->b_back = (struct buf *)hp;
3349280Skarels 
3449280Skarels 	for (dp = bfreelist; dp < &bfreelist[BQUEUES]; dp++) {
3549280Skarels 		dp->b_forw = dp->b_back = dp->av_forw = dp->av_back = dp;
3649280Skarels 		dp->b_flags = B_HEAD;
3749280Skarels 	}
3849280Skarels 	base = bufpages / nbuf;
3949280Skarels 	residual = bufpages % nbuf;
4049280Skarels 	for (i = 0; i < nbuf; i++) {
4149280Skarels 		bp = &buf[i];
4249280Skarels 		bp->b_dev = NODEV;
4349280Skarels 		bp->b_bcount = 0;
4449280Skarels 		bp->b_rcred = NOCRED;
4549280Skarels 		bp->b_wcred = NOCRED;
4649280Skarels 		bp->b_dirtyoff = 0;
4749280Skarels 		bp->b_dirtyend = 0;
4852189Smckusick 		bp->b_validoff = 0;
4952189Smckusick 		bp->b_validend = 0;
5049280Skarels 		bp->b_un.b_addr = buffers + i * MAXBSIZE;
5149280Skarels 		if (i < residual)
5249280Skarels 			bp->b_bufsize = (base + 1) * CLBYTES;
5349280Skarels 		else
5449280Skarels 			bp->b_bufsize = base * CLBYTES;
5549280Skarels 		binshash(bp, &bfreelist[BQ_AGE]);
5652413Storek 		bp->b_flags = B_INVAL;
5752413Storek 		dp = bp->b_bufsize ? &bfreelist[BQ_AGE] : &bfreelist[BQ_EMPTY];
5852413Storek 		binsheadfree(bp, dp);
5949280Skarels 	}
6049280Skarels }
6149280Skarels 
6249280Skarels /*
6346151Smckusick  * Find the block in the buffer pool.
6446151Smckusick  * If the buffer is not present, allocate a new buffer and load
6546151Smckusick  * its contents according to the filesystem fill routine.
668Sbill  */
6738776Smckusick bread(vp, blkno, size, cred, bpp)
6837736Smckusick 	struct vnode *vp;
696563Smckusic 	daddr_t blkno;
706563Smckusic 	int size;
7138776Smckusick 	struct ucred *cred;
7237736Smckusick 	struct buf **bpp;
738Sbill {
7447545Skarels 	struct proc *p = curproc;		/* XXX */
758Sbill 	register struct buf *bp;
768Sbill 
778670S 	if (size == 0)
788670S 		panic("bread: size 0");
7937736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
8046151Smckusick 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
8140341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
8237736Smckusick 		return (0);
838Sbill 	}
848Sbill 	bp->b_flags |= B_READ;
858670S 	if (bp->b_bcount > bp->b_bufsize)
868670S 		panic("bread");
8738776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
8838776Smckusick 		crhold(cred);
8938776Smckusick 		bp->b_rcred = cred;
9038776Smckusick 	}
9137736Smckusick 	VOP_STRATEGY(bp);
9240341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
9347545Skarels 	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
9437736Smckusick 	return (biowait(bp));
958Sbill }
968Sbill 
978Sbill /*
9852189Smckusick  * Operates like bread, but also starts I/O on the N specified
9952189Smckusick  * read-ahead blocks.
1008Sbill  */
10152189Smckusick breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
10237736Smckusick 	struct vnode *vp;
1037114Smckusick 	daddr_t blkno; int size;
10452189Smckusick 	daddr_t rablkno[]; int rabsize[];
10552189Smckusick 	int num;
10638776Smckusick 	struct ucred *cred;
10737736Smckusick 	struct buf **bpp;
1088Sbill {
10947545Skarels 	struct proc *p = curproc;		/* XXX */
1108Sbill 	register struct buf *bp, *rabp;
11152189Smckusick 	register int i;
1128Sbill 
1138Sbill 	bp = NULL;
1147015Smckusick 	/*
11546151Smckusick 	 * If the block is not memory resident,
11646151Smckusick 	 * allocate a buffer and start I/O.
1177015Smckusick 	 */
11837736Smckusick 	if (!incore(vp, blkno)) {
11937736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
12046151Smckusick 		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
1218Sbill 			bp->b_flags |= B_READ;
1228670S 			if (bp->b_bcount > bp->b_bufsize)
12352189Smckusick 				panic("breadn");
12438776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
12538776Smckusick 				crhold(cred);
12638776Smckusick 				bp->b_rcred = cred;
12738776Smckusick 			}
12837736Smckusick 			VOP_STRATEGY(bp);
12940341Smckusick 			trace(TR_BREADMISS, pack(vp, size), blkno);
13047545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
13154342Smckusick 		} else {
13240341Smckusick 			trace(TR_BREADHIT, pack(vp, size), blkno);
13354342Smckusick 		}
1348Sbill 	}
1357015Smckusick 
1367015Smckusick 	/*
13752189Smckusick 	 * If there's read-ahead block(s), start I/O
13852189Smckusick 	 * on them also (as above).
1397015Smckusick 	 */
14052189Smckusick 	for (i = 0; i < num; i++) {
14152189Smckusick 		if (incore(vp, rablkno[i]))
14252189Smckusick 			continue;
14352189Smckusick 		rabp = getblk(vp, rablkno[i], rabsize[i]);
14446151Smckusick 		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
1458Sbill 			brelse(rabp);
14652189Smckusick 			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
1472045Swnj 		} else {
14846151Smckusick 			rabp->b_flags |= B_ASYNC | B_READ;
1498670S 			if (rabp->b_bcount > rabp->b_bufsize)
1508670S 				panic("breadrabp");
15138880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
15238776Smckusick 				crhold(cred);
15338880Smckusick 				rabp->b_rcred = cred;
15438776Smckusick 			}
15537736Smckusick 			VOP_STRATEGY(rabp);
15652189Smckusick 			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
15747545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
1588Sbill 		}
1598Sbill 	}
1607015Smckusick 
1617015Smckusick 	/*
16246151Smckusick 	 * If block was memory resident, let bread get it.
16346151Smckusick 	 * If block was not memory resident, the read was
16446151Smckusick 	 * started above, so just wait for the read to complete.
1657015Smckusick 	 */
1667114Smckusick 	if (bp == NULL)
16738776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
16837736Smckusick 	return (biowait(bp));
1698Sbill }
1708Sbill 
1718Sbill /*
17246151Smckusick  * Synchronous write.
17346151Smckusick  * Release buffer on completion.
1748Sbill  */
1758Sbill bwrite(bp)
1767015Smckusick 	register struct buf *bp;
1778Sbill {
17847545Skarels 	struct proc *p = curproc;		/* XXX */
17937736Smckusick 	register int flag;
18052413Storek 	int s, error = 0;
1818Sbill 
1828Sbill 	flag = bp->b_flags;
1839857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
18449459Smckusick 	if (flag & B_ASYNC) {
18549459Smckusick 		if ((flag & B_DELWRI) == 0)
18649459Smckusick 			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
18749459Smckusick 		else
18849459Smckusick 			reassignbuf(bp, bp->b_vp);
18949459Smckusick 	}
19040341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1918670S 	if (bp->b_bcount > bp->b_bufsize)
1928670S 		panic("bwrite");
19340226Smckusick 	s = splbio();
19439882Smckusick 	bp->b_vp->v_numoutput++;
19540226Smckusick 	splx(s);
19637736Smckusick 	VOP_STRATEGY(bp);
1977015Smckusick 
1987015Smckusick 	/*
19946151Smckusick 	 * If the write was synchronous, then await I/O completion.
2007015Smckusick 	 * If the write was "delayed", then we put the buffer on
20146151Smckusick 	 * the queue of blocks awaiting I/O completion status.
2027015Smckusick 	 */
20346151Smckusick 	if ((flag & B_ASYNC) == 0) {
20437736Smckusick 		error = biowait(bp);
20549459Smckusick 		if ((flag&B_DELWRI) == 0)
20649459Smckusick 			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
20749459Smckusick 		else
20849459Smckusick 			reassignbuf(bp, bp->b_vp);
2098Sbill 		brelse(bp);
21037736Smckusick 	} else if (flag & B_DELWRI) {
21152413Storek 		s = splbio();
2128Sbill 		bp->b_flags |= B_AGE;
21352413Storek 		splx(s);
21437736Smckusick 	}
21537736Smckusick 	return (error);
2168Sbill }
2178Sbill 
21853578Sheideman int
21953578Sheideman vn_bwrite(ap)
22053578Sheideman 	struct vop_bwrite_args *ap;
22153578Sheideman {
22253578Sheideman 	return bwrite (ap->a_bp);
22353578Sheideman }
22453578Sheideman 
22553578Sheideman 
2268Sbill /*
22746151Smckusick  * Delayed write.
22846151Smckusick  *
22946151Smckusick  * The buffer is marked dirty, but is not queued for I/O.
23046151Smckusick  * This routine should be used when the buffer is expected
23146151Smckusick  * to be modified again soon, typically a small write that
23246151Smckusick  * partially fills a buffer.
23346151Smckusick  *
23446151Smckusick  * NB: magnetic tapes cannot be delayed; they must be
23546151Smckusick  * written in the order that the writes are requested.
2368Sbill  */
2378Sbill bdwrite(bp)
2387015Smckusick 	register struct buf *bp;
2398Sbill {
24047545Skarels 	struct proc *p = curproc;		/* XXX */
2418Sbill 
24239882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
24339882Smckusick 		bp->b_flags |= B_DELWRI;
24439882Smckusick 		reassignbuf(bp, bp->b_vp);
24547545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
24639882Smckusick 	}
24737736Smckusick 	/*
24839668Smckusick 	 * If this is a tape drive, the write must be initiated.
24937736Smckusick 	 */
25048360Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
2518Sbill 		bawrite(bp);
25239668Smckusick 	} else {
25346151Smckusick 		bp->b_flags |= (B_DONE | B_DELWRI);
2548Sbill 		brelse(bp);
2558Sbill 	}
2568Sbill }
2578Sbill 
2588Sbill /*
25946151Smckusick  * Asynchronous write.
26046151Smckusick  * Start I/O on a buffer, but do not wait for it to complete.
26146151Smckusick  * The buffer is released when the I/O completes.
2628Sbill  */
2638Sbill bawrite(bp)
2647015Smckusick 	register struct buf *bp;
2658Sbill {
2668Sbill 
26746151Smckusick 	/*
26846151Smckusick 	 * Setting the ASYNC flag causes bwrite to return
26946151Smckusick 	 * after starting the I/O.
27046151Smckusick 	 */
2718Sbill 	bp->b_flags |= B_ASYNC;
27237736Smckusick 	(void) bwrite(bp);
2738Sbill }
2748Sbill 
2758Sbill /*
27646151Smckusick  * Release a buffer.
27746151Smckusick  * Even if the buffer is dirty, no I/O is started.
2788Sbill  */
2798Sbill brelse(bp)
2807015Smckusick 	register struct buf *bp;
2818Sbill {
2822325Swnj 	register struct buf *flist;
28346151Smckusick 	int s;
2848Sbill 
28540341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
2867015Smckusick 	/*
28739668Smckusick 	 * If a process is waiting for the buffer, or
28839668Smckusick 	 * is waiting for a free buffer, awaken it.
2897015Smckusick 	 */
29046151Smckusick 	if (bp->b_flags & B_WANTED)
2918Sbill 		wakeup((caddr_t)bp);
29246151Smckusick 	if (bfreelist[0].b_flags & B_WANTED) {
2932325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2942325Swnj 		wakeup((caddr_t)bfreelist);
2958Sbill 	}
29639668Smckusick 	/*
29739668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
29839668Smckusick 	 */
29952413Storek 	s = splbio();
30039668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
30139668Smckusick 		bp->b_flags &= ~B_ERROR;
30239668Smckusick 	/*
30339668Smckusick 	 * Disassociate buffers that are no longer valid.
30439668Smckusick 	 */
30546151Smckusick 	if (bp->b_flags & (B_NOCACHE | B_ERROR))
30637736Smckusick 		bp->b_flags |= B_INVAL;
30746151Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
30839668Smckusick 		if (bp->b_vp)
30939668Smckusick 			brelvp(bp);
31039668Smckusick 		bp->b_flags &= ~B_DELWRI;
31137736Smckusick 	}
3127015Smckusick 	/*
3137015Smckusick 	 * Stick the buffer back on a free list.
3147015Smckusick 	 */
3158670S 	if (bp->b_bufsize <= 0) {
3168670S 		/* block has no buffer ... put at front of unused buffer list */
3178670S 		flist = &bfreelist[BQ_EMPTY];
3188670S 		binsheadfree(bp, flist);
31946151Smckusick 	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
3202325Swnj 		/* block has no info ... put at front of most free list */
3218670S 		flist = &bfreelist[BQ_AGE];
3227015Smckusick 		binsheadfree(bp, flist);
3238Sbill 	} else {
3242325Swnj 		if (bp->b_flags & B_LOCKED)
3252325Swnj 			flist = &bfreelist[BQ_LOCKED];
3262325Swnj 		else if (bp->b_flags & B_AGE)
3272325Swnj 			flist = &bfreelist[BQ_AGE];
3282325Swnj 		else
3292325Swnj 			flist = &bfreelist[BQ_LRU];
3307015Smckusick 		binstailfree(bp, flist);
3318Sbill 	}
33246151Smckusick 	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
3338Sbill 	splx(s);
3348Sbill }
3358Sbill 
3368Sbill /*
33746151Smckusick  * Check to see if a block is currently memory resident.
3388Sbill  */
33937736Smckusick incore(vp, blkno)
34037736Smckusick 	struct vnode *vp;
3417015Smckusick 	daddr_t blkno;
3428Sbill {
3438Sbill 	register struct buf *bp;
3442325Swnj 	register struct buf *dp;
3458Sbill 
34638225Smckusick 	dp = BUFHASH(vp, blkno);
3472325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
34839668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
3497015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
35091Sbill 			return (1);
35191Sbill 	return (0);
3528Sbill }
3538Sbill 
35439668Smckusick /*
35546151Smckusick  * Check to see if a block is currently memory resident.
35646151Smckusick  * If it is resident, return it. If it is not resident,
35746151Smckusick  * allocate a new buffer and assign it to the block.
35839668Smckusick  */
3598Sbill struct buf *
36037736Smckusick getblk(vp, blkno, size)
36137736Smckusick 	register struct vnode *vp;
3626563Smckusic 	daddr_t blkno;
3636563Smckusic 	int size;
3648Sbill {
3658670S 	register struct buf *bp, *dp;
3665424Swnj 	int s;
3678Sbill 
36825255Smckusick 	if (size > MAXBSIZE)
36925255Smckusick 		panic("getblk: size too big");
3707015Smckusick 	/*
37146151Smckusick 	 * Search the cache for the block. If the buffer is found,
37246151Smckusick 	 * but it is currently locked, the we must wait for it to
37346151Smckusick 	 * become available.
3747015Smckusick 	 */
37537736Smckusick 	dp = BUFHASH(vp, blkno);
3767015Smckusick loop:
3772325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
37839668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
37946151Smckusick 		    (bp->b_flags & B_INVAL))
3808Sbill 			continue;
38126271Skarels 		s = splbio();
38246151Smckusick 		if (bp->b_flags & B_BUSY) {
3838Sbill 			bp->b_flags |= B_WANTED;
38446151Smckusick 			sleep((caddr_t)bp, PRIBIO + 1);
3855424Swnj 			splx(s);
3868Sbill 			goto loop;
3878Sbill 		}
38839882Smckusick 		bremfree(bp);
38939882Smckusick 		bp->b_flags |= B_BUSY;
3905424Swnj 		splx(s);
39132608Smckusick 		if (bp->b_bcount != size) {
39239668Smckusick 			printf("getblk: stray size");
39339668Smckusick 			bp->b_flags |= B_INVAL;
39439668Smckusick 			bwrite(bp);
39539668Smckusick 			goto loop;
39632608Smckusick 		}
3978Sbill 		bp->b_flags |= B_CACHE;
39826271Skarels 		return (bp);
3998Sbill 	}
4008670S 	bp = getnewbuf();
4017015Smckusick 	bremhash(bp);
40239668Smckusick 	bgetvp(vp, bp);
40345116Smckusick 	bp->b_bcount = 0;
40439668Smckusick 	bp->b_lblkno = blkno;
4056563Smckusic 	bp->b_blkno = blkno;
4068670S 	bp->b_error = 0;
40737736Smckusick 	bp->b_resid = 0;
40837736Smckusick 	binshash(bp, dp);
40945116Smckusick 	allocbuf(bp, size);
41026271Skarels 	return (bp);
4118Sbill }
4128Sbill 
4138Sbill /*
41446151Smckusick  * Allocate a buffer.
41546151Smckusick  * The caller will assign it to a block.
4168Sbill  */
4178Sbill struct buf *
4186563Smckusic geteblk(size)
4196563Smckusic 	int size;
4208Sbill {
4218670S 	register struct buf *bp, *flist;
4228Sbill 
42325255Smckusick 	if (size > MAXBSIZE)
42425255Smckusick 		panic("geteblk: size too big");
4258670S 	bp = getnewbuf();
4268670S 	bp->b_flags |= B_INVAL;
4277015Smckusick 	bremhash(bp);
4288670S 	flist = &bfreelist[BQ_AGE];
42945116Smckusick 	bp->b_bcount = 0;
43037736Smckusick 	bp->b_error = 0;
43137736Smckusick 	bp->b_resid = 0;
4328670S 	binshash(bp, flist);
43345116Smckusick 	allocbuf(bp, size);
43426271Skarels 	return (bp);
4358Sbill }
4368Sbill 
4378Sbill /*
43845116Smckusick  * Expand or contract the actual memory allocated to a buffer.
43946151Smckusick  * If no memory is available, release buffer and take error exit.
4406563Smckusic  */
44145116Smckusick allocbuf(tp, size)
44245116Smckusick 	register struct buf *tp;
4436563Smckusic 	int size;
4446563Smckusic {
44545116Smckusick 	register struct buf *bp, *ep;
44645116Smckusick 	int sizealloc, take, s;
4476563Smckusic 
44845116Smckusick 	sizealloc = roundup(size, CLBYTES);
44945116Smckusick 	/*
45045116Smckusick 	 * Buffer size does not change
45145116Smckusick 	 */
45245116Smckusick 	if (sizealloc == tp->b_bufsize)
45345116Smckusick 		goto out;
45445116Smckusick 	/*
45545116Smckusick 	 * Buffer size is shrinking.
45645116Smckusick 	 * Place excess space in a buffer header taken from the
45745116Smckusick 	 * BQ_EMPTY buffer list and placed on the "most free" list.
45845116Smckusick 	 * If no extra buffer headers are available, leave the
45945116Smckusick 	 * extra space in the present buffer.
46045116Smckusick 	 */
46145116Smckusick 	if (sizealloc < tp->b_bufsize) {
46245116Smckusick 		ep = bfreelist[BQ_EMPTY].av_forw;
46345116Smckusick 		if (ep == &bfreelist[BQ_EMPTY])
46445116Smckusick 			goto out;
46545116Smckusick 		s = splbio();
46645116Smckusick 		bremfree(ep);
46745116Smckusick 		ep->b_flags |= B_BUSY;
46845116Smckusick 		splx(s);
46945116Smckusick 		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
47045116Smckusick 		    (int)tp->b_bufsize - sizealloc);
47145116Smckusick 		ep->b_bufsize = tp->b_bufsize - sizealloc;
47245116Smckusick 		tp->b_bufsize = sizealloc;
47345116Smckusick 		ep->b_flags |= B_INVAL;
47445116Smckusick 		ep->b_bcount = 0;
47545116Smckusick 		brelse(ep);
47645116Smckusick 		goto out;
47745116Smckusick 	}
47845116Smckusick 	/*
47945116Smckusick 	 * More buffer space is needed. Get it out of buffers on
48045116Smckusick 	 * the "most free" list, placing the empty headers on the
48145116Smckusick 	 * BQ_EMPTY buffer header list.
48245116Smckusick 	 */
48345116Smckusick 	while (tp->b_bufsize < sizealloc) {
48445116Smckusick 		take = sizealloc - tp->b_bufsize;
48545116Smckusick 		bp = getnewbuf();
48645116Smckusick 		if (take >= bp->b_bufsize)
48745116Smckusick 			take = bp->b_bufsize;
48845116Smckusick 		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
48945116Smckusick 		    &tp->b_un.b_addr[tp->b_bufsize], take);
49045116Smckusick 		tp->b_bufsize += take;
49145116Smckusick 		bp->b_bufsize = bp->b_bufsize - take;
49245116Smckusick 		if (bp->b_bcount > bp->b_bufsize)
49345116Smckusick 			bp->b_bcount = bp->b_bufsize;
49445116Smckusick 		if (bp->b_bufsize <= 0) {
49545116Smckusick 			bremhash(bp);
49645116Smckusick 			binshash(bp, &bfreelist[BQ_EMPTY]);
49746151Smckusick 			bp->b_dev = NODEV;
49845116Smckusick 			bp->b_error = 0;
49945116Smckusick 			bp->b_flags |= B_INVAL;
50045116Smckusick 		}
50145116Smckusick 		brelse(bp);
50245116Smckusick 	}
50345116Smckusick out:
50445116Smckusick 	tp->b_bcount = size;
50545116Smckusick 	return (1);
5068670S }
5078670S 
5088670S /*
5098670S  * Find a buffer which is available for use.
5108670S  * Select something from a free list.
5118670S  * Preference is to AGE list, then LRU list.
5128670S  */
5138670S struct buf *
5148670S getnewbuf()
5158670S {
5168670S 	register struct buf *bp, *dp;
51738776Smckusick 	register struct ucred *cred;
5188670S 	int s;
5198670S 
5208670S loop:
52126271Skarels 	s = splbio();
5228670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
5238670S 		if (dp->av_forw != dp)
5248670S 			break;
5258670S 	if (dp == bfreelist) {		/* no free blocks */
5268670S 		dp->b_flags |= B_WANTED;
52746151Smckusick 		sleep((caddr_t)dp, PRIBIO + 1);
52812170Ssam 		splx(s);
5298670S 		goto loop;
5308670S 	}
53139882Smckusick 	bp = dp->av_forw;
53239882Smckusick 	bremfree(bp);
53339882Smckusick 	bp->b_flags |= B_BUSY;
5348670S 	splx(s);
5358670S 	if (bp->b_flags & B_DELWRI) {
53638614Smckusick 		(void) bawrite(bp);
5378670S 		goto loop;
5388670S 	}
53940341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
54039668Smckusick 	if (bp->b_vp)
54139668Smckusick 		brelvp(bp);
54238776Smckusick 	if (bp->b_rcred != NOCRED) {
54338776Smckusick 		cred = bp->b_rcred;
54438776Smckusick 		bp->b_rcred = NOCRED;
54538776Smckusick 		crfree(cred);
54638776Smckusick 	}
54738776Smckusick 	if (bp->b_wcred != NOCRED) {
54838776Smckusick 		cred = bp->b_wcred;
54938776Smckusick 		bp->b_wcred = NOCRED;
55038776Smckusick 		crfree(cred);
55138776Smckusick 	}
5528670S 	bp->b_flags = B_BUSY;
55346989Smckusick 	bp->b_dirtyoff = bp->b_dirtyend = 0;
55452189Smckusick 	bp->b_validoff = bp->b_validend = 0;
5558670S 	return (bp);
5568670S }
5578670S 
5588670S /*
55946151Smckusick  * Wait for I/O to complete.
56046151Smckusick  *
56146151Smckusick  * Extract and return any errors associated with the I/O.
56246151Smckusick  * If the error flag is set, but no specific error is
56346151Smckusick  * given, return EIO.
5648Sbill  */
5657015Smckusick biowait(bp)
5666563Smckusic 	register struct buf *bp;
5678Sbill {
5685431Sroot 	int s;
5698Sbill 
57026271Skarels 	s = splbio();
57138776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5728Sbill 		sleep((caddr_t)bp, PRIBIO);
5735431Sroot 	splx(s);
57437736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
57537736Smckusick 		return (0);
57637736Smckusick 	if (bp->b_error)
57737736Smckusick 		return (bp->b_error);
57837736Smckusick 	return (EIO);
5798Sbill }
5808Sbill 
5818Sbill /*
58213128Ssam  * Mark I/O complete on a buffer.
58346151Smckusick  *
58446151Smckusick  * If a callback has been requested, e.g. the pageout
58546151Smckusick  * daemon, do so. Otherwise, awaken waiting processes.
5868Sbill  */
58751455Sbostic void
5887015Smckusick biodone(bp)
5897015Smckusick 	register struct buf *bp;
5908Sbill {
5918Sbill 
592420Sbill 	if (bp->b_flags & B_DONE)
5937015Smckusick 		panic("dup biodone");
5948Sbill 	bp->b_flags |= B_DONE;
59549232Smckusick 	if ((bp->b_flags & B_READ) == 0)
59649232Smckusick 		vwakeup(bp);
5979763Ssam 	if (bp->b_flags & B_CALL) {
5989763Ssam 		bp->b_flags &= ~B_CALL;
5999763Ssam 		(*bp->b_iodone)(bp);
6009763Ssam 		return;
6019763Ssam 	}
60246151Smckusick 	if (bp->b_flags & B_ASYNC)
6038Sbill 		brelse(bp);
6048Sbill 	else {
6058Sbill 		bp->b_flags &= ~B_WANTED;
6068Sbill 		wakeup((caddr_t)bp);
6078Sbill 	}
6088Sbill }
609*56356Smckusick 
#ifdef DIAGNOSTIC
/*
 * Print, for each buffer free list, the total number of buffers on it
 * and how many there are of each allocated size (in multiples of
 * CLBYTES).
 * NOTE(review): the historical comment says this can be printed on
 * every ``sync'' by setting "syncprt"; that variable is not defined
 * in this part of the file -- confirm where it lives.
 *
 * A buffer whose b_bufsize lies outside 0..MAXBSIZE is tallied
 * separately ("badsize") rather than being used to index counts[],
 * which would write past the end of the array.
 */
void
vfs_bufstats()
{
	int s, i, j, count, bogus;
	register struct buf *dp, *bp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bfreelist, i = 0; dp < &bfreelist[BQUEUES]; dp++, i++) {
		count = 0;
		bogus = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		/* Hold off buffer I/O interrupts while walking the list. */
		s = splbio();
		for (bp = dp->av_forw; dp != bp; bp = bp->av_forw) {
			if (bp->b_bufsize < 0 || bp->b_bufsize > MAXBSIZE)
				bogus++;
			else
				counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		if (bogus != 0)
			printf(", badsize-%d", bogus);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
642