xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 56395)
149589Sbostic /*-
249589Sbostic  * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
549618Smckusick  * This module is believed to contain source code proprietary to AT&T.
649618Smckusick  * Use and redistribution is subject to the Berkeley Software License
749618Smckusick  * Agreement and your Software Agreement with AT&T (Western Electric).
837736Smckusick  *
9*56395Smckusick  *	@(#)vfs_cluster.c	7.54 (Berkeley) 10/02/92
1023395Smckusick  */
118Sbill 
1251455Sbostic #include <sys/param.h>
1351455Sbostic #include <sys/proc.h>
1451455Sbostic #include <sys/buf.h>
1551455Sbostic #include <sys/vnode.h>
1651455Sbostic #include <sys/mount.h>
1751455Sbostic #include <sys/trace.h>
1851455Sbostic #include <sys/resourcevar.h>
19*56395Smckusick #include <sys/malloc.h>
20*56395Smckusick #include <libkern/libkern.h>
218Sbill 
/*
 * Definitions for the buffer hash lists.
 *
 * A buffer is hashed by the vnode it belongs to and its logical block
 * number.  "bufhash" is the mask applied to the hash value (presumably
 * table-size - 1 as produced by hashinit() in bufinit() -- confirm there),
 * so BUFHASH() yields a pointer to one of the chain heads in bufhashtbl.
 * "invalhash" heads the chain of buffers with no valid identity.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	buf **bufhashtbl, *invalhash;
u_long	bufhash;
29*56395Smckusick 
/*
 * Insq/Remq for the buffer hash lists.
 *
 * Chains are singly linked through b_forw, with b_back pointing at the
 * predecessor's b_forw field (or at the chain head), so a buffer can be
 * removed without knowing which chain it is on.
 */
/* Remove (bp) from whatever hash chain it is currently on. */
#define	bremhash(bp) { \
	struct buf *bq; \
	if (bq = (bp)->b_forw) \
		bq->b_back = (bp)->b_back; \
	*(bp)->b_back = bq; \
}
/* Insert (bp) at the front of the chain headed by *(dp). */
#define	binshash(bp, dp) { \
	struct buf *bq; \
	if (bq = *(dp)) \
		bq->b_back = &(bp)->b_forw; \
	(bp)->b_forw = bq; \
	(bp)->b_back = (dp); \
	*(dp) = (bp); \
}
47*56395Smckusick 
/*
 * Definitions for the buffer free lists.
 *
 * Every buffer that is not B_BUSY sits on exactly one of these queues,
 * linked through b_actf (forward) and b_actb (back pointer into the
 * predecessor's b_actf, or into buffreehead).
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

/* Head and tail of one free queue; tail points at the last b_actf slot. */
struct bufqueue {
	struct	buf *buffreehead;	/* head of available list */
	struct	buf **buffreetail;	/* tail of available list */
} bufqueues[BQUEUES];
int needbuffer;		/* set while a process sleeps in getnewbuf() */
63*56395Smckusick 
64*56395Smckusick /*
65*56395Smckusick  * Insq/Remq for the buffer free lists.
66*56395Smckusick  */
67*56395Smckusick void
68*56395Smckusick bremfree(bp)
69*56395Smckusick 	struct buf *bp;
70*56395Smckusick {
71*56395Smckusick 	struct buf *bq;
72*56395Smckusick 	struct bufqueue *dp;
73*56395Smckusick 
74*56395Smckusick 	if (bq = bp->b_actf) {
75*56395Smckusick 		bq->b_actb = bp->b_actb;
76*56395Smckusick 	} else {
77*56395Smckusick 		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
78*56395Smckusick 			if (dp->buffreetail == &bp->b_actf)
79*56395Smckusick 				break;
80*56395Smckusick 		if (dp == &bufqueues[BQUEUES])
81*56395Smckusick 			panic("bremfree: lost tail");
82*56395Smckusick 		dp->buffreetail = bp->b_actb;
83*56395Smckusick 	}
84*56395Smckusick 	*bp->b_actb = bq;
85*56395Smckusick }
86*56395Smckusick 
/* Insert (bp) at the head of free queue (dp). */
#define	binsheadfree(bp, dp) { \
	struct buf *bq; \
	if (bq = (dp)->buffreehead) \
		bq->b_actb = &(bp)->b_actf; \
	else \
		(dp)->buffreetail = &(bp)->b_actf; \
	(dp)->buffreehead = (bp); \
	(bp)->b_actf = bq; \
	(bp)->b_actb = &(dp)->buffreehead; \
}
/* Append (bp) at the tail of free queue (dp). */
#define	binstailfree(bp, dp) { \
	(bp)->b_actf = NULL; \
	(bp)->b_actb = (dp)->buffreetail; \
	*(dp)->buffreetail = (bp); \
	(dp)->buffreetail = &(bp)->b_actf; \
}
103*56395Smckusick 
104*56395Smckusick /*
10549280Skarels  * Initialize buffers and hash links for buffers.
10649280Skarels  */
10751455Sbostic void
10849280Skarels bufinit()
10949280Skarels {
110*56395Smckusick 	register struct buf *bp;
111*56395Smckusick 	struct bufqueue *dp;
11249280Skarels 	register int i;
11349280Skarels 	int base, residual;
11449280Skarels 
115*56395Smckusick 	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
116*56395Smckusick 		dp->buffreetail = &dp->buffreehead;
117*56395Smckusick 	bufhashtbl = (struct buf **)hashinit(nbuf, M_CACHE, &bufhash);
11849280Skarels 	base = bufpages / nbuf;
11949280Skarels 	residual = bufpages % nbuf;
12049280Skarels 	for (i = 0; i < nbuf; i++) {
12149280Skarels 		bp = &buf[i];
122*56395Smckusick 		bzero((char *)bp, sizeof *bp);
12349280Skarels 		bp->b_dev = NODEV;
12449280Skarels 		bp->b_rcred = NOCRED;
12549280Skarels 		bp->b_wcred = NOCRED;
12649280Skarels 		bp->b_un.b_addr = buffers + i * MAXBSIZE;
12749280Skarels 		if (i < residual)
12849280Skarels 			bp->b_bufsize = (base + 1) * CLBYTES;
12949280Skarels 		else
13049280Skarels 			bp->b_bufsize = base * CLBYTES;
13152413Storek 		bp->b_flags = B_INVAL;
132*56395Smckusick 		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
13352413Storek 		binsheadfree(bp, dp);
134*56395Smckusick 		binshash(bp, &invalhash);
13549280Skarels 	}
13649280Skarels }
13749280Skarels 
13849280Skarels /*
13946151Smckusick  * Find the block in the buffer pool.
14046151Smckusick  * If the buffer is not present, allocate a new buffer and load
14146151Smckusick  * its contents according to the filesystem fill routine.
1428Sbill  */
14338776Smckusick bread(vp, blkno, size, cred, bpp)
14437736Smckusick 	struct vnode *vp;
1456563Smckusic 	daddr_t blkno;
1466563Smckusic 	int size;
14738776Smckusick 	struct ucred *cred;
14837736Smckusick 	struct buf **bpp;
1498Sbill {
15047545Skarels 	struct proc *p = curproc;		/* XXX */
1518Sbill 	register struct buf *bp;
1528Sbill 
1538670S 	if (size == 0)
1548670S 		panic("bread: size 0");
15537736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
15646151Smckusick 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
15740341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
15837736Smckusick 		return (0);
1598Sbill 	}
1608Sbill 	bp->b_flags |= B_READ;
1618670S 	if (bp->b_bcount > bp->b_bufsize)
1628670S 		panic("bread");
16338776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
16438776Smckusick 		crhold(cred);
16538776Smckusick 		bp->b_rcred = cred;
16638776Smckusick 	}
16737736Smckusick 	VOP_STRATEGY(bp);
16840341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
16947545Skarels 	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
17037736Smckusick 	return (biowait(bp));
1718Sbill }
1728Sbill 
/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 *
 * Only blocks not already memory resident have I/O started on them.
 * Read-ahead buffers are started asynchronously (or released at once
 * if already valid); the requested block is returned busy in *bpp.
 * Returns 0 on success or an errno from biowait()/bread().
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			/* Record the credential used for the first read. */
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i]);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			/* Already valid and nobody waits for it; drop it. */
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			/* B_ASYNC: biodone() will release rabp for us. */
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
2468Sbill 
/*
 * Synchronous write.
 * Release buffer on completion.
 *
 * If B_ASYNC is set, the write is merely started and the buffer is
 * released later by biodone(); the return value is then always 0.
 * Otherwise the routine waits for completion and returns the errno
 * from biowait().
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;	/* sample flags before clearing them */
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			/* B_DELWRI was just cleared: move to clean list. */
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	/* Account for one more write in progress on this vnode. */
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag&B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		brelse(bp);
	} else if (flag & B_DELWRI) {
		/* Async flush of a delayed write: age for quick reuse. */
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}
2938Sbill 
29453578Sheideman int
29553578Sheideman vn_bwrite(ap)
29653578Sheideman 	struct vop_bwrite_args *ap;
29753578Sheideman {
298*56395Smckusick 	return (bwrite(ap->a_bp));
29953578Sheideman }
30053578Sheideman 
30153578Sheideman 
3028Sbill /*
30346151Smckusick  * Delayed write.
30446151Smckusick  *
30546151Smckusick  * The buffer is marked dirty, but is not queued for I/O.
30646151Smckusick  * This routine should be used when the buffer is expected
30746151Smckusick  * to be modified again soon, typically a small write that
30846151Smckusick  * partially fills a buffer.
30946151Smckusick  *
31046151Smckusick  * NB: magnetic tapes cannot be delayed; they must be
31146151Smckusick  * written in the order that the writes are requested.
3128Sbill  */
3138Sbill bdwrite(bp)
3147015Smckusick 	register struct buf *bp;
3158Sbill {
31647545Skarels 	struct proc *p = curproc;		/* XXX */
3178Sbill 
31839882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
31939882Smckusick 		bp->b_flags |= B_DELWRI;
32039882Smckusick 		reassignbuf(bp, bp->b_vp);
32147545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
32239882Smckusick 	}
32337736Smckusick 	/*
32439668Smckusick 	 * If this is a tape drive, the write must be initiated.
32537736Smckusick 	 */
32648360Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
3278Sbill 		bawrite(bp);
32839668Smckusick 	} else {
32946151Smckusick 		bp->b_flags |= (B_DONE | B_DELWRI);
3308Sbill 		brelse(bp);
3318Sbill 	}
3328Sbill }
3338Sbill 
3348Sbill /*
33546151Smckusick  * Asynchronous write.
33646151Smckusick  * Start I/O on a buffer, but do not wait for it to complete.
33746151Smckusick  * The buffer is released when the I/O completes.
3388Sbill  */
3398Sbill bawrite(bp)
3407015Smckusick 	register struct buf *bp;
3418Sbill {
3428Sbill 
34346151Smckusick 	/*
34446151Smckusick 	 * Setting the ASYNC flag causes bwrite to return
34546151Smckusick 	 * after starting the I/O.
34646151Smckusick 	 */
3478Sbill 	bp->b_flags |= B_ASYNC;
34837736Smckusick 	(void) bwrite(bp);
3498Sbill }
3508Sbill 
/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 *
 * Wakes up anyone waiting for this buffer or for any free buffer,
 * invalidates the buffer when an error occurred (or caching was
 * refused), and returns it to the appropriate free queue.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct bufqueue *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		/* Drop the vnode association and any pending dirtiness. */
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		/* Valid data: queue at the tail so it is reclaimed last. */
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}
4118Sbill 
4128Sbill /*
41346151Smckusick  * Check to see if a block is currently memory resident.
4148Sbill  */
41537736Smckusick incore(vp, blkno)
41637736Smckusick 	struct vnode *vp;
4177015Smckusick 	daddr_t blkno;
4188Sbill {
4198Sbill 	register struct buf *bp;
4208Sbill 
421*56395Smckusick 	for (bp = *BUFHASH(vp, blkno); bp; bp = bp->b_forw)
42239668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
4237015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
42491Sbill 			return (1);
42591Sbill 	return (0);
4268Sbill }
4278Sbill 
/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 *
 * The returned buffer is B_BUSY; the caller must release it with
 * brelse()/bwrite().  May sleep waiting for a busy buffer or for
 * a free buffer header.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, **dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = *dp; bp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			/* The buffer may have changed identity; rescan. */
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			/* Unexpected size mismatch: flush and rescan. */
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/* Not cached: recycle a free buffer and give it this identity. */
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
4868Sbill 
4878Sbill /*
48846151Smckusick  * Allocate a buffer.
48946151Smckusick  * The caller will assign it to a block.
4908Sbill  */
4918Sbill struct buf *
4926563Smckusic geteblk(size)
4936563Smckusic 	int size;
4948Sbill {
495*56395Smckusick 	register struct buf *bp;
4968Sbill 
49725255Smckusick 	if (size > MAXBSIZE)
49825255Smckusick 		panic("geteblk: size too big");
4998670S 	bp = getnewbuf();
5008670S 	bp->b_flags |= B_INVAL;
5017015Smckusick 	bremhash(bp);
502*56395Smckusick 	binshash(bp, &invalhash);
50345116Smckusick 	bp->b_bcount = 0;
50437736Smckusick 	bp->b_error = 0;
50537736Smckusick 	bp->b_resid = 0;
50645116Smckusick 	allocbuf(bp, size);
50726271Skarels 	return (bp);
5088Sbill }
5098Sbill 
/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 * NOTE(review): as written the routine always returns 1; no error
 * exit is actually taken -- the header comment appears stale.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].buffreehead) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		/* Shift the surplus pages into the empty header. */
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		/* Steal "take" bytes from the tail of the donor buffer. */
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			/* Donor is now memoryless; park it on invalhash. */
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}
5798670S 
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * The returned buffer is B_BUSY, off every free queue, and stripped
 * of its old vnode and credentials.  May sleep waiting for a buffer
 * to be released, and restarts after flushing a delayed-write buffer.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp;
	register struct bufqueue *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	/* Scan AGE then LRU; LOCKED and EMPTY are never reclaimed here. */
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->buffreehead)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		sleep((caddr_t)&needbuffer, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->buffreehead;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		/* Dirty: start an async flush and pick another buffer. */
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	/* Release credentials cached for the buffer's old identity. */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}
6308670S 
6318670S /*
63246151Smckusick  * Wait for I/O to complete.
63346151Smckusick  *
63446151Smckusick  * Extract and return any errors associated with the I/O.
63546151Smckusick  * If the error flag is set, but no specific error is
63646151Smckusick  * given, return EIO.
6378Sbill  */
6387015Smckusick biowait(bp)
6396563Smckusic 	register struct buf *bp;
6408Sbill {
6415431Sroot 	int s;
6428Sbill 
64326271Skarels 	s = splbio();
64438776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
6458Sbill 		sleep((caddr_t)bp, PRIBIO);
6465431Sroot 	splx(s);
64737736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
64837736Smckusick 		return (0);
64937736Smckusick 	if (bp->b_error)
65037736Smckusick 		return (bp->b_error);
65137736Smckusick 	return (EIO);
6528Sbill }
6538Sbill 
6548Sbill /*
65513128Ssam  * Mark I/O complete on a buffer.
65646151Smckusick  *
65746151Smckusick  * If a callback has been requested, e.g. the pageout
65846151Smckusick  * daemon, do so. Otherwise, awaken waiting processes.
6598Sbill  */
66051455Sbostic void
6617015Smckusick biodone(bp)
6627015Smckusick 	register struct buf *bp;
6638Sbill {
6648Sbill 
665420Sbill 	if (bp->b_flags & B_DONE)
6667015Smckusick 		panic("dup biodone");
6678Sbill 	bp->b_flags |= B_DONE;
66849232Smckusick 	if ((bp->b_flags & B_READ) == 0)
66949232Smckusick 		vwakeup(bp);
6709763Ssam 	if (bp->b_flags & B_CALL) {
6719763Ssam 		bp->b_flags &= ~B_CALL;
6729763Ssam 		(*bp->b_iodone)(bp);
6739763Ssam 		return;
6749763Ssam 	}
67546151Smckusick 	if (bp->b_flags & B_ASYNC)
6768Sbill 		brelse(bp);
6778Sbill 	else {
6788Sbill 		bp->b_flags &= ~B_WANTED;
6798Sbill 		wakeup((caddr_t)bp);
6808Sbill 	}
6818Sbill }
68256356Smckusick 
#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * above.
 */
void
vfs_bufstats()
{
	register struct buf *bp;
	register struct bufqueue *queue;
	int sizes[MAXBSIZE/CLBYTES+1];
	int s, q, j, total;
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (q = 0; q < BQUEUES; q++) {
		queue = &bufqueues[q];
		total = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			sizes[j] = 0;
		/* Walk the queue at splbio so it cannot change underfoot. */
		s = splbio();
		for (bp = queue->buffreehead; bp; bp = bp->b_actf) {
			sizes[bp->b_bufsize/CLBYTES]++;
			total++;
		}
		splx(s);
		printf("%s: total-%d", bname[q], total);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (sizes[j] != 0)
				printf(", %d-%d", j * CLBYTES, sizes[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
716