xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 54342)
149589Sbostic /*-
249589Sbostic  * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
337736Smckusick  * All rights reserved.
423395Smckusick  *
549618Smckusick  * This module is believed to contain source code proprietary to AT&T.
649618Smckusick  * Use and redistribution is subject to the Berkeley Software License
749618Smckusick  * Agreement and your Software Agreement with AT&T (Western Electric).
837736Smckusick  *
9*54342Smckusick  *	@(#)vfs_cluster.c	7.49 (Berkeley) 06/23/92
1023395Smckusick  */
118Sbill 
1251455Sbostic #include <sys/param.h>
1351455Sbostic #include <sys/proc.h>
1451455Sbostic #include <sys/buf.h>
1551455Sbostic #include <sys/vnode.h>
1651455Sbostic #include <sys/specdev.h>
1751455Sbostic #include <sys/mount.h>
1851455Sbostic #include <sys/trace.h>
1951455Sbostic #include <sys/resourcevar.h>
208Sbill 
2191Sbill /*
2249280Skarels  * Initialize buffers and hash links for buffers.
2349280Skarels  */
2451455Sbostic void
2549280Skarels bufinit()
2649280Skarels {
2749280Skarels 	register int i;
2849280Skarels 	register struct buf *bp, *dp;
2949280Skarels 	register struct bufhd *hp;
3049280Skarels 	int base, residual;
3149280Skarels 
3249280Skarels 	for (hp = bufhash, i = 0; i < BUFHSZ; i++, hp++)
3349280Skarels 		hp->b_forw = hp->b_back = (struct buf *)hp;
3449280Skarels 
3549280Skarels 	for (dp = bfreelist; dp < &bfreelist[BQUEUES]; dp++) {
3649280Skarels 		dp->b_forw = dp->b_back = dp->av_forw = dp->av_back = dp;
3749280Skarels 		dp->b_flags = B_HEAD;
3849280Skarels 	}
3949280Skarels 	base = bufpages / nbuf;
4049280Skarels 	residual = bufpages % nbuf;
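	/*
	 * Illustrative arithmetic (not from the original source): the first
	 * "residual" buffers get base + 1 clusters apiece so that all of
	 * bufpages is used.  E.g. bufpages = 103, nbuf = 20 gives base = 5,
	 * residual = 3: 3 buffers of 6 clusters plus 17 of 5 clusters = 103.
	 */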
4149280Skarels 	for (i = 0; i < nbuf; i++) {
4249280Skarels 		bp = &buf[i];
4349280Skarels 		bp->b_dev = NODEV;
4449280Skarels 		bp->b_bcount = 0;
4549280Skarels 		bp->b_rcred = NOCRED;
4649280Skarels 		bp->b_wcred = NOCRED;
4749280Skarels 		bp->b_dirtyoff = 0;
4849280Skarels 		bp->b_dirtyend = 0;
4952189Smckusick 		bp->b_validoff = 0;
5052189Smckusick 		bp->b_validend = 0;
5149280Skarels 		bp->b_un.b_addr = buffers + i * MAXBSIZE;
5249280Skarels 		if (i < residual)
5349280Skarels 			bp->b_bufsize = (base + 1) * CLBYTES;
5449280Skarels 		else
5549280Skarels 			bp->b_bufsize = base * CLBYTES;
5649280Skarels 		binshash(bp, &bfreelist[BQ_AGE]);
5752413Storek 		bp->b_flags = B_INVAL;
5852413Storek 		dp = bp->b_bufsize ? &bfreelist[BQ_AGE] : &bfreelist[BQ_EMPTY];
5952413Storek 		binsheadfree(bp, dp);
6049280Skarels 	}
6149280Skarels }
6249280Skarels 
6349280Skarels /*
6446151Smckusick  * Find the block in the buffer pool.
6546151Smckusick  * If the buffer is not present, allocate a new buffer and load
6646151Smckusick  * its contents according to the filesystem fill routine.
678Sbill  */
6838776Smckusick bread(vp, blkno, size, cred, bpp)
6937736Smckusick 	struct vnode *vp;
706563Smckusic 	daddr_t blkno;
716563Smckusic 	int size;
7238776Smckusick 	struct ucred *cred;
7337736Smckusick 	struct buf **bpp;
748Sbill {
7553545Sheideman 	USES_VOP_STRATEGY;
7647545Skarels 	struct proc *p = curproc;		/* XXX */
778Sbill 	register struct buf *bp;
788Sbill 
798670S 	if (size == 0)
808670S 		panic("bread: size 0");
8137736Smckusick 	*bpp = bp = getblk(vp, blkno, size);
8246151Smckusick 	if (bp->b_flags & (B_DONE | B_DELWRI)) {
8340341Smckusick 		trace(TR_BREADHIT, pack(vp, size), blkno);
8437736Smckusick 		return (0);
858Sbill 	}
868Sbill 	bp->b_flags |= B_READ;
878670S 	if (bp->b_bcount > bp->b_bufsize)
888670S 		panic("bread");
8938776Smckusick 	if (bp->b_rcred == NOCRED && cred != NOCRED) {
9038776Smckusick 		crhold(cred);
9138776Smckusick 		bp->b_rcred = cred;
9238776Smckusick 	}
9337736Smckusick 	VOP_STRATEGY(bp);
9440341Smckusick 	trace(TR_BREADMISS, pack(vp, size), blkno);
9547545Skarels 	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
9637736Smckusick 	return (biowait(bp));
978Sbill }
988Sbill 
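/*
 * Illustrative use of bread() (a sketch, not part of this file): a typical
 * caller reads a logical block, examines the data, and releases the buffer.
 * "lbn" and "bsize" are hypothetical values supplied by the caller.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... examine bp->b_un.b_addr ...
 *	brelse(bp);
 */
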
998Sbill /*
10052189Smckusick  * Operates like bread, but also starts I/O on the N specified
10152189Smckusick  * read-ahead blocks.
1028Sbill  */
10352189Smckusick breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
10437736Smckusick 	struct vnode *vp;
1057114Smckusick 	daddr_t blkno; int size;
10652189Smckusick 	daddr_t rablkno[]; int rabsize[];
10752189Smckusick 	int num;
10838776Smckusick 	struct ucred *cred;
10937736Smckusick 	struct buf **bpp;
1108Sbill {
11153545Sheideman 	USES_VOP_STRATEGY;
11247545Skarels 	struct proc *p = curproc;		/* XXX */
1138Sbill 	register struct buf *bp, *rabp;
11452189Smckusick 	register int i;
1158Sbill 
1168Sbill 	bp = NULL;
1177015Smckusick 	/*
11846151Smckusick 	 * If the block is not memory resident,
11946151Smckusick 	 * allocate a buffer and start I/O.
1207015Smckusick 	 */
12137736Smckusick 	if (!incore(vp, blkno)) {
12237736Smckusick 		*bpp = bp = getblk(vp, blkno, size);
12346151Smckusick 		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
1248Sbill 			bp->b_flags |= B_READ;
1258670S 			if (bp->b_bcount > bp->b_bufsize)
12652189Smckusick 				panic("breadn");
12738776Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
12838776Smckusick 				crhold(cred);
12938776Smckusick 				bp->b_rcred = cred;
13038776Smckusick 			}
13137736Smckusick 			VOP_STRATEGY(bp);
13240341Smckusick 			trace(TR_BREADMISS, pack(vp, size), blkno);
13347545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
134*54342Smckusick 		} else {
13540341Smckusick 			trace(TR_BREADHIT, pack(vp, size), blkno);
136*54342Smckusick 		}
1378Sbill 	}
1387015Smckusick 
1397015Smckusick 	/*
14052189Smckusick 	 * If there are read-ahead blocks, start I/O
14152189Smckusick 	 * on them also (as above).
1427015Smckusick 	 */
14352189Smckusick 	for (i = 0; i < num; i++) {
14452189Smckusick 		if (incore(vp, rablkno[i]))
14552189Smckusick 			continue;
14652189Smckusick 		rabp = getblk(vp, rablkno[i], rabsize[i]);
14746151Smckusick 		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
1488Sbill 			brelse(rabp);
14952189Smckusick 			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
1502045Swnj 		} else {
15146151Smckusick 			rabp->b_flags |= B_ASYNC | B_READ;
1528670S 			if (rabp->b_bcount > rabp->b_bufsize)
1538670S 				panic("breadrabp");
15438880Smckusick 			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
15538776Smckusick 				crhold(cred);
15638880Smckusick 				rabp->b_rcred = cred;
15738776Smckusick 			}
15837736Smckusick 			VOP_STRATEGY(rabp);
15952189Smckusick 			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
16047545Skarels 			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
1618Sbill 		}
1628Sbill 	}
1637015Smckusick 
1647015Smckusick 	/*
16546151Smckusick 	 * If block was memory resident, let bread get it.
16646151Smckusick 	 * If block was not memory resident, the read was
16746151Smckusick 	 * started above, so just wait for the read to complete.
1687015Smckusick 	 */
1697114Smckusick 	if (bp == NULL)
17038776Smckusick 		return (bread(vp, blkno, size, cred, bpp));
17137736Smckusick 	return (biowait(bp));
1728Sbill }
1738Sbill 
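/*
 * Illustrative use of breadn() (a sketch, not part of this file): read
 * block "lbn" and start read-ahead on the block that follows it.  The
 * names below are hypothetical.
 *
 *	daddr_t ra_lbn = lbn + 1;
 *	int ra_size = bsize;
 *
 *	error = breadn(vp, lbn, bsize, &ra_lbn, &ra_size, 1, NOCRED, &bp);
 */
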
1748Sbill /*
17546151Smckusick  * Synchronous write.
17646151Smckusick  * Release buffer on completion.
1778Sbill  */
1788Sbill bwrite(bp)
1797015Smckusick 	register struct buf *bp;
1808Sbill {
18153545Sheideman 	USES_VOP_STRATEGY;
18247545Skarels 	struct proc *p = curproc;		/* XXX */
18337736Smckusick 	register int flag;
18452413Storek 	int s, error = 0;
1858Sbill 
1868Sbill 	flag = bp->b_flags;
1879857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
18849459Smckusick 	if (flag & B_ASYNC) {
18949459Smckusick 		if ((flag & B_DELWRI) == 0)
19049459Smckusick 			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
19149459Smckusick 		else
19249459Smckusick 			reassignbuf(bp, bp->b_vp);
19349459Smckusick 	}
19440341Smckusick 	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
1958670S 	if (bp->b_bcount > bp->b_bufsize)
1968670S 		panic("bwrite");
19740226Smckusick 	s = splbio();
19839882Smckusick 	bp->b_vp->v_numoutput++;
19940226Smckusick 	splx(s);
20037736Smckusick 	VOP_STRATEGY(bp);
2017015Smckusick 
2027015Smckusick 	/*
20346151Smckusick 	 * If the write was synchronous, then await I/O completion.
2047015Smckusick 	 * If the write was "delayed", then we put the buffer on
20546151Smckusick 	 * the queue of blocks awaiting I/O completion status.
2067015Smckusick 	 */
20746151Smckusick 	if ((flag & B_ASYNC) == 0) {
20837736Smckusick 		error = biowait(bp);
20949459Smckusick 		if ((flag&B_DELWRI) == 0)
21049459Smckusick 			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
21149459Smckusick 		else
21249459Smckusick 			reassignbuf(bp, bp->b_vp);
2138Sbill 		brelse(bp);
21437736Smckusick 	} else if (flag & B_DELWRI) {
21552413Storek 		s = splbio();
2168Sbill 		bp->b_flags |= B_AGE;
21752413Storek 		splx(s);
21837736Smckusick 	}
21937736Smckusick 	return (error);
2208Sbill }
2218Sbill 
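/*
 * Generic vnode bwrite operation: hand the buffer to bwrite().
 */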
22253578Sheideman int
22353578Sheideman vn_bwrite(ap)
22453578Sheideman 	struct vop_bwrite_args *ap;
22553578Sheideman {
22653578Sheideman 	return (bwrite(ap->a_bp));
22753578Sheideman }
22853578Sheideman 
2308Sbill /*
23146151Smckusick  * Delayed write.
23246151Smckusick  *
23346151Smckusick  * The buffer is marked dirty, but is not queued for I/O.
23446151Smckusick  * This routine should be used when the buffer is expected
23546151Smckusick  * to be modified again soon, typically a small write that
23646151Smckusick  * partially fills a buffer.
23746151Smckusick  *
23846151Smckusick  * NB: magnetic tapes cannot be delayed; they must be
23946151Smckusick  * written in the order that the writes are requested.
2408Sbill  */
2418Sbill bdwrite(bp)
2427015Smckusick 	register struct buf *bp;
2438Sbill {
24453545Sheideman 	USES_VOP_IOCTL;
24547545Skarels 	struct proc *p = curproc;		/* XXX */
2468Sbill 
24739882Smckusick 	if ((bp->b_flags & B_DELWRI) == 0) {
24839882Smckusick 		bp->b_flags |= B_DELWRI;
24939882Smckusick 		reassignbuf(bp, bp->b_vp);
25047545Skarels 		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
25139882Smckusick 	}
25237736Smckusick 	/*
25339668Smckusick 	 * If this is a tape drive, the write must be initiated.
25437736Smckusick 	 */
25548360Smckusick 	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
2568Sbill 		bawrite(bp);
25739668Smckusick 	} else {
25846151Smckusick 		bp->b_flags |= (B_DONE | B_DELWRI);
2598Sbill 		brelse(bp);
2608Sbill 	}
2618Sbill }
2628Sbill 
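/*
 * Illustrative use of bdwrite() (a sketch, not part of this file): a caller
 * that has modified only part of a block marks it dirty and defers the I/O
 * so the write can be combined with later updates.  "off", "len" and "data"
 * are hypothetical.
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	bcopy(data, bp->b_un.b_addr + off, len);
 *	bdwrite(bp);
 */
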
2638Sbill /*
26446151Smckusick  * Asynchronous write.
26546151Smckusick  * Start I/O on a buffer, but do not wait for it to complete.
26646151Smckusick  * The buffer is released when the I/O completes.
2678Sbill  */
2688Sbill bawrite(bp)
2697015Smckusick 	register struct buf *bp;
2708Sbill {
2718Sbill 
27246151Smckusick 	/*
27346151Smckusick 	 * Setting the ASYNC flag causes bwrite to return
27446151Smckusick 	 * after starting the I/O.
27546151Smckusick 	 */
2768Sbill 	bp->b_flags |= B_ASYNC;
27737736Smckusick 	(void) bwrite(bp);
2788Sbill }
2798Sbill 
2808Sbill /*
28146151Smckusick  * Release a buffer.
28246151Smckusick  * Even if the buffer is dirty, no I/O is started.
2838Sbill  */
2848Sbill brelse(bp)
2857015Smckusick 	register struct buf *bp;
2868Sbill {
2872325Swnj 	register struct buf *flist;
28846151Smckusick 	int s;
2898Sbill 
29040341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
2917015Smckusick 	/*
29239668Smckusick 	 * If a process is waiting for the buffer, or
29339668Smckusick 	 * is waiting for a free buffer, awaken it.
2947015Smckusick 	 */
29546151Smckusick 	if (bp->b_flags & B_WANTED)
2968Sbill 		wakeup((caddr_t)bp);
29746151Smckusick 	if (bfreelist[0].b_flags & B_WANTED) {
2982325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2992325Swnj 		wakeup((caddr_t)bfreelist);
3008Sbill 	}
30139668Smckusick 	/*
30239668Smckusick 	 * Retry I/O for locked buffers rather than invalidating them.
30339668Smckusick 	 */
30452413Storek 	s = splbio();
30539668Smckusick 	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
30639668Smckusick 		bp->b_flags &= ~B_ERROR;
30739668Smckusick 	/*
30839668Smckusick 	 * Disassociate buffers that are no longer valid.
30939668Smckusick 	 */
31046151Smckusick 	if (bp->b_flags & (B_NOCACHE | B_ERROR))
31137736Smckusick 		bp->b_flags |= B_INVAL;
31246151Smckusick 	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
31339668Smckusick 		if (bp->b_vp)
31439668Smckusick 			brelvp(bp);
31539668Smckusick 		bp->b_flags &= ~B_DELWRI;
31637736Smckusick 	}
3177015Smckusick 	/*
3187015Smckusick 	 * Stick the buffer back on a free list.
3197015Smckusick 	 */
3208670S 	if (bp->b_bufsize <= 0) {
3218670S 		/* block has no buffer ... put at front of unused buffer list */
3228670S 		flist = &bfreelist[BQ_EMPTY];
3238670S 		binsheadfree(bp, flist);
32446151Smckusick 	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
3252325Swnj 		/* block has no info ... put at front of most free list */
3268670S 		flist = &bfreelist[BQ_AGE];
3277015Smckusick 		binsheadfree(bp, flist);
3288Sbill 	} else {
3292325Swnj 		if (bp->b_flags & B_LOCKED)
3302325Swnj 			flist = &bfreelist[BQ_LOCKED];
3312325Swnj 		else if (bp->b_flags & B_AGE)
3322325Swnj 			flist = &bfreelist[BQ_AGE];
3332325Swnj 		else
3342325Swnj 			flist = &bfreelist[BQ_LRU];
3357015Smckusick 		binstailfree(bp, flist);
3368Sbill 	}
33746151Smckusick 	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
3388Sbill 	splx(s);
3398Sbill }
3408Sbill 
3418Sbill /*
34246151Smckusick  * Check to see if a block is currently memory resident.
3438Sbill  */
34437736Smckusick incore(vp, blkno)
34537736Smckusick 	struct vnode *vp;
3467015Smckusick 	daddr_t blkno;
3478Sbill {
3488Sbill 	register struct buf *bp;
3492325Swnj 	register struct buf *dp;
3508Sbill 
35138225Smckusick 	dp = BUFHASH(vp, blkno);
3522325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
35339668Smckusick 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
3547015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
35591Sbill 			return (1);
35691Sbill 	return (0);
3578Sbill }
3588Sbill 
35939668Smckusick /*
36046151Smckusick  * Check to see if a block is currently memory resident.
36146151Smckusick  * If it is resident, return it. If it is not resident,
36246151Smckusick  * allocate a new buffer and assign it to the block.
36339668Smckusick  */
3648Sbill struct buf *
36537736Smckusick getblk(vp, blkno, size)
36637736Smckusick 	register struct vnode *vp;
3676563Smckusic 	daddr_t blkno;
3686563Smckusic 	int size;
3698Sbill {
3708670S 	register struct buf *bp, *dp;
3715424Swnj 	int s;
3728Sbill 
37325255Smckusick 	if (size > MAXBSIZE)
37425255Smckusick 		panic("getblk: size too big");
3757015Smckusick 	/*
37646151Smckusick 	 * Search the cache for the block. If the buffer is found,
37746151Smckusick 	 * but it is currently locked, then we must wait for it to
37846151Smckusick 	 * become available.
3797015Smckusick 	 */
38037736Smckusick 	dp = BUFHASH(vp, blkno);
3817015Smckusick loop:
3822325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
38339668Smckusick 		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
38446151Smckusick 		    (bp->b_flags & B_INVAL))
3858Sbill 			continue;
38626271Skarels 		s = splbio();
38746151Smckusick 		if (bp->b_flags & B_BUSY) {
3888Sbill 			bp->b_flags |= B_WANTED;
38946151Smckusick 			sleep((caddr_t)bp, PRIBIO + 1);
3905424Swnj 			splx(s);
3918Sbill 			goto loop;
3928Sbill 		}
39339882Smckusick 		bremfree(bp);
39439882Smckusick 		bp->b_flags |= B_BUSY;
3955424Swnj 		splx(s);
39632608Smckusick 		if (bp->b_bcount != size) {
39739668Smckusick 			printf("getblk: stray size\n");
39839668Smckusick 			bp->b_flags |= B_INVAL;
39939668Smckusick 			bwrite(bp);
40039668Smckusick 			goto loop;
40132608Smckusick 		}
4028Sbill 		bp->b_flags |= B_CACHE;
40326271Skarels 		return (bp);
4048Sbill 	}
4058670S 	bp = getnewbuf();
4067015Smckusick 	bremhash(bp);
40739668Smckusick 	bgetvp(vp, bp);
40845116Smckusick 	bp->b_bcount = 0;
40939668Smckusick 	bp->b_lblkno = blkno;
4106563Smckusic 	bp->b_blkno = blkno;
4118670S 	bp->b_error = 0;
41237736Smckusick 	bp->b_resid = 0;
41337736Smckusick 	binshash(bp, dp);
41445116Smckusick 	allocbuf(bp, size);
41526271Skarels 	return (bp);
4168Sbill }
4178Sbill 
4188Sbill /*
41946151Smckusick  * Allocate a buffer.
42046151Smckusick  * The caller will assign it to a block.
4218Sbill  */
4228Sbill struct buf *
4236563Smckusic geteblk(size)
4246563Smckusic 	int size;
4258Sbill {
4268670S 	register struct buf *bp, *flist;
4278Sbill 
42825255Smckusick 	if (size > MAXBSIZE)
42925255Smckusick 		panic("geteblk: size too big");
4308670S 	bp = getnewbuf();
4318670S 	bp->b_flags |= B_INVAL;
4327015Smckusick 	bremhash(bp);
4338670S 	flist = &bfreelist[BQ_AGE];
43445116Smckusick 	bp->b_bcount = 0;
43537736Smckusick 	bp->b_error = 0;
43637736Smckusick 	bp->b_resid = 0;
4378670S 	binshash(bp, flist);
43845116Smckusick 	allocbuf(bp, size);
43926271Skarels 	return (bp);
4408Sbill }
4418Sbill 
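/*
 * Illustrative use of geteblk() (a sketch, not part of this file): allocate
 * a temporary buffer that is not associated with any block, use its memory
 * as scratch space, then release it.
 *
 *	bp = geteblk(bsize);
 *	... use bp->b_un.b_addr ...
 *	brelse(bp);
 */
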
4428Sbill /*
44345116Smckusick  * Expand or contract the actual memory allocated to a buffer.
44446151Smckusick  * If no memory is available, release buffer and take error exit.
4456563Smckusic  */
44645116Smckusick allocbuf(tp, size)
44745116Smckusick 	register struct buf *tp;
4486563Smckusic 	int size;
4496563Smckusic {
45045116Smckusick 	register struct buf *bp, *ep;
45145116Smckusick 	int sizealloc, take, s;
4526563Smckusic 
45345116Smckusick 	sizealloc = roundup(size, CLBYTES);
45445116Smckusick 	/*
45545116Smckusick 	 * Buffer size does not change
45645116Smckusick 	 */
45745116Smckusick 	if (sizealloc == tp->b_bufsize)
45845116Smckusick 		goto out;
45945116Smckusick 	/*
46045116Smckusick 	 * Buffer size is shrinking.
46145116Smckusick 	 * Place excess space in a buffer header taken from the
46245116Smckusick 	 * BQ_EMPTY buffer list and placed on the "most free" list.
46345116Smckusick 	 * If no extra buffer headers are available, leave the
46445116Smckusick 	 * extra space in the present buffer.
46545116Smckusick 	 */
46645116Smckusick 	if (sizealloc < tp->b_bufsize) {
46745116Smckusick 		ep = bfreelist[BQ_EMPTY].av_forw;
46845116Smckusick 		if (ep == &bfreelist[BQ_EMPTY])
46945116Smckusick 			goto out;
47045116Smckusick 		s = splbio();
47145116Smckusick 		bremfree(ep);
47245116Smckusick 		ep->b_flags |= B_BUSY;
47345116Smckusick 		splx(s);
47445116Smckusick 		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
47545116Smckusick 		    (int)tp->b_bufsize - sizealloc);
47645116Smckusick 		ep->b_bufsize = tp->b_bufsize - sizealloc;
47745116Smckusick 		tp->b_bufsize = sizealloc;
47845116Smckusick 		ep->b_flags |= B_INVAL;
47945116Smckusick 		ep->b_bcount = 0;
48045116Smckusick 		brelse(ep);
48145116Smckusick 		goto out;
48245116Smckusick 	}
48345116Smckusick 	/*
48445116Smckusick 	 * More buffer space is needed. Get it out of buffers on
48545116Smckusick 	 * the "most free" list, placing the empty headers on the
48645116Smckusick 	 * BQ_EMPTY buffer header list.
48745116Smckusick 	 */
48845116Smckusick 	while (tp->b_bufsize < sizealloc) {
48945116Smckusick 		take = sizealloc - tp->b_bufsize;
49045116Smckusick 		bp = getnewbuf();
49145116Smckusick 		if (take >= bp->b_bufsize)
49245116Smckusick 			take = bp->b_bufsize;
49345116Smckusick 		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
49445116Smckusick 		    &tp->b_un.b_addr[tp->b_bufsize], take);
49545116Smckusick 		tp->b_bufsize += take;
49645116Smckusick 		bp->b_bufsize = bp->b_bufsize - take;
49745116Smckusick 		if (bp->b_bcount > bp->b_bufsize)
49845116Smckusick 			bp->b_bcount = bp->b_bufsize;
49945116Smckusick 		if (bp->b_bufsize <= 0) {
50045116Smckusick 			bremhash(bp);
50145116Smckusick 			binshash(bp, &bfreelist[BQ_EMPTY]);
50246151Smckusick 			bp->b_dev = NODEV;
50345116Smckusick 			bp->b_error = 0;
50445116Smckusick 			bp->b_flags |= B_INVAL;
50545116Smckusick 		}
50645116Smckusick 		brelse(bp);
50745116Smckusick 	}
50845116Smckusick out:
50945116Smckusick 	tp->b_bcount = size;
51045116Smckusick 	return (1);
5118670S }
5128670S 
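/*
 * Illustrative arithmetic for allocbuf() (not from the original source),
 * assuming CLBYTES = 4096 (machine dependent): growing a buffer from
 * b_bufsize = 4096 to size = 6144 rounds sizealloc up to 8192, so one
 * 4096-byte cluster is taken from a free buffer with pagemove(); shrinking
 * back to 4096 would move the excess cluster onto a header taken from the
 * BQ_EMPTY list.
 */
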
5138670S /*
5148670S  * Find a buffer which is available for use.
5158670S  * Select something from a free list.
5168670S  * Preference is to AGE list, then LRU list.
5178670S  */
5188670S struct buf *
5198670S getnewbuf()
5208670S {
5218670S 	register struct buf *bp, *dp;
52238776Smckusick 	register struct ucred *cred;
5238670S 	int s;
5248670S 
52552096Sbostic #ifdef LFS
52652096Sbostic 	lfs_flush();
52752096Sbostic #endif
5288670S loop:
52926271Skarels 	s = splbio();
5308670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
5318670S 		if (dp->av_forw != dp)
5328670S 			break;
5338670S 	if (dp == bfreelist) {		/* no free blocks */
5348670S 		dp->b_flags |= B_WANTED;
53546151Smckusick 		sleep((caddr_t)dp, PRIBIO + 1);
53612170Ssam 		splx(s);
5378670S 		goto loop;
5388670S 	}
53939882Smckusick 	bp = dp->av_forw;
54039882Smckusick 	bremfree(bp);
54139882Smckusick 	bp->b_flags |= B_BUSY;
5428670S 	splx(s);
5438670S 	if (bp->b_flags & B_DELWRI) {
54438614Smckusick 		(void) bawrite(bp);
5458670S 		goto loop;
5468670S 	}
54740341Smckusick 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
54839668Smckusick 	if (bp->b_vp)
54939668Smckusick 		brelvp(bp);
55038776Smckusick 	if (bp->b_rcred != NOCRED) {
55138776Smckusick 		cred = bp->b_rcred;
55238776Smckusick 		bp->b_rcred = NOCRED;
55338776Smckusick 		crfree(cred);
55438776Smckusick 	}
55538776Smckusick 	if (bp->b_wcred != NOCRED) {
55638776Smckusick 		cred = bp->b_wcred;
55738776Smckusick 		bp->b_wcred = NOCRED;
55838776Smckusick 		crfree(cred);
55938776Smckusick 	}
5608670S 	bp->b_flags = B_BUSY;
56146989Smckusick 	bp->b_dirtyoff = bp->b_dirtyend = 0;
56252189Smckusick 	bp->b_validoff = bp->b_validend = 0;
5638670S 	return (bp);
5648670S }
5658670S 
5668670S /*
56746151Smckusick  * Wait for I/O to complete.
56846151Smckusick  *
56946151Smckusick  * Extract and return any errors associated with the I/O.
57046151Smckusick  * If the error flag is set, but no specific error is
57146151Smckusick  * given, return EIO.
5728Sbill  */
5737015Smckusick biowait(bp)
5746563Smckusic 	register struct buf *bp;
5758Sbill {
5765431Sroot 	int s;
5778Sbill 
57826271Skarels 	s = splbio();
57938776Smckusick 	while ((bp->b_flags & B_DONE) == 0)
5808Sbill 		sleep((caddr_t)bp, PRIBIO);
5815431Sroot 	splx(s);
58237736Smckusick 	if ((bp->b_flags & B_ERROR) == 0)
58337736Smckusick 		return (0);
58437736Smckusick 	if (bp->b_error)
58537736Smckusick 		return (bp->b_error);
58637736Smckusick 	return (EIO);
5878Sbill }
5888Sbill 
5898Sbill /*
59013128Ssam  * Mark I/O complete on a buffer.
59146151Smckusick  *
59246151Smckusick  * If a callback has been requested, e.g. the pageout
59346151Smckusick  * daemon, do so. Otherwise, awaken waiting processes.
5948Sbill  */
59551455Sbostic void
5967015Smckusick biodone(bp)
5977015Smckusick 	register struct buf *bp;
5988Sbill {
5998Sbill 
600420Sbill 	if (bp->b_flags & B_DONE)
6017015Smckusick 		panic("dup biodone");
6028Sbill 	bp->b_flags |= B_DONE;
60349232Smckusick 	if ((bp->b_flags & B_READ) == 0)
60449232Smckusick 		vwakeup(bp);
6059763Ssam 	if (bp->b_flags & B_CALL) {
6069763Ssam 		bp->b_flags &= ~B_CALL;
6079763Ssam 		(*bp->b_iodone)(bp);
6089763Ssam 		return;
6099763Ssam 	}
61046151Smckusick 	if (bp->b_flags & B_ASYNC)
6118Sbill 		brelse(bp);
6128Sbill 	else {
6138Sbill 		bp->b_flags &= ~B_WANTED;
6148Sbill 		wakeup((caddr_t)bp);
6158Sbill 	}
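
/*
 * Illustrative asynchronous completion (a sketch, not part of this file):
 * a caller such as the pageout daemon may request a callback rather than
 * sleeping in biowait().  "mycleanup" is a hypothetical routine.
 *
 *	bp->b_flags |= B_ASYNC | B_CALL;
 *	bp->b_iodone = mycleanup;
 *	VOP_STRATEGY(bp);
 *
 * biodone() will then call mycleanup(bp) when the I/O finishes, instead
 * of awakening a sleeping process.
 */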
6168Sbill }
617