xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 30749)
123395Smckusick /*
229114Smckusick  * Copyright (c) 1982, 1986 Regents of the University of California.
323395Smckusick  * All rights reserved.  The Berkeley software License Agreement
423395Smckusick  * specifies the terms and conditions for redistribution.
523395Smckusick  *
6*30749Skarels  *	@(#)vfs_cluster.c	7.1.1.1 (Berkeley) 04/02/87
723395Smckusick  */
88Sbill 
99763Ssam #include "../machine/pte.h"
109763Ssam 
1117098Sbloom #include "param.h"
1217098Sbloom #include "systm.h"
1317098Sbloom #include "dir.h"
1417098Sbloom #include "user.h"
1517098Sbloom #include "buf.h"
1617098Sbloom #include "conf.h"
1717098Sbloom #include "proc.h"
1817098Sbloom #include "seg.h"
1917098Sbloom #include "vm.h"
2017098Sbloom #include "trace.h"
218Sbill 
2291Sbill /*
238Sbill  * Read in (if necessary) the block and return a buffer pointer.
248Sbill  */
258Sbill struct buf *
26*30749Skarels #ifdef SECSIZE
27*30749Skarels bread(dev, blkno, size, secsize)
28*30749Skarels #else SECSIZE
296563Smckusic bread(dev, blkno, size)
30*30749Skarels #endif SECSIZE
316563Smckusic 	dev_t dev;
326563Smckusic 	daddr_t blkno;
336563Smckusic 	int size;
34*30749Skarels #ifdef SECSIZE
35*30749Skarels 	long secsize;
36*30749Skarels #endif SECSIZE
378Sbill {
388Sbill 	register struct buf *bp;
398Sbill 
408670S 	if (size == 0)
418670S 		panic("bread: size 0");
42*30749Skarels #ifdef SECSIZE
43*30749Skarels 	bp = getblk(dev, blkno, size, secsize);
44*30749Skarels #else SECSIZE
456563Smckusic 	bp = getblk(dev, blkno, size);
46*30749Skarels #endif SECSIZE
478Sbill 	if (bp->b_flags&B_DONE) {
4815795Ssam 		trace(TR_BREADHIT, pack(dev, size), blkno);
4926271Skarels 		return (bp);
508Sbill 	}
518Sbill 	bp->b_flags |= B_READ;
528670S 	if (bp->b_bcount > bp->b_bufsize)
538670S 		panic("bread");
548Sbill 	(*bdevsw[major(dev)].d_strategy)(bp);
5515795Ssam 	trace(TR_BREADMISS, pack(dev, size), blkno);
568039Sroot 	u.u_ru.ru_inblock++;		/* pay for read */
577015Smckusick 	biowait(bp);
5826271Skarels 	return (bp);
598Sbill }
608Sbill 
618Sbill /*
628Sbill  * Read in the block, like bread, but also start I/O on the
638Sbill  * read-ahead block (which is not allocated to the caller)
648Sbill  */
658Sbill struct buf *
66*30749Skarels #ifdef SECSIZE
67*30749Skarels breada(dev, blkno, size, secsize, rablkno, rabsize)
68*30749Skarels #else SECSIZE
698592Sroot breada(dev, blkno, size, rablkno, rabsize)
70*30749Skarels #endif SECSIZE
716563Smckusic 	dev_t dev;
727114Smckusick 	daddr_t blkno; int size;
73*30749Skarels #ifdef SECSIZE
74*30749Skarels 	long secsize;
75*30749Skarels #endif SECSIZE
768592Sroot 	daddr_t rablkno; int rabsize;
778Sbill {
788Sbill 	register struct buf *bp, *rabp;
798Sbill 
808Sbill 	bp = NULL;
817015Smckusick 	/*
827015Smckusick 	 * If the block isn't in core, then allocate
837015Smckusick 	 * a buffer and initiate i/o (getblk checks
847015Smckusick 	 * for a cache hit).
857015Smckusick 	 */
868Sbill 	if (!incore(dev, blkno)) {
87*30749Skarels #ifdef SECSIZE
88*30749Skarels 		bp = getblk(dev, blkno, size, secsize);
89*30749Skarels #else SECSIZE
906563Smckusic 		bp = getblk(dev, blkno, size);
91*30749Skarels #endif SECSIZE
928Sbill 		if ((bp->b_flags&B_DONE) == 0) {
938Sbill 			bp->b_flags |= B_READ;
948670S 			if (bp->b_bcount > bp->b_bufsize)
958670S 				panic("breada");
968Sbill 			(*bdevsw[major(dev)].d_strategy)(bp);
9715795Ssam 			trace(TR_BREADMISS, pack(dev, size), blkno);
988039Sroot 			u.u_ru.ru_inblock++;		/* pay for read */
997015Smckusick 		} else
10015795Ssam 			trace(TR_BREADHIT, pack(dev, size), blkno);
1018Sbill 	}
1027015Smckusick 
1037015Smckusick 	/*
1047015Smckusick 	 * If there's a read-ahead block, start i/o
1057015Smckusick 	 * on it also (as above).
1067015Smckusick 	 */
1078Sbill 	if (rablkno && !incore(dev, rablkno)) {
108*30749Skarels #ifdef SECSIZE
109*30749Skarels 		rabp = getblk(dev, rablkno, rabsize, secsize);
110*30749Skarels #else SECSIZE
1118592Sroot 		rabp = getblk(dev, rablkno, rabsize);
112*30749Skarels #endif SECSIZE
1132045Swnj 		if (rabp->b_flags & B_DONE) {
1148Sbill 			brelse(rabp);
11515795Ssam 			trace(TR_BREADHITRA, pack(dev, rabsize), blkno);
1162045Swnj 		} else {
1178Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1188670S 			if (rabp->b_bcount > rabp->b_bufsize)
1198670S 				panic("breadrabp");
1208Sbill 			(*bdevsw[major(dev)].d_strategy)(rabp);
12115795Ssam 			trace(TR_BREADMISSRA, pack(dev, rabsize), rablock);
1228039Sroot 			u.u_ru.ru_inblock++;		/* pay in advance */
1238Sbill 		}
1248Sbill 	}
1257015Smckusick 
1267015Smckusick 	/*
1277114Smckusick 	 * If block was in core, let bread get it.
1287114Smckusick 	 * If block wasn't in core, then the read was started
1297114Smckusick 	 * above, and just wait for it.
1307015Smckusick 	 */
1317114Smckusick 	if (bp == NULL)
132*30749Skarels #ifdef SECSIZE
133*30749Skarels 		return (bread(dev, blkno, size, secsize));
134*30749Skarels #else SECSIZE
1357114Smckusick 		return (bread(dev, blkno, size));
136*30749Skarels #endif SECSIZE
1377015Smckusick 	biowait(bp);
1387114Smckusick 	return (bp);
1398Sbill }
1408Sbill 
1418Sbill /*
1428Sbill  * Write the buffer, waiting for completion.
1438Sbill  * Then release the buffer.
1448Sbill  */
1458Sbill bwrite(bp)
1467015Smckusick 	register struct buf *bp;
1478Sbill {
1488Sbill 	register flag;
1498Sbill 
1508Sbill 	flag = bp->b_flags;
1519857Ssam 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
1528Sbill 	if ((flag&B_DELWRI) == 0)
1538039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
15415795Ssam 	trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
1558670S 	if (bp->b_bcount > bp->b_bufsize)
1568670S 		panic("bwrite");
1578Sbill 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
1587015Smckusick 
1597015Smckusick 	/*
1607015Smckusick 	 * If the write was synchronous, then await i/o completion.
1617015Smckusick 	 * If the write was "delayed", then we put the buffer on
1627015Smckusick 	 * the q of blocks awaiting i/o completion status.
1637015Smckusick 	 */
1648Sbill 	if ((flag&B_ASYNC) == 0) {
1657015Smckusick 		biowait(bp);
1668Sbill 		brelse(bp);
1678Sbill 	} else if (flag & B_DELWRI)
1688Sbill 		bp->b_flags |= B_AGE;
1698Sbill }
1708Sbill 
1718Sbill /*
1728Sbill  * Release the buffer, marking it so that if it is grabbed
1738Sbill  * for another purpose it will be written out before being
1748Sbill  * given up (e.g. when writing a partial block where it is
1758Sbill  * assumed that another write for the same block will soon follow).
1768Sbill  * This can't be done for magtape, since writes must be done
1778Sbill  * in the same order as requested.
1788Sbill  */
1798Sbill bdwrite(bp)
1807015Smckusick 	register struct buf *bp;
1818Sbill {
1828Sbill 
1838Sbill 	if ((bp->b_flags&B_DELWRI) == 0)
1848039Sroot 		u.u_ru.ru_oublock++;		/* noone paid yet */
185*30749Skarels 	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
1868Sbill 		bawrite(bp);
1878Sbill 	else {
1888Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
1898Sbill 		brelse(bp);
1908Sbill 	}
1918Sbill }
1928Sbill 
1938Sbill /*
1948Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
1958Sbill  */
1968Sbill bawrite(bp)
1977015Smckusick 	register struct buf *bp;
1988Sbill {
1998Sbill 
2008Sbill 	bp->b_flags |= B_ASYNC;
2018Sbill 	bwrite(bp);
2028Sbill }
2038Sbill 
2048Sbill /*
2057015Smckusick  * Release the buffer, with no I/O implied.
2068Sbill  */
2078Sbill brelse(bp)
2087015Smckusick 	register struct buf *bp;
2098Sbill {
2102325Swnj 	register struct buf *flist;
2118Sbill 	register s;
2128Sbill 
21315795Ssam 	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
2147015Smckusick 	/*
2157015Smckusick 	 * If someone's waiting for the buffer, or
2167015Smckusick 	 * is waiting for a buffer wake 'em up.
2177015Smckusick 	 */
2188Sbill 	if (bp->b_flags&B_WANTED)
2198Sbill 		wakeup((caddr_t)bp);
2202325Swnj 	if (bfreelist[0].b_flags&B_WANTED) {
2212325Swnj 		bfreelist[0].b_flags &= ~B_WANTED;
2222325Swnj 		wakeup((caddr_t)bfreelist);
2238Sbill 	}
2242683Swnj 	if (bp->b_flags&B_ERROR)
2252683Swnj 		if (bp->b_flags & B_LOCKED)
2262683Swnj 			bp->b_flags &= ~B_ERROR;	/* try again later */
2272683Swnj 		else
2282683Swnj 			bp->b_dev = NODEV;  		/* no assoc */
2297015Smckusick 
2307015Smckusick 	/*
2317015Smckusick 	 * Stick the buffer back on a free list.
2327015Smckusick 	 */
23326271Skarels 	s = splbio();
2348670S 	if (bp->b_bufsize <= 0) {
2358670S 		/* block has no buffer ... put at front of unused buffer list */
2368670S 		flist = &bfreelist[BQ_EMPTY];
2378670S 		binsheadfree(bp, flist);
2388670S 	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
2392325Swnj 		/* block has no info ... put at front of most free list */
2408670S 		flist = &bfreelist[BQ_AGE];
2417015Smckusick 		binsheadfree(bp, flist);
2428Sbill 	} else {
2432325Swnj 		if (bp->b_flags & B_LOCKED)
2442325Swnj 			flist = &bfreelist[BQ_LOCKED];
2452325Swnj 		else if (bp->b_flags & B_AGE)
2462325Swnj 			flist = &bfreelist[BQ_AGE];
2472325Swnj 		else
2482325Swnj 			flist = &bfreelist[BQ_LRU];
2497015Smckusick 		binstailfree(bp, flist);
2508Sbill 	}
2518Sbill 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
2528Sbill 	splx(s);
2538Sbill }
2548Sbill 
2558Sbill /*
2568Sbill  * See if the block is associated with some buffer
2578Sbill  * (mainly to avoid getting hung up on a wait in breada)
2588Sbill  */
2598Sbill incore(dev, blkno)
2607015Smckusick 	dev_t dev;
2617015Smckusick 	daddr_t blkno;
2628Sbill {
2638Sbill 	register struct buf *bp;
2642325Swnj 	register struct buf *dp;
2658Sbill 
2666563Smckusic 	dp = BUFHASH(dev, blkno);
2672325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
2686563Smckusic 		if (bp->b_blkno == blkno && bp->b_dev == dev &&
2697015Smckusick 		    (bp->b_flags & B_INVAL) == 0)
27091Sbill 			return (1);
27191Sbill 	return (0);
2728Sbill }
2738Sbill 
2748Sbill struct buf *
275*30749Skarels #ifdef SECSIZE
276*30749Skarels baddr(dev, blkno, size, secsize)
277*30749Skarels #else SECSIZE
2786563Smckusic baddr(dev, blkno, size)
279*30749Skarels #endif SECSIZE
2806563Smckusic 	dev_t dev;
2816563Smckusic 	daddr_t blkno;
2826563Smckusic 	int size;
283*30749Skarels #ifdef SECSIZE
284*30749Skarels 	long secsize;
285*30749Skarels #endif SECSIZE
2868Sbill {
2878Sbill 
2888Sbill 	if (incore(dev, blkno))
289*30749Skarels #ifdef SECSIZE
290*30749Skarels 		return (bread(dev, blkno, size, secsize));
291*30749Skarels #else SECSIZE
2926563Smckusic 		return (bread(dev, blkno, size));
293*30749Skarels #endif SECSIZE
2948Sbill 	return (0);
2958Sbill }
2968Sbill 
2978Sbill /*
2988Sbill  * Assign a buffer for the given block.  If the appropriate
2998Sbill  * block is already associated, return it; otherwise search
3008Sbill  * for the oldest non-busy buffer and reassign it.
3015424Swnj  *
3025424Swnj  * We use splx here because this routine may be called
3035424Swnj  * on the interrupt stack during a dump, and we don't
3045424Swnj  * want to lower the ipl back to 0.
3058Sbill  */
3068Sbill struct buf *
307*30749Skarels #ifdef SECSIZE
308*30749Skarels getblk(dev, blkno, size, secsize)
309*30749Skarels #else SECSIZE
3106563Smckusic getblk(dev, blkno, size)
311*30749Skarels #endif SECSIZE
3126563Smckusic 	dev_t dev;
3136563Smckusic 	daddr_t blkno;
3146563Smckusic 	int size;
315*30749Skarels #ifdef SECSIZE
316*30749Skarels 	long secsize;
317*30749Skarels #endif SECSIZE
3188Sbill {
3198670S 	register struct buf *bp, *dp;
3205424Swnj 	int s;
3218Sbill 
32225255Smckusick 	if (size > MAXBSIZE)
32325255Smckusick 		panic("getblk: size too big");
3247015Smckusick 	/*
32524730Smckusick 	 * To prevent overflow of 32-bit ints when converting block
32624730Smckusick 	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
32724730Smckusick 	 * to the maximum number that can be converted to a byte offset
32824730Smckusick 	 * without overflow. This is historic code; what bug it fixed,
32924730Smckusick 	 * or whether it is still a reasonable thing to do is open to
33024730Smckusick 	 * dispute. mkm 9/85
33124730Smckusick 	 */
33224730Smckusick 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
33324730Smckusick 		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
33424730Smckusick 	/*
3357015Smckusick 	 * Search the cache for the block.  If we hit, but
3367015Smckusick 	 * the buffer is in use for i/o, then we wait until
3377015Smckusick 	 * the i/o has completed.
3387015Smckusick 	 */
3396563Smckusic 	dp = BUFHASH(dev, blkno);
3407015Smckusick loop:
3412325Swnj 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
3426563Smckusic 		if (bp->b_blkno != blkno || bp->b_dev != dev ||
3432325Swnj 		    bp->b_flags&B_INVAL)
3448Sbill 			continue;
34526271Skarels 		s = splbio();
3468Sbill 		if (bp->b_flags&B_BUSY) {
3478Sbill 			bp->b_flags |= B_WANTED;
3488Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3495424Swnj 			splx(s);
3508Sbill 			goto loop;
3518Sbill 		}
3525424Swnj 		splx(s);
3538Sbill 		notavail(bp);
35416855Smckusick 		if (bp->b_bcount != size && brealloc(bp, size) == 0)
3557188Sroot 			goto loop;
3568Sbill 		bp->b_flags |= B_CACHE;
35726271Skarels 		return (bp);
3588Sbill 	}
35991Sbill 	if (major(dev) >= nblkdev)
36091Sbill 		panic("blkdev");
3618670S 	bp = getnewbuf();
3626563Smckusic 	bfree(bp);
3637015Smckusick 	bremhash(bp);
3647015Smckusick 	binshash(bp, dp);
3658Sbill 	bp->b_dev = dev;
366*30749Skarels #ifdef SECSIZE
367*30749Skarels 	bp->b_blksize = secsize;
368*30749Skarels #endif SECSIZE
3696563Smckusic 	bp->b_blkno = blkno;
3708670S 	bp->b_error = 0;
3717188Sroot 	if (brealloc(bp, size) == 0)
3727188Sroot 		goto loop;
37326271Skarels 	return (bp);
3748Sbill }
3758Sbill 
3768Sbill /*
3778Sbill  * get an empty block,
3788Sbill  * not assigned to any particular device
3798Sbill  */
3808Sbill struct buf *
3816563Smckusic geteblk(size)
3826563Smckusic 	int size;
3838Sbill {
3848670S 	register struct buf *bp, *flist;
3858Sbill 
38625255Smckusick 	if (size > MAXBSIZE)
38725255Smckusick 		panic("geteblk: size too big");
3888Sbill loop:
3898670S 	bp = getnewbuf();
3908670S 	bp->b_flags |= B_INVAL;
3917015Smckusick 	bfree(bp);
3927015Smckusick 	bremhash(bp);
3938670S 	flist = &bfreelist[BQ_AGE];
3948670S 	binshash(bp, flist);
3958Sbill 	bp->b_dev = (dev_t)NODEV;
396*30749Skarels #ifdef SECSIZE
397*30749Skarels 	bp->b_blksize = DEV_BSIZE;
398*30749Skarels #endif SECSIZE
3998670S 	bp->b_error = 0;
4007188Sroot 	if (brealloc(bp, size) == 0)
4017188Sroot 		goto loop;
40226271Skarels 	return (bp);
4038Sbill }
4048Sbill 
4058Sbill /*
4066563Smckusic  * Allocate space associated with a buffer.
4079763Ssam  * If can't get space, buffer is released
4086563Smckusic  */
4096563Smckusic brealloc(bp, size)
4106563Smckusic 	register struct buf *bp;
4116563Smckusic 	int size;
4126563Smckusic {
4136563Smckusic 	daddr_t start, last;
4146563Smckusic 	register struct buf *ep;
4156563Smckusic 	struct buf *dp;
4166563Smckusic 	int s;
4176563Smckusic 
4186563Smckusic 	/*
419*30749Skarels 	 * First need to make sure that all overlapping previous I/O
4206563Smckusic 	 * is dispatched with.
4216563Smckusic 	 */
4226563Smckusic 	if (size == bp->b_bcount)
4237188Sroot 		return (1);
4247188Sroot 	if (size < bp->b_bcount) {
4257188Sroot 		if (bp->b_flags & B_DELWRI) {
4267188Sroot 			bwrite(bp);
4277188Sroot 			return (0);
4287188Sroot 		}
4297188Sroot 		if (bp->b_flags & B_LOCKED)
4307188Sroot 			panic("brealloc");
4319763Ssam 		return (allocbuf(bp, size));
4327188Sroot 	}
4337188Sroot 	bp->b_flags &= ~B_DONE;
4349763Ssam 	if (bp->b_dev == NODEV)
4359763Ssam 		return (allocbuf(bp, size));
4367016Smckusick 
43715795Ssam 	trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
4387188Sroot 	/*
4397188Sroot 	 * Search cache for any buffers that overlap the one that we
4407188Sroot 	 * are trying to allocate. Overlapping buffers must be marked
4417188Sroot 	 * invalid, after being written out if they are dirty. (indicated
4427188Sroot 	 * by B_DELWRI) A disk block must be mapped by at most one buffer
4437188Sroot 	 * at any point in time. Care must be taken to avoid deadlocking
4447188Sroot 	 * when two buffer are trying to get the same set of disk blocks.
4457188Sroot 	 */
4467188Sroot 	start = bp->b_blkno;
447*30749Skarels #ifdef SECSIZE
448*30749Skarels 	last = start + size/bp->b_blksize - 1;
449*30749Skarels #else SECSIZE
45012644Ssam 	last = start + btodb(size) - 1;
451*30749Skarels #endif SECSIZE
4526563Smckusic 	dp = BUFHASH(bp->b_dev, bp->b_blkno);
4536563Smckusic loop:
4546563Smckusic 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
4557188Sroot 		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
4566563Smckusic 			continue;
4577188Sroot 		/* look for overlap */
4587188Sroot 		if (ep->b_bcount == 0 || ep->b_blkno > last ||
459*30749Skarels #ifdef SECSIZE
460*30749Skarels 		    ep->b_blkno + ep->b_bcount/ep->b_blksize <= start)
461*30749Skarels #else SECSIZE
46212644Ssam 		    ep->b_blkno + btodb(ep->b_bcount) <= start)
463*30749Skarels #endif SECSIZE
4647188Sroot 			continue;
46526271Skarels 		s = splbio();
4666563Smckusic 		if (ep->b_flags&B_BUSY) {
4676563Smckusic 			ep->b_flags |= B_WANTED;
4686563Smckusic 			sleep((caddr_t)ep, PRIBIO+1);
4698670S 			splx(s);
4706563Smckusic 			goto loop;
4716563Smckusic 		}
4728670S 		splx(s);
4737188Sroot 		notavail(ep);
4746563Smckusic 		if (ep->b_flags & B_DELWRI) {
4756563Smckusic 			bwrite(ep);
4766563Smckusic 			goto loop;
4776563Smckusic 		}
4787188Sroot 		ep->b_flags |= B_INVAL;
4797188Sroot 		brelse(ep);
4806563Smckusic 	}
4819763Ssam 	return (allocbuf(bp, size));
4828670S }
4838670S 
4848670S /*
4858670S  * Find a buffer which is available for use.
4868670S  * Select something from a free list.
4878670S  * Preference is to AGE list, then LRU list.
4888670S  */
4898670S struct buf *
4908670S getnewbuf()
4918670S {
4928670S 	register struct buf *bp, *dp;
4938670S 	int s;
4948670S 
4958670S loop:
49626271Skarels 	s = splbio();
4978670S 	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
4988670S 		if (dp->av_forw != dp)
4998670S 			break;
5008670S 	if (dp == bfreelist) {		/* no free blocks */
5018670S 		dp->b_flags |= B_WANTED;
5028670S 		sleep((caddr_t)dp, PRIBIO+1);
50312170Ssam 		splx(s);
5048670S 		goto loop;
5058670S 	}
5068670S 	splx(s);
5078670S 	bp = dp->av_forw;
5088670S 	notavail(bp);
5098670S 	if (bp->b_flags & B_DELWRI) {
5108670S 		bp->b_flags |= B_ASYNC;
5118670S 		bwrite(bp);
5128670S 		goto loop;
5138670S 	}
51415795Ssam 	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
5158670S 	bp->b_flags = B_BUSY;
5168670S 	return (bp);
5178670S }
5188670S 
5198670S /*
5208Sbill  * Wait for I/O completion on the buffer; return errors
5218Sbill  * to the user.
5228Sbill  */
5237015Smckusick biowait(bp)
5246563Smckusic 	register struct buf *bp;
5258Sbill {
5265431Sroot 	int s;
5278Sbill 
52826271Skarels 	s = splbio();
5298Sbill 	while ((bp->b_flags&B_DONE)==0)
5308Sbill 		sleep((caddr_t)bp, PRIBIO);
5315431Sroot 	splx(s);
53211841Ssam 	if (u.u_error == 0)			/* XXX */
53311841Ssam 		u.u_error = geterror(bp);
5348Sbill }
5358Sbill 
5368Sbill /*
53713128Ssam  * Mark I/O complete on a buffer.
53813128Ssam  * If someone should be called, e.g. the pageout
53913128Ssam  * daemon, do so.  Otherwise, wake up anyone
54013128Ssam  * waiting for it.
5418Sbill  */
5427015Smckusick biodone(bp)
5437015Smckusick 	register struct buf *bp;
5448Sbill {
5458Sbill 
546420Sbill 	if (bp->b_flags & B_DONE)
5477015Smckusick 		panic("dup biodone");
5488Sbill 	bp->b_flags |= B_DONE;
5499763Ssam 	if (bp->b_flags & B_CALL) {
5509763Ssam 		bp->b_flags &= ~B_CALL;
5519763Ssam 		(*bp->b_iodone)(bp);
5529763Ssam 		return;
5539763Ssam 	}
5548Sbill 	if (bp->b_flags&B_ASYNC)
5558Sbill 		brelse(bp);
5568Sbill 	else {
5578Sbill 		bp->b_flags &= ~B_WANTED;
5588Sbill 		wakeup((caddr_t)bp);
5598Sbill 	}
5608Sbill }
5618Sbill 
5628Sbill /*
5638670S  * Insure that no part of a specified block is in an incore buffer.
564*30749Skarels #ifdef SECSIZE
565*30749Skarels  * "size" is given in device blocks (the units of b_blkno).
566*30749Skarels #endif SECSIZE
5678670S  */
5688670S blkflush(dev, blkno, size)
5698670S 	dev_t dev;
5708670S 	daddr_t blkno;
571*30749Skarels #ifdef SECSIZE
572*30749Skarels 	int size;
573*30749Skarels #else SECSIZE
5748670S 	long size;
575*30749Skarels #endif SECSIZE
5768670S {
5778670S 	register struct buf *ep;
5788670S 	struct buf *dp;
5798670S 	daddr_t start, last;
5808670S 	int s;
5818670S 
5828670S 	start = blkno;
583*30749Skarels #ifdef SECSIZE
584*30749Skarels 	last = start + size - 1;
585*30749Skarels #else SECSIZE
58612644Ssam 	last = start + btodb(size) - 1;
587*30749Skarels #endif SECSIZE
5888670S 	dp = BUFHASH(dev, blkno);
5898670S loop:
5908670S 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
5918670S 		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
5928670S 			continue;
5938670S 		/* look for overlap */
5948670S 		if (ep->b_bcount == 0 || ep->b_blkno > last ||
595*30749Skarels #ifdef SECSIZE
596*30749Skarels 		    ep->b_blkno + ep->b_bcount / ep->b_blksize <= start)
597*30749Skarels #else SECSIZE
59812644Ssam 		    ep->b_blkno + btodb(ep->b_bcount) <= start)
599*30749Skarels #endif SECSIZE
6008670S 			continue;
60126271Skarels 		s = splbio();
6028670S 		if (ep->b_flags&B_BUSY) {
6038670S 			ep->b_flags |= B_WANTED;
6048670S 			sleep((caddr_t)ep, PRIBIO+1);
6058670S 			splx(s);
6068670S 			goto loop;
6078670S 		}
6088670S 		if (ep->b_flags & B_DELWRI) {
6098670S 			splx(s);
6108670S 			notavail(ep);
6118670S 			bwrite(ep);
6128670S 			goto loop;
6138670S 		}
6148670S 		splx(s);
6158670S 	}
6168670S }
6178670S 
6188670S /*
61913128Ssam  * Make sure all write-behind blocks
6208Sbill  * on dev (or NODEV for all)
6218Sbill  * are flushed out.
6228Sbill  * (from umount and update)
6238Sbill  */
6248Sbill bflush(dev)
6257015Smckusick 	dev_t dev;
6268Sbill {
6278Sbill 	register struct buf *bp;
6282325Swnj 	register struct buf *flist;
6295431Sroot 	int s;
6308Sbill 
6318Sbill loop:
63226271Skarels 	s = splbio();
6338670S 	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
6342325Swnj 	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
6357015Smckusick 		if ((bp->b_flags & B_DELWRI) == 0)
6367015Smckusick 			continue;
6377015Smckusick 		if (dev == NODEV || dev == bp->b_dev) {
6388Sbill 			bp->b_flags |= B_ASYNC;
6398Sbill 			notavail(bp);
6408Sbill 			bwrite(bp);
64112173Ssam 			splx(s);
6428Sbill 			goto loop;
6438Sbill 		}
6448Sbill 	}
6455431Sroot 	splx(s);
6468Sbill }
6478Sbill 
6488Sbill /*
6498Sbill  * Pick up the device's error number and pass it to the user;
65024829Skarels  * if there is an error but the number is 0 set a generalized code.
6518Sbill  */
6528Sbill geterror(bp)
6537015Smckusick 	register struct buf *bp;
6548Sbill {
6557723Swnj 	int error = 0;
6568Sbill 
6578Sbill 	if (bp->b_flags&B_ERROR)
6587723Swnj 		if ((error = bp->b_error)==0)
6597723Swnj 			return (EIO);
6607723Swnj 	return (error);
6618Sbill }
6622299Skre 
6632299Skre /*
6642299Skre  * Invalidate in core blocks belonging to closed or umounted filesystem
6652299Skre  *
6662299Skre  * This is not nicely done at all - the buffer ought to be removed from the
6672299Skre  * hash chains & have its dev/blkno fields clobbered, but unfortunately we
6682299Skre  * can't do that here, as it is quite possible that the block is still
6692299Skre  * being used for i/o. Eventually, all disc drivers should be forced to
6702299Skre  * have a close routine, which ought ensure that the queue is empty, then
6712299Skre  * properly flush the queues. Until that happy day, this suffices for
6722299Skre  * correctness.						... kre
6732299Skre  */
6742299Skre binval(dev)
6757015Smckusick 	dev_t dev;
6762299Skre {
6772361Skre 	register struct buf *bp;
6782361Skre 	register struct bufhd *hp;
6792361Skre #define dp ((struct buf *)hp)
6802299Skre 
6812361Skre 	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
6822361Skre 		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
6832361Skre 			if (bp->b_dev == dev)
6842361Skre 				bp->b_flags |= B_INVAL;
6852299Skre }
686