/*	vfs_cluster.c	4.39	82/11/13	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return(bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return(bp);
}
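
/*
 * Illustrative sketch, not part of the original source: a typical
 * caller reads a block, uses the data, and releases the buffer so
 * that it can be reassigned ("bn" and "dst" are hypothetical).
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, bn, size);
 *	if (u.u_error == 0)
 *		bcopy(bp->b_un.b_addr, dst, bp->b_bcount);
 *	brelse(bp);
 */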

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_ru.ru_inblock++;		/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rabsize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_ru.ru_inblock++;		/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
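
/*
 * Hypothetical use: a sequential reader primes the cache with the
 * next block so the following bread() hits in core.  Here rabn and
 * rasize (made-up names) describe the block after bn on the device.
 *
 *	bp = breada(dev, bn, size, rabn, rasize);
 *	...
 *	brelse(bp);
 */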

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one has paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 * Otherwise, the i/o must be finished and we check for
	 * an error.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		u.u_error = geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one has paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
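
/*
 * Summary for reference (drawn from the routines above): the three
 * write interfaces trade safety against latency.
 *
 *	bwrite(bp)	synchronous; start i/o, wait, then release
 *	bawrite(bp)	asynchronous; start i/o, release on completion
 *	bdwrite(bp)	delayed; mark B_DELWRI (magtape excepted) and
 *			write only when the buffer is reclaimed or
 *			bflush() runs
 */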

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone's waiting for this buffer, or
	 * someone is waiting for any buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}
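
/*
 * Hypothetical example: a caller about to overwrite an entire block
 * can use getblk() directly, avoiding the read that bread() would
 * issue for data that is to be clobbered anyway ("src" is made up).
 *
 *	bp = getblk(dev, bn, size);
 *	bcopy(src, bp->b_un.b_addr, size);
 *	bdwrite(bp);
 */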

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}
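
/*
 * Sketch of a use, assumed rather than taken from this file: code
 * needing scratch space tied to no disk block takes an empty buffer
 * and hands it back when finished.
 *
 *	bp = geteblk(size);
 *	... use bp->b_un.b_addr ...
 *	brelse(bp);
 */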

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is disposed of.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		allocbuf(bp, size);
		return (1);
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV) {
		allocbuf(bp, size);
		return (1);
	}

	/*
	 * Search cache for any buffers that overlap the one that we
	 * are trying to allocate. Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI). A disk block must be mapped by at most one buffer
	 * at any point in time. Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + (size / DEV_BSIZE) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	allocbuf(bp, size);
	return (1);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change.
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		ep = bfreelist[BQ_EMPTY].av_forw;
		if (ep == &bfreelist[BQ_EMPTY])
			goto out;
		notavail(ep);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &bfreelist[BQ_EMPTY]);
			bp->b_dev = (dev_t)NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
}
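
/*
 * Worked example (assuming CLBYTES is 1024, as on the VAX):
 * allocbuf(tp, 3000) rounds the allocation up to sizealloc = 3072;
 * pages are then shuffled with pagemove() until tp->b_bufsize is
 * 3072, and b_bcount records the exact 3000 bytes requested.
 */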

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * This stub is provided to allow the system to reclaim
	 * memory from the buffer pool. Currently we do not migrate
	 * memory between the buffer memory pool and the user memory
	 * pool.
	 */
	bp->b_bcount = 0;
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Ensure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;
	int s;

	start = blkno;
	last = start + (size / DEV_BSIZE) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			splx(s);
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
		splx(s);
	}
}

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if ((bp->b_flags & B_DELWRI) == 0)
			continue;
		if (dev == NODEV || dev == bp->b_dev) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	splx(s);
}
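
/*
 * Sketch of the callers named in the comment above (assumed, not
 * shown in this file): the periodic update routine syncs every
 * device, while umount flushes and then invalidates just one.
 *
 *	bflush(NODEV);			update: all devices
 *	bflush(dev); binval(dev);	umount: one device
 */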

/*
 * Pick up the device's error number and pass it to the caller;
 * if there is an error but the number is 0, return a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error)==0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in core blocks belonging to a closed or unmounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness.						... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}