xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 2299)
/*	vfs_cluster.c	4.5	01/28/81	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
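
/*
 * Illustrative sketch only (added commentary, not part of the original
 * source): a typical read-modify-release cycle through these routines.
 * The routine name and its dev/blkno arguments are hypothetical.
 */
#ifdef notdef
examplerw(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = bread(dev, blkno);		/* allocate and read; marked busy */
	bp->b_un.b_words[0] = 0;	/* ... modify the block's data ... */
	bdwrite(bp);			/* release; physical write deferred */
}
#endif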

#define	BUFHSZ	63
#define	BUFHASH(blkno)	(blkno % BUFHSZ)
short	bufhash[BUFHSZ];

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;

	for (i = 0; i < BUFHSZ; i++)
		bufhash[i] = -1;
}
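
/*
 * Added commentary: bufhash holds buffer *indices* rather than pointers;
 * -1 marks an empty slot or the end of a chain.  Chains are threaded
 * through b_hlink and walked as
 *
 *	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
 *	    bp = &buf[bp->b_hlink])
 *
 * where &buf[-1] is simply index -1 converted back to pointer form.
 */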

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];

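/*
 * Added sketch of the lifecycle just described:
 *
 *	free list --(allocate, queue I/O)--> swap device list
 *	swap device list --(I/O done, dirty page push)--> cleaned list
 *	cleaned list --(pageout daemon)--> free list
 */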

#ifdef	FASTVAX
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	EPAWNJ
		trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
	trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;		/* pay for read */
		}
#ifdef	EPAWNJ
		else
			trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_HIT|TR_RA, dev, rablkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;		/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
#ifdef	EPAWNJ
	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if (dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
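
/*
 * Added illustration of why the write is delayed: two successive
 * partial writes to the same block then cost only one disk transfer
 * (the dev/blkno values are hypothetical):
 *
 *	bp = bread(dev, blkno);   fill first half;    bdwrite(bp);
 *	bp = bread(dev, blkno);   (cache hit, no I/O)
 *	fill second half;         bdwrite(bp);
 *
 * The block reaches the disk once, when the buffer is reclaimed for
 * another block or when bflush() runs.
 */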

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);
		bp->b_dev = NODEV;  /* no assoc. on error */
	}
	s = spl6();
	if (bp->b_flags & (B_AGE|B_ERROR)) {
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev
					&& !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

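/*
 * Added commentary: return the buffer only if the block is already
 * in core (the bread below is then a cache hit); otherwise return 0
 * and start no I/O.
 */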
struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x, dblkno;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
    loop:
	(void) spl0();
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev
					|| bp->b_flags & B_INVAL)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;
	/* INLINE EXPANSION OF bunhash(bp) */
#ifdef EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
	} else {
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");
	}
done:
	(void) spl0();
	/* END INLINE EXPANSION */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV) {
#ifdef EPAWNJ
		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
		bunhash(bp);
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;
	return(bp);
}
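
/*
 * Illustrative sketch only (added, hypothetical caller): a scratch
 * buffer is typically taken with geteblk, zeroed, used, and released.
 */
#ifdef notdef
examplescratch()
{
	register struct buf *bp;

	bp = geteblk();		/* busy buffer, dev == NODEV */
	clrbuf(bp);		/* zero its BSIZE bytes */
	/* ... use bp->b_un.b_addr as temporary storage ... */
	brelse(bp);		/* hand it back to the free list */
}
#endif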

bunhash(bp)
	register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifndef FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while ((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

loop:
	(void) spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	(void) spl0();
}

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	(void) spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error==0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		(void) spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
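
/*
 * Illustrative sketch only (added): how a character-device driver's
 * raw read entry typically invokes physio.  The "xx" driver name, its
 * buffer and strategy routine are hypothetical.
 */
#ifdef notdef
struct buf xxrawbuf;

xxread(dev)
dev_t dev;
{

	physio(xxstrategy, &xxrawbuf, dev, B_READ, minphys);
}
#endif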

/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness.						... kre
 */
binval(dev)
dev_t dev;
{
	register struct buf *bp, *dp;

	dp = bdevsw[major(dev)].d_tab;

	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_dev == dev)
			bp->b_flags |= B_INVAL;
}