xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 1831)
1*1831Sbill /*	vfs_cluster.c	4.3	11/24/80	*/
28Sbill 
38Sbill #include "../h/param.h"
48Sbill #include "../h/systm.h"
58Sbill #include "../h/dir.h"
68Sbill #include "../h/user.h"
78Sbill #include "../h/buf.h"
88Sbill #include "../h/conf.h"
98Sbill #include "../h/proc.h"
108Sbill #include "../h/seg.h"
118Sbill #include "../h/pte.h"
128Sbill #include "../h/vm.h"
138Sbill 
/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

/*
 * Block-number hash for the buffer cache: bufhash[h] is the index into
 * buf[] of the first buffer on chain h; buffers chain through b_hlink
 * and a chain is terminated by -1 (see bhinit()).
 * The macro argument is parenthesized so expressions hash correctly.
 */
#define	BUFHSZ	63
#define	BUFHASH(blkno)	((blkno) % BUFHSZ)
short	bufhash[BUFHSZ];
3991Sbill 
4091Sbill /*
4191Sbill  * Initialize hash links for buffers.
4291Sbill  */
4391Sbill bhinit()
4491Sbill {
4591Sbill 	register int i;
4691Sbill 
4791Sbill 	for (i = 0; i < BUFHSZ; i++)
4891Sbill 		bufhash[i] = -1;
4991Sbill }
5091Sbill 
/* #define	DISKMON	1 */

#ifdef	DISKMON
/*
 * Optional disk-activity counters, compiled in when DISKMON is defined;
 * updated by bread/breada/bwrite/getblk below.
 */
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif
638Sbill 
648Sbill /*
658Sbill  * Swap IO headers -
668Sbill  * They contain the necessary information for the swap I/O.
678Sbill  * At any given time, a swap header can be in three
688Sbill  * different lists. When free it is in the free list,
698Sbill  * when allocated and the I/O queued, it is on the swap
708Sbill  * device list, and finally, if the operation was a dirty
718Sbill  * page push, when the I/O completes, it is inserted
728Sbill  * in a list of cleaned pages to be processed by the pageout daemon.
738Sbill  */
748Sbill struct	buf swbuf[NSWBUF];
758Sbill short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
768Sbill int	swpf[NSWBUF];
778Sbill 
788Sbill 
798Sbill #ifdef	FASTVAX
808Sbill #define	notavail(bp) \
818Sbill { \
828Sbill 	int s = spl6(); \
838Sbill 	(bp)->av_back->av_forw = (bp)->av_forw; \
848Sbill 	(bp)->av_forw->av_back = (bp)->av_back; \
858Sbill 	(bp)->b_flags |= B_BUSY; \
868Sbill 	splx(s); \
878Sbill }
888Sbill #endif
898Sbill 
908Sbill /*
918Sbill  * Read in (if necessary) the block and return a buffer pointer.
928Sbill  */
938Sbill struct buf *
948Sbill bread(dev, blkno)
958Sbill dev_t dev;
968Sbill daddr_t blkno;
978Sbill {
988Sbill 	register struct buf *bp;
998Sbill 
1008Sbill 	bp = getblk(dev, blkno);
1018Sbill 	if (bp->b_flags&B_DONE) {
1028Sbill #ifdef	DISKMON
1038Sbill 		io_info.ncache++;
1048Sbill #endif
1058Sbill 		return(bp);
1068Sbill 	}
1078Sbill 	bp->b_flags |= B_READ;
1088Sbill 	bp->b_bcount = BSIZE;
1098Sbill 	(*bdevsw[major(dev)].d_strategy)(bp);
1108Sbill #ifdef	DISKMON
1118Sbill 	io_info.nread++;
1128Sbill #endif
1138Sbill 	u.u_vm.vm_inblk++;		/* pay for read */
1148Sbill 	iowait(bp);
1158Sbill 	return(bp);
1168Sbill }
1178Sbill 
1188Sbill /*
1198Sbill  * Read in the block, like bread, but also start I/O on the
1208Sbill  * read-ahead block (which is not allocated to the caller)
1218Sbill  */
1228Sbill struct buf *
1238Sbill breada(dev, blkno, rablkno)
1248Sbill dev_t dev;
1258Sbill daddr_t blkno, rablkno;
1268Sbill {
1278Sbill 	register struct buf *bp, *rabp;
1288Sbill 
1298Sbill 	bp = NULL;
1308Sbill 	if (!incore(dev, blkno)) {
1318Sbill 		bp = getblk(dev, blkno);
1328Sbill 		if ((bp->b_flags&B_DONE) == 0) {
1338Sbill 			bp->b_flags |= B_READ;
1348Sbill 			bp->b_bcount = BSIZE;
1358Sbill 			(*bdevsw[major(dev)].d_strategy)(bp);
1368Sbill #ifdef	DISKMON
1378Sbill 			io_info.nread++;
1388Sbill #endif
1398Sbill 			u.u_vm.vm_inblk++;		/* pay for read */
1408Sbill 		}
1418Sbill 	}
1428Sbill 	if (rablkno && !incore(dev, rablkno)) {
1438Sbill 		rabp = getblk(dev, rablkno);
1448Sbill 		if (rabp->b_flags & B_DONE)
1458Sbill 			brelse(rabp);
1468Sbill 		else {
1478Sbill 			rabp->b_flags |= B_READ|B_ASYNC;
1488Sbill 			rabp->b_bcount = BSIZE;
1498Sbill 			(*bdevsw[major(dev)].d_strategy)(rabp);
1508Sbill #ifdef	DISKMON
1518Sbill 			io_info.nreada++;
1528Sbill #endif
1538Sbill 			u.u_vm.vm_inblk++;		/* pay in advance */
1548Sbill 		}
1558Sbill 	}
1568Sbill 	if(bp == NULL)
1578Sbill 		return(bread(dev, blkno));
1588Sbill 	iowait(bp);
1598Sbill 	return(bp);
1608Sbill }
1618Sbill 
1628Sbill /*
1638Sbill  * Write the buffer, waiting for completion.
1648Sbill  * Then release the buffer.
1658Sbill  */
1668Sbill bwrite(bp)
1678Sbill register struct buf *bp;
1688Sbill {
1698Sbill 	register flag;
1708Sbill 
1718Sbill 	flag = bp->b_flags;
1728Sbill 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
1738Sbill 	bp->b_bcount = BSIZE;
1748Sbill #ifdef	DISKMON
1758Sbill 	io_info.nwrite++;
1768Sbill #endif
1778Sbill 	if ((flag&B_DELWRI) == 0)
1788Sbill 		u.u_vm.vm_oublk++;		/* noone paid yet */
1798Sbill 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
1808Sbill 	if ((flag&B_ASYNC) == 0) {
1818Sbill 		iowait(bp);
1828Sbill 		brelse(bp);
1838Sbill 	} else if (flag & B_DELWRI)
1848Sbill 		bp->b_flags |= B_AGE;
1858Sbill 	else
1868Sbill 		geterror(bp);
1878Sbill }
1888Sbill 
1898Sbill /*
1908Sbill  * Release the buffer, marking it so that if it is grabbed
1918Sbill  * for another purpose it will be written out before being
1928Sbill  * given up (e.g. when writing a partial block where it is
1938Sbill  * assumed that another write for the same block will soon follow).
1948Sbill  * This can't be done for magtape, since writes must be done
1958Sbill  * in the same order as requested.
1968Sbill  */
1978Sbill bdwrite(bp)
1988Sbill register struct buf *bp;
1998Sbill {
2008Sbill 	register struct buf *dp;
2018Sbill 
2028Sbill 	if ((bp->b_flags&B_DELWRI) == 0)
2038Sbill 		u.u_vm.vm_oublk++;		/* noone paid yet */
2048Sbill 	dp = bdevsw[major(bp->b_dev)].d_tab;
2058Sbill 	if(dp->b_flags & B_TAPE)
2068Sbill 		bawrite(bp);
2078Sbill 	else {
2088Sbill 		bp->b_flags |= B_DELWRI | B_DONE;
2098Sbill 		brelse(bp);
2108Sbill 	}
2118Sbill }
2128Sbill 
2138Sbill /*
2148Sbill  * Release the buffer, start I/O on it, but don't wait for completion.
2158Sbill  */
2168Sbill bawrite(bp)
2178Sbill register struct buf *bp;
2188Sbill {
2198Sbill 
2208Sbill 	bp->b_flags |= B_ASYNC;
2218Sbill 	bwrite(bp);
2228Sbill }
2238Sbill 
2248Sbill /*
2258Sbill  * release the buffer, with no I/O implied.
2268Sbill  */
2278Sbill brelse(bp)
2288Sbill register struct buf *bp;
2298Sbill {
2308Sbill 	register struct buf **backp;
2318Sbill 	register s;
2328Sbill 
2338Sbill 	if (bp->b_flags&B_WANTED)
2348Sbill 		wakeup((caddr_t)bp);
2358Sbill 	if (bfreelist.b_flags&B_WANTED) {
2368Sbill 		bfreelist.b_flags &= ~B_WANTED;
2378Sbill 		wakeup((caddr_t)&bfreelist);
2388Sbill 	}
23991Sbill 	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
24091Sbill 		bunhash(bp);
2418Sbill 		bp->b_dev = NODEV;  /* no assoc. on error */
24291Sbill 	}
2438Sbill 	s = spl6();
2448Sbill 	if(bp->b_flags & (B_AGE|B_ERROR)) {
2458Sbill 		backp = &bfreelist.av_forw;
2468Sbill 		(*backp)->av_back = bp;
2478Sbill 		bp->av_forw = *backp;
2488Sbill 		*backp = bp;
2498Sbill 		bp->av_back = &bfreelist;
2508Sbill 	} else {
2518Sbill 		backp = &bfreelist.av_back;
2528Sbill 		(*backp)->av_forw = bp;
2538Sbill 		bp->av_back = *backp;
2548Sbill 		*backp = bp;
2558Sbill 		bp->av_forw = &bfreelist;
2568Sbill 	}
2578Sbill 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
2588Sbill 	splx(s);
2598Sbill }
2608Sbill 
2618Sbill /*
2628Sbill  * See if the block is associated with some buffer
2638Sbill  * (mainly to avoid getting hung up on a wait in breada)
2648Sbill  */
2658Sbill incore(dev, blkno)
2668Sbill dev_t dev;
2678Sbill daddr_t blkno;
2688Sbill {
2698Sbill 	register struct buf *bp;
2708Sbill 	register int dblkno = fsbtodb(blkno);
2718Sbill 
27291Sbill 	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
27391Sbill 	    bp = &buf[bp->b_hlink])
27495Sbill 		if (bp->b_blkno == dblkno && bp->b_dev == dev)
27591Sbill 			return (1);
27691Sbill 	return (0);
2778Sbill }
2788Sbill 
2798Sbill struct buf *
2808Sbill baddr(dev, blkno)
2818Sbill dev_t dev;
2828Sbill daddr_t blkno;
2838Sbill {
2848Sbill 
2858Sbill 	if (incore(dev, blkno))
2868Sbill 		return (bread(dev, blkno));
2878Sbill 	return (0);
2888Sbill }
2898Sbill 
2908Sbill /*
2918Sbill  * Assign a buffer for the given block.  If the appropriate
2928Sbill  * block is already associated, return it; otherwise search
2938Sbill  * for the oldest non-busy buffer and reassign it.
2948Sbill  */
2958Sbill struct buf *
2968Sbill getblk(dev, blkno)
2978Sbill dev_t dev;
2988Sbill daddr_t blkno;
2998Sbill {
30091Sbill 	register struct buf *bp, *dp, *ep;
301*1831Sbill 	register int i, x, dblkno;
3028Sbill 
303*1831Sbill 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
304*1831Sbill 		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
305*1831Sbill 	dblkno = fsbtodb(blkno);
3068Sbill     loop:
307124Sbill 	(void) spl0();
30891Sbill 	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
30991Sbill 	    bp = &buf[bp->b_hlink]) {
31091Sbill 		if (bp->b_blkno != dblkno || bp->b_dev != dev)
3118Sbill 			continue;
312124Sbill 		(void) spl6();
3138Sbill 		if (bp->b_flags&B_BUSY) {
3148Sbill 			bp->b_flags |= B_WANTED;
3158Sbill 			sleep((caddr_t)bp, PRIBIO+1);
3168Sbill 			goto loop;
3178Sbill 		}
318124Sbill 		(void) spl0();
3198Sbill #ifdef	DISKMON
3208Sbill 		i = 0;
3218Sbill 		dp = bp->av_forw;
3228Sbill 		while (dp != &bfreelist) {
3238Sbill 			i++;
3248Sbill 			dp = dp->av_forw;
3258Sbill 		}
3268Sbill 		if (i<NBUF)
3278Sbill 			io_info.bufcount[i]++;
3288Sbill #endif
3298Sbill 		notavail(bp);
3308Sbill 		bp->b_flags |= B_CACHE;
3318Sbill 		return(bp);
3328Sbill 	}
33391Sbill 	if (major(dev) >= nblkdev)
33491Sbill 		panic("blkdev");
33591Sbill 	dp = bdevsw[major(dev)].d_tab;
33691Sbill 	if (dp == NULL)
33791Sbill 		panic("devtab");
338124Sbill 	(void) spl6();
3398Sbill 	if (bfreelist.av_forw == &bfreelist) {
3408Sbill 		bfreelist.b_flags |= B_WANTED;
3418Sbill 		sleep((caddr_t)&bfreelist, PRIBIO+1);
3428Sbill 		goto loop;
3438Sbill 	}
3441792Sbill 	(void) spl0();
3458Sbill 	bp = bfreelist.av_forw;
3468Sbill 	notavail(bp);
3478Sbill 	if (bp->b_flags & B_DELWRI) {
3488Sbill 		bp->b_flags |= B_ASYNC;
3498Sbill 		bwrite(bp);
3508Sbill 		goto loop;
3518Sbill 	}
35291Sbill 	if (bp->b_dev == NODEV)
35391Sbill 		goto done;
35491Sbill 	/* INLINE EXPANSION OF bunhash(bp) */
355884Sbill 	(void) spl6();
35691Sbill 	i = BUFHASH(dbtofsb(bp->b_blkno));
35791Sbill 	x = bp - buf;
35891Sbill 	if (bufhash[i] == x) {
35991Sbill 		bufhash[i] = bp->b_hlink;
36091Sbill 	} else {
36191Sbill 		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
36291Sbill 		    ep = &buf[ep->b_hlink])
36391Sbill 			if (ep->b_hlink == x) {
36491Sbill 				ep->b_hlink = bp->b_hlink;
36591Sbill 				goto done;
36691Sbill 			}
36791Sbill 		panic("getblk");
36891Sbill 	}
36991Sbill done:
370884Sbill 	(void) spl0();
37191Sbill 	/* END INLINE EXPANSION */
3728Sbill 	bp->b_flags = B_BUSY;
3738Sbill 	bp->b_back->b_forw = bp->b_forw;
3748Sbill 	bp->b_forw->b_back = bp->b_back;
3758Sbill 	bp->b_forw = dp->b_forw;
3768Sbill 	bp->b_back = dp;
3778Sbill 	dp->b_forw->b_back = bp;
3788Sbill 	dp->b_forw = bp;
3798Sbill 	bp->b_dev = dev;
3808Sbill 	bp->b_blkno = dblkno;
38191Sbill 	i = BUFHASH(blkno);
38291Sbill 	bp->b_hlink = bufhash[i];
38391Sbill 	bufhash[i] = bp - buf;
3848Sbill 	return(bp);
3858Sbill }
3868Sbill 
3878Sbill /*
3888Sbill  * get an empty block,
3898Sbill  * not assigned to any particular device
3908Sbill  */
3918Sbill struct buf *
3928Sbill geteblk()
3938Sbill {
394182Sbill 	register struct buf *bp, *dp;
3958Sbill 
3968Sbill loop:
397124Sbill 	(void) spl6();
3988Sbill 	while (bfreelist.av_forw == &bfreelist) {
3998Sbill 		bfreelist.b_flags |= B_WANTED;
4008Sbill 		sleep((caddr_t)&bfreelist, PRIBIO+1);
4018Sbill 	}
402124Sbill 	(void) spl0();
4038Sbill 	dp = &bfreelist;
4048Sbill 	bp = bfreelist.av_forw;
4058Sbill 	notavail(bp);
4068Sbill 	if (bp->b_flags & B_DELWRI) {
4078Sbill 		bp->b_flags |= B_ASYNC;
4088Sbill 		bwrite(bp);
4098Sbill 		goto loop;
4108Sbill 	}
41191Sbill 	if (bp->b_dev != NODEV)
41291Sbill 		bunhash(bp);
4138Sbill 	bp->b_flags = B_BUSY;
4148Sbill 	bp->b_back->b_forw = bp->b_forw;
4158Sbill 	bp->b_forw->b_back = bp->b_back;
4168Sbill 	bp->b_forw = dp->b_forw;
4178Sbill 	bp->b_back = dp;
4188Sbill 	dp->b_forw->b_back = bp;
4198Sbill 	dp->b_forw = bp;
4208Sbill 	bp->b_dev = (dev_t)NODEV;
42191Sbill 	bp->b_hlink = -1;
4228Sbill 	return(bp);
4238Sbill }
4248Sbill 
42591Sbill bunhash(bp)
42691Sbill 	register struct buf *bp;
42791Sbill {
42891Sbill 	register struct buf *ep;
429884Sbill 	register int i, x, s;
43091Sbill 
43191Sbill 	if (bp->b_dev == NODEV)
43291Sbill 		return;
433884Sbill 	s = spl6();
43491Sbill 	i = BUFHASH(dbtofsb(bp->b_blkno));
43591Sbill 	x = bp - buf;
43691Sbill 	if (bufhash[i] == x) {
43791Sbill 		bufhash[i] = bp->b_hlink;
438884Sbill 		goto ret;
43991Sbill 	}
44091Sbill 	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
44191Sbill 	    ep = &buf[ep->b_hlink])
44291Sbill 		if (ep->b_hlink == x) {
44391Sbill 			ep->b_hlink = bp->b_hlink;
444884Sbill 			goto ret;
44591Sbill 		}
44691Sbill 	panic("bunhash");
447884Sbill ret:
448884Sbill 	splx(s);
44991Sbill }
45091Sbill 
4518Sbill /*
4528Sbill  * Wait for I/O completion on the buffer; return errors
4538Sbill  * to the user.
4548Sbill  */
4558Sbill iowait(bp)
4568Sbill register struct buf *bp;
4578Sbill {
4588Sbill 
459124Sbill 	(void) spl6();
4608Sbill 	while ((bp->b_flags&B_DONE)==0)
4618Sbill 		sleep((caddr_t)bp, PRIBIO);
462124Sbill 	(void) spl0();
4638Sbill 	geterror(bp);
4648Sbill }
4658Sbill 
4668Sbill #ifndef FASTVAX
4678Sbill /*
4688Sbill  * Unlink a buffer from the available list and mark it busy.
4698Sbill  * (internal interface)
4708Sbill  */
4718Sbill notavail(bp)
4728Sbill register struct buf *bp;
4738Sbill {
4748Sbill 	register s;
4758Sbill 
4768Sbill 	s = spl6();
4778Sbill 	bp->av_back->av_forw = bp->av_forw;
4788Sbill 	bp->av_forw->av_back = bp->av_back;
4798Sbill 	bp->b_flags |= B_BUSY;
4808Sbill 	splx(s);
4818Sbill }
4828Sbill #endif
4838Sbill 
4848Sbill /*
4858Sbill  * Mark I/O complete on a buffer. If the header
4868Sbill  * indicates a dirty page push completion, the
4878Sbill  * header is inserted into the ``cleaned'' list
4888Sbill  * to be processed by the pageout daemon. Otherwise
4898Sbill  * release it if I/O is asynchronous, and wake
4908Sbill  * up anyone waiting for it.
4918Sbill  */
4928Sbill iodone(bp)
4938Sbill register struct buf *bp;
4948Sbill {
4958Sbill 	register int s;
4968Sbill 
497420Sbill 	if (bp->b_flags & B_DONE)
498420Sbill 		panic("dup iodone");
4998Sbill 	bp->b_flags |= B_DONE;
5008Sbill 	if (bp->b_flags & B_DIRTY) {
5018Sbill 		if (bp->b_flags & B_ERROR)
5028Sbill 			panic("IO err in push");
5038Sbill 		s = spl6();
5048Sbill 		cnt.v_pgout++;
5058Sbill 		bp->av_forw = bclnlist;
5068Sbill 		bp->b_bcount = swsize[bp - swbuf];
5078Sbill 		bp->b_pfcent = swpf[bp - swbuf];
5088Sbill 		bclnlist = bp;
5098Sbill 		if (bswlist.b_flags & B_WANTED)
5108Sbill 			wakeup((caddr_t)&proc[2]);
5118Sbill 		splx(s);
512383Sbill 		return;
5138Sbill 	}
5148Sbill 	if (bp->b_flags&B_ASYNC)
5158Sbill 		brelse(bp);
5168Sbill 	else {
5178Sbill 		bp->b_flags &= ~B_WANTED;
5188Sbill 		wakeup((caddr_t)bp);
5198Sbill 	}
5208Sbill }
5218Sbill 
5228Sbill /*
5238Sbill  * Zero the core associated with a buffer.
5248Sbill  */
5258Sbill clrbuf(bp)
5268Sbill struct buf *bp;
5278Sbill {
5288Sbill 	register *p;
5298Sbill 	register c;
5308Sbill 
5318Sbill 	p = bp->b_un.b_words;
5328Sbill 	c = BSIZE/sizeof(int);
5338Sbill 	do
5348Sbill 		*p++ = 0;
5358Sbill 	while (--c);
5368Sbill 	bp->b_resid = 0;
5378Sbill }
5388Sbill 
5398Sbill /*
5408Sbill  * swap I/O -
5418Sbill  *
5428Sbill  * If the flag indicates a dirty page push initiated
5438Sbill  * by the pageout daemon, we map the page into the i th
5448Sbill  * virtual page of process 2 (the daemon itself) where i is
5458Sbill  * the index of the swap header that has been allocated.
5468Sbill  * We simply initialize the header and queue the I/O but
5478Sbill  * do not wait for completion. When the I/O completes,
5488Sbill  * iodone() will link the header to a list of cleaned
5498Sbill  * pages to be processed by the pageout daemon.
5508Sbill  */
5518Sbill swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
5528Sbill 	struct proc *p;
5538Sbill 	swblk_t dblkno;
5548Sbill 	caddr_t addr;
5558Sbill 	int flag, nbytes;
5568Sbill 	dev_t dev;
5578Sbill 	unsigned pfcent;
5588Sbill {
5598Sbill 	register struct buf *bp;
5608Sbill 	register int c;
5618Sbill 	int p2dp;
5628Sbill 	register struct pte *dpte, *vpte;
5638Sbill 
564124Sbill 	(void) spl6();
5658Sbill 	while (bswlist.av_forw == NULL) {
5668Sbill 		bswlist.b_flags |= B_WANTED;
5678Sbill 		sleep((caddr_t)&bswlist, PSWP+1);
5688Sbill 	}
5698Sbill 	bp = bswlist.av_forw;
5708Sbill 	bswlist.av_forw = bp->av_forw;
571124Sbill 	(void) spl0();
5728Sbill 
5738Sbill 	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
5748Sbill 	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
5758Sbill 		if (rdflg == B_READ)
5768Sbill 			sum.v_pswpin += btoc(nbytes);
5778Sbill 		else
5788Sbill 			sum.v_pswpout += btoc(nbytes);
5798Sbill 	bp->b_proc = p;
5808Sbill 	if (flag & B_DIRTY) {
5818Sbill 		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
5828Sbill 		dpte = dptopte(&proc[2], p2dp);
5838Sbill 		vpte = vtopte(p, btop(addr));
5848Sbill 		for (c = 0; c < nbytes; c += NBPG) {
5858Sbill 			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
5868Sbill 				panic("swap bad pte");
5878Sbill 			*dpte++ = *vpte++;
5888Sbill 		}
5898Sbill 		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
5908Sbill 	} else
5918Sbill 		bp->b_un.b_addr = addr;
5928Sbill 	while (nbytes > 0) {
5938Sbill 		c = imin(ctob(120), nbytes);
5948Sbill 		bp->b_bcount = c;
5958Sbill 		bp->b_blkno = dblkno;
5968Sbill 		bp->b_dev = dev;
597718Sbill 		if (flag & B_DIRTY) {
598718Sbill 			swpf[bp - swbuf] = pfcent;
599718Sbill 			swsize[bp - swbuf] = nbytes;
600718Sbill 		}
6018Sbill 		(*bdevsw[major(dev)].d_strategy)(bp);
6028Sbill 		if (flag & B_DIRTY) {
6038Sbill 			if (c < nbytes)
6048Sbill 				panic("big push");
6058Sbill 			return;
6068Sbill 		}
607124Sbill 		(void) spl6();
6088Sbill 		while((bp->b_flags&B_DONE)==0)
6098Sbill 			sleep((caddr_t)bp, PSWP);
610124Sbill 		(void) spl0();
6118Sbill 		bp->b_un.b_addr += c;
6128Sbill 		bp->b_flags &= ~B_DONE;
6138Sbill 		if (bp->b_flags & B_ERROR) {
6148Sbill 			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
6158Sbill 				panic("hard IO err in swap");
6168Sbill 			swkill(p, (char *)0);
6178Sbill 		}
6188Sbill 		nbytes -= c;
6198Sbill 		dblkno += btoc(c);
6208Sbill 	}
621124Sbill 	(void) spl6();
6228Sbill 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
6238Sbill 	bp->av_forw = bswlist.av_forw;
6248Sbill 	bswlist.av_forw = bp;
6258Sbill 	if (bswlist.b_flags & B_WANTED) {
6268Sbill 		bswlist.b_flags &= ~B_WANTED;
6278Sbill 		wakeup((caddr_t)&bswlist);
6288Sbill 		wakeup((caddr_t)&proc[2]);
6298Sbill 	}
630124Sbill 	(void) spl0();
6318Sbill }
6328Sbill 
6338Sbill /*
6348Sbill  * If rout == 0 then killed on swap error, else
6358Sbill  * rout is the name of the routine where we ran out of
6368Sbill  * swap space.
6378Sbill  */
6388Sbill swkill(p, rout)
6398Sbill 	struct proc *p;
6408Sbill 	char *rout;
6418Sbill {
6428Sbill 
6438Sbill 	printf("%d: ", p->p_pid);
6448Sbill 	if (rout)
6458Sbill 		printf("out of swap space in %s\n", rout);
6468Sbill 	else
6478Sbill 		printf("killed on swap error\n");
6488Sbill 	/*
6498Sbill 	 * To be sure no looping (e.g. in vmsched trying to
6508Sbill 	 * swap out) mark process locked in core (as though
6518Sbill 	 * done by user) after killing it so noone will try
6528Sbill 	 * to swap it out.
6538Sbill 	 */
654165Sbill 	psignal(p, SIGKILL);
6558Sbill 	p->p_flag |= SULOCK;
6568Sbill }
6578Sbill 
6588Sbill /*
6598Sbill  * make sure all write-behind blocks
6608Sbill  * on dev (or NODEV for all)
6618Sbill  * are flushed out.
6628Sbill  * (from umount and update)
6638Sbill  */
6648Sbill bflush(dev)
6658Sbill dev_t dev;
6668Sbill {
6678Sbill 	register struct buf *bp;
6688Sbill 
6698Sbill loop:
670124Sbill 	(void) spl6();
6718Sbill 	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
6728Sbill 		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
6738Sbill 			bp->b_flags |= B_ASYNC;
6748Sbill 			notavail(bp);
6758Sbill 			bwrite(bp);
6768Sbill 			goto loop;
6778Sbill 		}
6788Sbill 	}
679124Sbill 	(void) spl0();
6808Sbill }
6818Sbill 
6828Sbill /*
6838Sbill  * Raw I/O. The arguments are
6848Sbill  *	The strategy routine for the device
6858Sbill  *	A buffer, which will always be a special buffer
6868Sbill  *	  header owned exclusively by the device for this purpose
6878Sbill  *	The device number
6888Sbill  *	Read/write flag
6898Sbill  * Essentially all the work is computing physical addresses and
6908Sbill  * validating them.
6918Sbill  * If the user has the proper access privilidges, the process is
6928Sbill  * marked 'delayed unlock' and the pages involved in the I/O are
6938Sbill  * faulted and locked. After the completion of the I/O, the above pages
6948Sbill  * are unlocked.
6958Sbill  */
6968Sbill physio(strat, bp, dev, rw, mincnt)
6978Sbill int (*strat)();
6988Sbill register struct buf *bp;
6998Sbill unsigned (*mincnt)();
7008Sbill {
7018Sbill 	register int c;
7028Sbill 	char *a;
7038Sbill 
7048Sbill 	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
7058Sbill 		u.u_error = EFAULT;
7068Sbill 		return;
7078Sbill 	}
708124Sbill 	(void) spl6();
7098Sbill 	while (bp->b_flags&B_BUSY) {
7108Sbill 		bp->b_flags |= B_WANTED;
7118Sbill 		sleep((caddr_t)bp, PRIBIO+1);
7128Sbill 	}
7138Sbill 	bp->b_error = 0;
7148Sbill 	bp->b_proc = u.u_procp;
7158Sbill 	bp->b_un.b_addr = u.u_base;
7168Sbill 	while (u.u_count != 0 && bp->b_error==0) {
7178Sbill 		bp->b_flags = B_BUSY | B_PHYS | rw;
7188Sbill 		bp->b_dev = dev;
7198Sbill 		bp->b_blkno = u.u_offset >> PGSHIFT;
7208Sbill 		bp->b_bcount = u.u_count;
7218Sbill 		(*mincnt)(bp);
7228Sbill 		c = bp->b_bcount;
7238Sbill 		u.u_procp->p_flag |= SPHYSIO;
7248Sbill 		vslock(a = bp->b_un.b_addr, c);
7258Sbill 		(*strat)(bp);
726124Sbill 		(void) spl6();
7278Sbill 		while ((bp->b_flags&B_DONE) == 0)
7288Sbill 			sleep((caddr_t)bp, PRIBIO);
7298Sbill 		vsunlock(a, c, rw);
7308Sbill 		u.u_procp->p_flag &= ~SPHYSIO;
7318Sbill 		if (bp->b_flags&B_WANTED)
7328Sbill 			wakeup((caddr_t)bp);
733124Sbill 		(void) spl0();
7348Sbill 		bp->b_un.b_addr += c;
7358Sbill 		u.u_count -= c;
7368Sbill 		u.u_offset += c;
7378Sbill 	}
7388Sbill 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
7398Sbill 	u.u_count = bp->b_resid;
7408Sbill 	geterror(bp);
7418Sbill }
7428Sbill 
7438Sbill /*ARGSUSED*/
7448Sbill unsigned
7458Sbill minphys(bp)
7468Sbill struct buf *bp;
7478Sbill {
7488Sbill 
7498Sbill 	if (bp->b_bcount > 60 * 1024)
7508Sbill 		bp->b_bcount = 60 * 1024;
7518Sbill }
7528Sbill 
7538Sbill /*
7548Sbill  * Pick up the device's error number and pass it to the user;
7558Sbill  * if there is an error but the number is 0 set a generalized
7568Sbill  * code.  Actually the latter is always true because devices
7578Sbill  * don't yet return specific errors.
7588Sbill  */
7598Sbill geterror(bp)
7608Sbill register struct buf *bp;
7618Sbill {
7628Sbill 
7638Sbill 	if (bp->b_flags&B_ERROR)
7648Sbill 		if ((u.u_error = bp->b_error)==0)
7658Sbill 			u.u_error = EIO;
7668Sbill }
767