/*	vfs_cluster.c	3.1	10/14/12	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of
 * three different lists. When free, it is on the free list;
 * when allocated and the I/O queued, it is on the swap
 * device list; and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * on a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];
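
/*
 * swsize[i] and swpf[i] record the transfer size and page
 * frame information for the dirty page push in progress on
 * swbuf[i]; swap() fills them in when it queues the push and
 * iodone() copies them into the buffer header on completion.
 */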

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
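
/*
 * Illustrative only: a typical read-modify-write cycle built
 * from these primitives (mangle() is a hypothetical routine
 * that updates the block contents):
 *
 *	bp = bread(dev, blkno);
 *	if (u.u_error == 0) {
 *		mangle(bp->b_un.b_addr);
 *		bdwrite(bp);
 *	} else
 *		brelse(bp);
 */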

#ifdef	FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy
 * (fast in-line version; a function version for other machines
 * appears near the end of this file).
 */
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;		/* pay for read */
		}
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;		/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}
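
/*
 * Illustrative only: a sequential file read might prefetch the
 * next logical block with
 *
 *	bp = breada(dev, bmap(ip, bn), bmap(ip, bn + 1));
 *
 * where bmap() stands for a (hypothetical here) routine mapping
 * a logical file block to its physical block number.
 */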

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if (dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
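
/*
 * Illustrative only: a partial-block write typically reads the
 * block, merges in the new bytes, and uses a delayed write so
 * that several small writes to the same block cost one I/O:
 *
 *	bp = bread(dev, bn);
 *	bcopy(from, bp->b_un.b_addr + off, cc);
 *	bdwrite(bp);
 *
 * (from, off and cc are hypothetical names for the source,
 * offset and count of the user data being merged in).
 */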

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		bp->b_dev = NODEV;  /* no assoc. on error */
	s = spl6();
	if (bp->b_flags & (B_AGE|B_ERROR)) {
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
	register int dblkno = fsbtodb(blkno);

	dp = bdevsw[major(dev)].d_tab;
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno==dblkno && bp->b_dev==dev)
			return(1);
	return(0);
}

/*
 * If the block is already in core, read it (via bread, which
 * will find it in the cache) and return the buffer;
 * otherwise return 0 without doing any I/O.
 */
struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
#ifdef	DISKMON
	register i;
#endif
	register int dblkno = fsbtodb(blkno);

	if (major(dev) >= nblkdev)
		panic("blkdev");

loop:
	VOID spl0();
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno!=dblkno || bp->b_dev!=dev)
			continue;
		VOID spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		VOID spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	VOID spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	VOID spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp;
	register struct buf *dp;

loop:
	VOID spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	VOID spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	return(bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	VOID spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	VOID spl0();
	geterror(bp);
}

#ifndef	FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself), where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int rdflg, flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	VOID spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	VOID spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (dev == swapdev)
			bp->b_blkno += swplo;
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			swsize[bp - swbuf] = nbytes;
			swpf[bp - swbuf] = pfcent;
			return;
		}
		VOID spl6();
		while ((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		VOID spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	VOID spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	VOID spl0();
}
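
/*
 * Illustrative only: a dirty page push from the pageout daemon
 * takes roughly the form
 *
 *	swap(p, dblkno, (caddr_t)ptob(v), ctob(CLSIZE),
 *	    B_WRITE, B_DIRTY, swapdev, pfcent);
 *
 * (the argument values here are schematic); a swap-in, by
 * contrast, passes B_READ without B_DIRTY and so waits in the
 * loop above for the transfer to complete.
 */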

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure there is no looping (e.g. in vmsched trying to
	 * swap out), mark the process locked in core (as though
	 * done by the user) after killing it, so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKIL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

loop:
	VOID spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	VOID spl0();
}

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
dev_t dev;
int rw;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	VOID spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error==0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		VOID spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		VOID spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
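
/*
 * Illustrative only: a raw-device read entry built on physio
 * might look like
 *
 *	hpread(dev)
 *	dev_t dev;
 *	{
 *
 *		physio(hpstrategy, &rhpbuf, dev, B_READ, minphys);
 *	}
 *
 * where hpread, hpstrategy and rhpbuf are hypothetical driver
 * names.
 */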

/*ARGSUSED*/
/*
 * Default transfer-size limiter for physio: trim the transfer
 * to at most 60 kbytes.
 */
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}