xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 2045)
1 /*	vfs_cluster.c	4.4	12/26/80	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/dir.h"
6 #include "../h/user.h"
7 #include "../h/buf.h"
8 #include "../h/conf.h"
9 #include "../h/proc.h"
10 #include "../h/seg.h"
11 #include "../h/pte.h"
12 #include "../h/vm.h"
13 #include "../h/trace.h"
14 
15 /*
16  * The following several routines allocate and free
17  * buffers with various side effects.  In general the
18  * arguments to an allocate routine are a device and
19  * a block number, and the value is a pointer to
20  * the buffer header; the buffer is marked "busy"
21  * so that no one else can touch it.  If the block was
22  * already in core, no I/O need be done; if it is
23  * already busy, the process waits until it becomes free.
24  * The following routines allocate a buffer:
25  *	getblk
26  *	bread
27  *	breada
28  *	baddr	(if it is incore)
29  * Eventually the buffer must be released, possibly with the
30  * side effect of writing it out, by using one of
31  *	bwrite
32  *	bdwrite
33  *	bawrite
34  *	brelse
35  */
36 
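/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): a typical synchronous read-modify-write of one block with
 * the routines above; "dev" and "blkno" are assumed to be supplied
 * by the caller.
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno);		returned buffer is marked busy
 *	if (u.u_error == 0) {
 *		... modify bp->b_un.b_addr ...
 *		bdwrite(bp);		delayed write, releases the buffer
 *	} else
 *		brelse(bp);		give the buffer back unchanged
 */
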
37 #define	BUFHSZ	63
38 #define	BUFHASH(blkno)	(blkno % BUFHSZ)
39 short	bufhash[BUFHSZ];
40 
41 /*
42  * Initialize hash links for buffers.
43  */
44 bhinit()
45 {
46 	register int i;
47 
48 	for (i = 0; i < BUFHSZ; i++)
49 		bufhash[i] = -1;
50 }
51 
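/*
 * Editor's note (sketch, not in the original source): bufhash[] holds
 * indices into the buf[] array; chains are linked through b_hlink and
 * terminated by -1 (hence the &buf[-1] sentinel set up in bhinit), so
 * a lookup walks
 *
 *	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
 *	    bp = &buf[bp->b_hlink])
 *		if (bp->b_blkno == dblkno && bp->b_dev == dev)
 *			...
 *
 * exactly as incore() and getblk() do below.
 */
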
52 /* #define	DISKMON	1 */
53 
54 #ifdef	DISKMON
55 struct {
56 	int	nbuf;
57 	long	nread;
58 	long	nreada;
59 	long	ncache;
60 	long	nwrite;
61 	long	bufcount[NBUF];
62 } io_info;
63 #endif
64 
65 /*
66  * Swap IO headers -
67  * They contain the necessary information for the swap I/O.
68  * At any given time, a swap header can be on one of three
69  * different lists. When free it is in the free list,
70  * when allocated and the I/O queued, it is on the swap
71  * device list, and finally, if the operation was a dirty
72  * page push, when the I/O completes, it is inserted
73  * in a list of cleaned pages to be processed by the pageout daemon.
74  */
75 struct	buf swbuf[NSWBUF];
76 short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
77 int	swpf[NSWBUF];
78 
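/*
 * Editor's note (sketch based on the code below, not in the original
 * source): the three lists mentioned above correspond to
 *
 *	bswlist		free swap headers (allocated in swap())
 *	device queue	while the I/O is queued via d_strategy
 *	bclnlist	cleaned pages, linked in by iodone() after a
 *			dirty page push, for the pageout daemon
 */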
79 
80 #ifdef	FASTVAX
81 #define	notavail(bp) \
82 { \
83 	int s = spl6(); \
84 	(bp)->av_back->av_forw = (bp)->av_forw; \
85 	(bp)->av_forw->av_back = (bp)->av_back; \
86 	(bp)->b_flags |= B_BUSY; \
87 	splx(s); \
88 }
89 #endif
90 
91 /*
92  * Read in (if necessary) the block and return a buffer pointer.
93  */
94 struct buf *
95 bread(dev, blkno)
96 dev_t dev;
97 daddr_t blkno;
98 {
99 	register struct buf *bp;
100 
101 	bp = getblk(dev, blkno);
102 	if (bp->b_flags&B_DONE) {
103 #ifdef	EPAWNJ
104 		trace(TR_BREAD|TR_HIT, dev, blkno);
105 #endif
106 #ifdef	DISKMON
107 		io_info.ncache++;
108 #endif
109 		return(bp);
110 	}
111 	bp->b_flags |= B_READ;
112 	bp->b_bcount = BSIZE;
113 	(*bdevsw[major(dev)].d_strategy)(bp);
114 #ifdef	EPAWNJ
115 	trace(TR_BREAD|TR_MISS, dev, blkno);
116 #endif
117 #ifdef	DISKMON
118 	io_info.nread++;
119 #endif
120 	u.u_vm.vm_inblk++;		/* pay for read */
121 	iowait(bp);
122 	return(bp);
123 }
124 
125 /*
126  * Read in the block, like bread, but also start I/O on the
127  * read-ahead block (which is not allocated to the caller)
128  */
129 struct buf *
130 breada(dev, blkno, rablkno)
131 dev_t dev;
132 daddr_t blkno, rablkno;
133 {
134 	register struct buf *bp, *rabp;
135 
136 	bp = NULL;
137 	if (!incore(dev, blkno)) {
138 		bp = getblk(dev, blkno);
139 		if ((bp->b_flags&B_DONE) == 0) {
140 			bp->b_flags |= B_READ;
141 			bp->b_bcount = BSIZE;
142 			(*bdevsw[major(dev)].d_strategy)(bp);
143 #ifdef	EPAWNJ
144 			trace(TR_BREAD|TR_MISS, dev, blkno);
145 #endif
146 #ifdef	DISKMON
147 			io_info.nread++;
148 #endif
149 			u.u_vm.vm_inblk++;		/* pay for read */
150 		}
151 #ifdef	EPAWNJ
152 		else
153 			trace(TR_BREAD|TR_HIT, dev, blkno);
154 #endif
155 	}
156 	if (rablkno && !incore(dev, rablkno)) {
157 		rabp = getblk(dev, rablkno);
158 		if (rabp->b_flags & B_DONE) {
159 			brelse(rabp);
160 #ifdef	EPAWNJ
161 			trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno);
162 #endif
163 		} else {
164 			rabp->b_flags |= B_READ|B_ASYNC;
165 			rabp->b_bcount = BSIZE;
166 			(*bdevsw[major(dev)].d_strategy)(rabp);
167 #ifdef	EPAWNJ
168 			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
169 #endif
170 #ifdef	DISKMON
171 			io_info.nreada++;
172 #endif
173 			u.u_vm.vm_inblk++;		/* pay in advance */
174 		}
175 	}
176 	if(bp == NULL)
177 		return(bread(dev, blkno));
178 	iowait(bp);
179 	return(bp);
180 }
181 
182 /*
183  * Write the buffer, waiting for completion.
184  * Then release the buffer.
185  */
186 bwrite(bp)
187 register struct buf *bp;
188 {
189 	register flag;
190 
191 	flag = bp->b_flags;
192 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
193 	bp->b_bcount = BSIZE;
194 #ifdef	DISKMON
195 	io_info.nwrite++;
196 #endif
197 	if ((flag&B_DELWRI) == 0)
198 		u.u_vm.vm_oublk++;		/* no one paid yet */
199 #ifdef	EPAWNJ
200 	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
201 #endif
202 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
203 	if ((flag&B_ASYNC) == 0) {
204 		iowait(bp);
205 		brelse(bp);
206 	} else if (flag & B_DELWRI)
207 		bp->b_flags |= B_AGE;
208 	else
209 		geterror(bp);
210 }
211 
212 /*
213  * Release the buffer, marking it so that if it is grabbed
214  * for another purpose it will be written out before being
215  * given up (e.g. when writing a partial block where it is
216  * assumed that another write for the same block will soon follow).
217  * This can't be done for magtape, since writes must be done
218  * in the same order as requested.
219  */
220 bdwrite(bp)
221 register struct buf *bp;
222 {
223 	register struct buf *dp;
224 
225 	if ((bp->b_flags&B_DELWRI) == 0)
226 		u.u_vm.vm_oublk++;		/* no one paid yet */
227 	dp = bdevsw[major(bp->b_dev)].d_tab;
228 	if(dp->b_flags & B_TAPE)
229 		bawrite(bp);
230 	else {
231 		bp->b_flags |= B_DELWRI | B_DONE;
232 		brelse(bp);
233 	}
234 }
235 
236 /*
237  * Release the buffer, start I/O on it, but don't wait for completion.
238  */
239 bawrite(bp)
240 register struct buf *bp;
241 {
242 
243 	bp->b_flags |= B_ASYNC;
244 	bwrite(bp);
245 }
246 
247 /*
248  * Release the buffer, with no I/O implied.
249  */
250 brelse(bp)
251 register struct buf *bp;
252 {
253 	register struct buf **backp;
254 	register s;
255 
256 	if (bp->b_flags&B_WANTED)
257 		wakeup((caddr_t)bp);
258 	if (bfreelist.b_flags&B_WANTED) {
259 		bfreelist.b_flags &= ~B_WANTED;
260 		wakeup((caddr_t)&bfreelist);
261 	}
262 	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
263 		bunhash(bp);
264 		bp->b_dev = NODEV;  /* no assoc. on error */
265 	}
266 	s = spl6();
267 	if(bp->b_flags & (B_AGE|B_ERROR)) {
268 		backp = &bfreelist.av_forw;
269 		(*backp)->av_back = bp;
270 		bp->av_forw = *backp;
271 		*backp = bp;
272 		bp->av_back = &bfreelist;
273 	} else {
274 		backp = &bfreelist.av_back;
275 		(*backp)->av_forw = bp;
276 		bp->av_back = *backp;
277 		*backp = bp;
278 		bp->av_forw = &bfreelist;
279 	}
280 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
281 	splx(s);
282 }
283 
284 /*
285  * See if the block is associated with some buffer
286  * (mainly to avoid getting hung up on a wait in breada)
287  */
288 incore(dev, blkno)
289 dev_t dev;
290 daddr_t blkno;
291 {
292 	register struct buf *bp;
293 	register int dblkno = fsbtodb(blkno);
294 
295 	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
296 	    bp = &buf[bp->b_hlink])
297 		if (bp->b_blkno == dblkno && bp->b_dev == dev)
298 			return (1);
299 	return (0);
300 }
301 
302 struct buf *
303 baddr(dev, blkno)
304 dev_t dev;
305 daddr_t blkno;
306 {
307 
308 	if (incore(dev, blkno))
309 		return (bread(dev, blkno));
310 	return (0);
311 }
312 
313 /*
314  * Assign a buffer for the given block.  If the appropriate
315  * block is already associated, return it; otherwise search
316  * for the oldest non-busy buffer and reassign it.
317  */
318 struct buf *
319 getblk(dev, blkno)
320 dev_t dev;
321 daddr_t blkno;
322 {
323 	register struct buf *bp, *dp, *ep;
324 	register int i, x, dblkno;
325 
326 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
327 		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
328 	dblkno = fsbtodb(blkno);
329     loop:
330 	(void) spl0();
331 	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
332 	    bp = &buf[bp->b_hlink]) {
333 		if (bp->b_blkno != dblkno || bp->b_dev != dev)
334 			continue;
335 		(void) spl6();
336 		if (bp->b_flags&B_BUSY) {
337 			bp->b_flags |= B_WANTED;
338 			sleep((caddr_t)bp, PRIBIO+1);
339 			goto loop;
340 		}
341 		(void) spl0();
342 #ifdef	DISKMON
343 		i = 0;
344 		dp = bp->av_forw;
345 		while (dp != &bfreelist) {
346 			i++;
347 			dp = dp->av_forw;
348 		}
349 		if (i<NBUF)
350 			io_info.bufcount[i]++;
351 #endif
352 		notavail(bp);
353 		bp->b_flags |= B_CACHE;
354 		return(bp);
355 	}
356 	if (major(dev) >= nblkdev)
357 		panic("blkdev");
358 	dp = bdevsw[major(dev)].d_tab;
359 	if (dp == NULL)
360 		panic("devtab");
361 	(void) spl6();
362 	if (bfreelist.av_forw == &bfreelist) {
363 		bfreelist.b_flags |= B_WANTED;
364 		sleep((caddr_t)&bfreelist, PRIBIO+1);
365 		goto loop;
366 	}
367 	(void) spl0();
368 	bp = bfreelist.av_forw;
369 	notavail(bp);
370 	if (bp->b_flags & B_DELWRI) {
371 		bp->b_flags |= B_ASYNC;
372 		bwrite(bp);
373 		goto loop;
374 	}
375 	if (bp->b_dev == NODEV)
376 		goto done;
377 	/* INLINE EXPANSION OF bunhash(bp) */
378 #ifdef EPAWNJ
379 	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
380 #endif
381 	(void) spl6();
382 	i = BUFHASH(dbtofsb(bp->b_blkno));
383 	x = bp - buf;
384 	if (bufhash[i] == x) {
385 		bufhash[i] = bp->b_hlink;
386 	} else {
387 		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
388 		    ep = &buf[ep->b_hlink])
389 			if (ep->b_hlink == x) {
390 				ep->b_hlink = bp->b_hlink;
391 				goto done;
392 			}
393 		panic("getblk");
394 	}
395 done:
396 	(void) spl0();
397 	/* END INLINE EXPANSION */
398 	bp->b_flags = B_BUSY;
399 	bp->b_back->b_forw = bp->b_forw;
400 	bp->b_forw->b_back = bp->b_back;
401 	bp->b_forw = dp->b_forw;
402 	bp->b_back = dp;
403 	dp->b_forw->b_back = bp;
404 	dp->b_forw = bp;
405 	bp->b_dev = dev;
406 	bp->b_blkno = dblkno;
407 	i = BUFHASH(blkno);
408 	bp->b_hlink = bufhash[i];
409 	bufhash[i] = bp - buf;
410 	return(bp);
411 }
412 
413 /*
414  * Get an empty block,
415  * not assigned to any particular device.
416  */
417 struct buf *
418 geteblk()
419 {
420 	register struct buf *bp, *dp;
421 
422 loop:
423 	(void) spl6();
424 	while (bfreelist.av_forw == &bfreelist) {
425 		bfreelist.b_flags |= B_WANTED;
426 		sleep((caddr_t)&bfreelist, PRIBIO+1);
427 	}
428 	(void) spl0();
429 	dp = &bfreelist;
430 	bp = bfreelist.av_forw;
431 	notavail(bp);
432 	if (bp->b_flags & B_DELWRI) {
433 		bp->b_flags |= B_ASYNC;
434 		bwrite(bp);
435 		goto loop;
436 	}
437 	if (bp->b_dev != NODEV) {
438 #ifdef EPAWNJ
439 		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
440 #endif
441 		bunhash(bp);
442 	}
443 	bp->b_flags = B_BUSY;
444 	bp->b_back->b_forw = bp->b_forw;
445 	bp->b_forw->b_back = bp->b_back;
446 	bp->b_forw = dp->b_forw;
447 	bp->b_back = dp;
448 	dp->b_forw->b_back = bp;
449 	dp->b_forw = bp;
450 	bp->b_dev = (dev_t)NODEV;
451 	bp->b_hlink = -1;
452 	return(bp);
453 }
454 
455 bunhash(bp)
456 	register struct buf *bp;
457 {
458 	register struct buf *ep;
459 	register int i, x, s;
460 
461 	if (bp->b_dev == NODEV)
462 		return;
463 	s = spl6();
464 	i = BUFHASH(dbtofsb(bp->b_blkno));
465 	x = bp - buf;
466 	if (bufhash[i] == x) {
467 		bufhash[i] = bp->b_hlink;
468 		goto ret;
469 	}
470 	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
471 	    ep = &buf[ep->b_hlink])
472 		if (ep->b_hlink == x) {
473 			ep->b_hlink = bp->b_hlink;
474 			goto ret;
475 		}
476 	panic("bunhash");
477 ret:
478 	splx(s);
479 }
480 
481 /*
482  * Wait for I/O completion on the buffer; return errors
483  * to the user.
484  */
485 iowait(bp)
486 register struct buf *bp;
487 {
488 
489 	(void) spl6();
490 	while ((bp->b_flags&B_DONE)==0)
491 		sleep((caddr_t)bp, PRIBIO);
492 	(void) spl0();
493 	geterror(bp);
494 }
495 
496 #ifndef FASTVAX
497 /*
498  * Unlink a buffer from the available list and mark it busy.
499  * (internal interface)
500  */
501 notavail(bp)
502 register struct buf *bp;
503 {
504 	register s;
505 
506 	s = spl6();
507 	bp->av_back->av_forw = bp->av_forw;
508 	bp->av_forw->av_back = bp->av_back;
509 	bp->b_flags |= B_BUSY;
510 	splx(s);
511 }
512 #endif
513 
514 /*
515  * Mark I/O complete on a buffer. If the header
516  * indicates a dirty page push completion, the
517  * header is inserted into the ``cleaned'' list
518  * to be processed by the pageout daemon. Otherwise
519  * release it if I/O is asynchronous, and wake
520  * up anyone waiting for it.
521  */
522 iodone(bp)
523 register struct buf *bp;
524 {
525 	register int s;
526 
527 	if (bp->b_flags & B_DONE)
528 		panic("dup iodone");
529 	bp->b_flags |= B_DONE;
530 	if (bp->b_flags & B_DIRTY) {
531 		if (bp->b_flags & B_ERROR)
532 			panic("IO err in push");
533 		s = spl6();
534 		cnt.v_pgout++;
535 		bp->av_forw = bclnlist;
536 		bp->b_bcount = swsize[bp - swbuf];
537 		bp->b_pfcent = swpf[bp - swbuf];
538 		bclnlist = bp;
539 		if (bswlist.b_flags & B_WANTED)
540 			wakeup((caddr_t)&proc[2]);
541 		splx(s);
542 		return;
543 	}
544 	if (bp->b_flags&B_ASYNC)
545 		brelse(bp);
546 	else {
547 		bp->b_flags &= ~B_WANTED;
548 		wakeup((caddr_t)bp);
549 	}
550 }
551 
552 /*
553  * Zero the core associated with a buffer.
554  */
555 clrbuf(bp)
556 struct buf *bp;
557 {
558 	register *p;
559 	register c;
560 
561 	p = bp->b_un.b_words;
562 	c = BSIZE/sizeof(int);
563 	do
564 		*p++ = 0;
565 	while (--c);
566 	bp->b_resid = 0;
567 }
568 
569 /*
570  * swap I/O -
571  *
572  * If the flag indicates a dirty page push initiated
573  * by the pageout daemon, we map the page into the i th
574  * virtual page of process 2 (the daemon itself) where i is
575  * the index of the swap header that has been allocated.
576  * We simply initialize the header and queue the I/O but
577  * do not wait for completion. When the I/O completes,
578  * iodone() will link the header to a list of cleaned
579  * pages to be processed by the pageout daemon.
580  */
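/*
 * Illustrative sketch (editor's assumption, not in the original source):
 * a dirty page push by the pageout daemon would be issued roughly as
 *
 *	swap(p, dblkno, addr, ctob(CLSIZE), B_WRITE, B_DIRTY, swapdev, pfcent);
 *
 * while an ordinary synchronous swap-in would look like
 *
 *	swap(p, dblkno, addr, nbytes, B_READ, 0, swapdev, 0);
 *
 * where "p", "dblkno", "addr", "nbytes" and "pfcent" are supplied by
 * the caller.
 */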
581 swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
582 	struct proc *p;
583 	swblk_t dblkno;
584 	caddr_t addr;
585 	int flag, nbytes;
586 	dev_t dev;
587 	unsigned pfcent;
588 {
589 	register struct buf *bp;
590 	register int c;
591 	int p2dp;
592 	register struct pte *dpte, *vpte;
593 
594 	(void) spl6();
595 	while (bswlist.av_forw == NULL) {
596 		bswlist.b_flags |= B_WANTED;
597 		sleep((caddr_t)&bswlist, PSWP+1);
598 	}
599 	bp = bswlist.av_forw;
600 	bswlist.av_forw = bp->av_forw;
601 	(void) spl0();
602 
603 	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
604 	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
605 		if (rdflg == B_READ)
606 			sum.v_pswpin += btoc(nbytes);
607 		else
608 			sum.v_pswpout += btoc(nbytes);
609 	bp->b_proc = p;
610 	if (flag & B_DIRTY) {
611 		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
612 		dpte = dptopte(&proc[2], p2dp);
613 		vpte = vtopte(p, btop(addr));
614 		for (c = 0; c < nbytes; c += NBPG) {
615 			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
616 				panic("swap bad pte");
617 			*dpte++ = *vpte++;
618 		}
619 		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
620 	} else
621 		bp->b_un.b_addr = addr;
622 	while (nbytes > 0) {
623 		c = imin(ctob(120), nbytes);
624 		bp->b_bcount = c;
625 		bp->b_blkno = dblkno;
626 		bp->b_dev = dev;
627 		if (flag & B_DIRTY) {
628 			swpf[bp - swbuf] = pfcent;
629 			swsize[bp - swbuf] = nbytes;
630 		}
631 		(*bdevsw[major(dev)].d_strategy)(bp);
632 		if (flag & B_DIRTY) {
633 			if (c < nbytes)
634 				panic("big push");
635 			return;
636 		}
637 		(void) spl6();
638 		while((bp->b_flags&B_DONE)==0)
639 			sleep((caddr_t)bp, PSWP);
640 		(void) spl0();
641 		bp->b_un.b_addr += c;
642 		bp->b_flags &= ~B_DONE;
643 		if (bp->b_flags & B_ERROR) {
644 			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
645 				panic("hard IO err in swap");
646 			swkill(p, (char *)0);
647 		}
648 		nbytes -= c;
649 		dblkno += btoc(c);
650 	}
651 	(void) spl6();
652 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
653 	bp->av_forw = bswlist.av_forw;
654 	bswlist.av_forw = bp;
655 	if (bswlist.b_flags & B_WANTED) {
656 		bswlist.b_flags &= ~B_WANTED;
657 		wakeup((caddr_t)&bswlist);
658 		wakeup((caddr_t)&proc[2]);
659 	}
660 	(void) spl0();
661 }
662 
663 /*
664  * If rout == 0 then killed on swap error, else
665  * rout is the name of the routine where we ran out of
666  * swap space.
667  */
668 swkill(p, rout)
669 	struct proc *p;
670 	char *rout;
671 {
672 
673 	printf("%d: ", p->p_pid);
674 	if (rout)
675 		printf("out of swap space in %s\n", rout);
676 	else
677 		printf("killed on swap error\n");
678 	/*
679 	 * To be sure there is no looping (e.g. in vmsched trying to
680 	 * swap out), mark the process locked in core (as though
681 	 * done by the user) after killing it, so no one will try
682 	 * to swap it out.
683 	 */
684 	psignal(p, SIGKILL);
685 	p->p_flag |= SULOCK;
686 }
687 
688 /*
689  * make sure all write-behind blocks
690  * on dev (or NODEV for all)
691  * are flushed out.
692  * (from umount and update)
693  */
694 bflush(dev)
695 dev_t dev;
696 {
697 	register struct buf *bp;
698 
699 loop:
700 	(void) spl6();
701 	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
702 		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
703 			bp->b_flags |= B_ASYNC;
704 			notavail(bp);
705 			bwrite(bp);
706 			goto loop;
707 		}
708 	}
709 	(void) spl0();
710 }
711 
712 /*
713  * Raw I/O. The arguments are
714  *	The strategy routine for the device
715  *	A buffer, which will always be a special buffer
716  *	  header owned exclusively by the device for this purpose
717  *	The device number
718  *	Read/write flag
719  * Essentially all the work is computing physical addresses and
720  * validating them.
721  * If the user has the proper access privileges, the process is
722  * marked 'delayed unlock' and the pages involved in the I/O are
723  * faulted and locked. After the completion of the I/O, the above pages
724  * are unlocked.
725  */
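/*
 * Illustrative sketch (editor's assumption, not in the original source):
 * a raw character-device read routine would typically hand its private
 * buffer header to physio like this,
 *
 *	xxread(dev)
 *	dev_t dev;
 *	{
 *
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 *
 * where "xxstrategy" and "rxxbuf" are the (hypothetical) device's
 * strategy routine and raw-I/O buffer header.
 */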
726 physio(strat, bp, dev, rw, mincnt)
727 int (*strat)();
728 register struct buf *bp;
729 unsigned (*mincnt)();
730 {
731 	register int c;
732 	char *a;
733 
734 	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
735 		u.u_error = EFAULT;
736 		return;
737 	}
738 	(void) spl6();
739 	while (bp->b_flags&B_BUSY) {
740 		bp->b_flags |= B_WANTED;
741 		sleep((caddr_t)bp, PRIBIO+1);
742 	}
743 	bp->b_error = 0;
744 	bp->b_proc = u.u_procp;
745 	bp->b_un.b_addr = u.u_base;
746 	while (u.u_count != 0 && bp->b_error==0) {
747 		bp->b_flags = B_BUSY | B_PHYS | rw;
748 		bp->b_dev = dev;
749 		bp->b_blkno = u.u_offset >> PGSHIFT;
750 		bp->b_bcount = u.u_count;
751 		(*mincnt)(bp);
752 		c = bp->b_bcount;
753 		u.u_procp->p_flag |= SPHYSIO;
754 		vslock(a = bp->b_un.b_addr, c);
755 		(*strat)(bp);
756 		(void) spl6();
757 		while ((bp->b_flags&B_DONE) == 0)
758 			sleep((caddr_t)bp, PRIBIO);
759 		vsunlock(a, c, rw);
760 		u.u_procp->p_flag &= ~SPHYSIO;
761 		if (bp->b_flags&B_WANTED)
762 			wakeup((caddr_t)bp);
763 		(void) spl0();
764 		bp->b_un.b_addr += c;
765 		u.u_count -= c;
766 		u.u_offset += c;
767 	}
768 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
769 	u.u_count = bp->b_resid;
770 	geterror(bp);
771 }
772 
773 /*ARGSUSED*/
774 unsigned
775 minphys(bp)
776 struct buf *bp;
777 {
778 
779 	if (bp->b_bcount > 60 * 1024)
780 		bp->b_bcount = 60 * 1024;
781 }
782 
783 /*
784  * Pick up the device's error number and pass it to the user;
785  * if there is an error but the number is 0 set a generalized
786  * code.  Actually the latter is always true because devices
787  * don't yet return specific errors.
788  */
789 geterror(bp)
790 register struct buf *bp;
791 {
792 
793 	if (bp->b_flags&B_ERROR)
794 		if ((u.u_error = bp->b_error)==0)
795 			u.u_error = EIO;
796 }
797