/*	vfs_cluster.c	4.5	01/28/81	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
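
/*
 * Illustrative sketch (not part of the original source): the usual
 * caller pattern for the routines above.  A block is read and held
 * busy until released; "bn" and the error check are assumptions for
 * the example only.
 *
 *	bp = bread(dev, bn);
 *	if ((bp->b_flags & B_ERROR) == 0) {
 *		... examine or modify bp->b_un.b_addr ...
 *	}
 *	brelse(bp);
 */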

#define	BUFHSZ	63
#define	BUFHASH(blkno)	(blkno % BUFHSZ)
short	bufhash[BUFHSZ];
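
/*
 * Note (added commentary, not in the original): each hash chain is
 * threaded by index rather than by pointer; bufhash[h] holds the
 * index into buf[] of the first buffer on chain h, each buffer's
 * b_hlink holds the index of the next, and -1 terminates a chain.
 * A lookup therefore walks:
 *
 *	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
 *	    bp = &buf[bp->b_hlink])
 *		...
 */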

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;

	for (i = 0; i < BUFHSZ; i++)
		bufhash[i] = -1;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap I/O headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of three
 * different lists.  When free it is on the free list;
 * when allocated and the I/O queued, it is on the swap
 * device list; and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * on a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];


#ifdef	FASTVAX
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif
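
/*
 * Added commentary (not in the original): under FASTVAX this macro
 * open-codes the free-list unlink, presumably to avoid procedure-call
 * overhead on a hot path; it is identical in effect to the C function
 * notavail() defined later in this file for the non-FASTVAX case.
 */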

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	EPAWNJ
		trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
	trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;		/* pay for read */
		}
#ifdef	EPAWNJ
		else
			trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;		/* pay in advance */
		}
	}
	if(bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}
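
/*
 * Illustrative sketch (not in the original): a sequential reader
 * typically names the next block as the read-ahead block, so that it
 * is already in core by the time it is wanted; "bn" is an assumption
 * for the example only.
 *
 *	bp = breada(dev, bn, bn + 1);
 *	... use the buffer ...
 *	brelse(bp);
 */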

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one has paid yet */
#ifdef	EPAWNJ
	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one has paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if(dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
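
/*
 * Illustrative sketch (not in the original): the delayed-write
 * pattern for a partial-block update.  The block is read, part of it
 * is overwritten, and the disk write is postponed in the expectation
 * of further writes to the same block; "bn", "off" and "n" are
 * assumptions for the example only.
 *
 *	bp = bread(dev, bn);
 *	bcopy(data, bp->b_un.b_addr + off, n);
 *	bdwrite(bp);
 */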

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);
		bp->b_dev = NODEV;  /* no assoc. on error */
	}
	s = spl6();
	if(bp->b_flags & (B_AGE|B_ERROR)) {
		/* aged or erroneous: insert at head of free list, to be reused first */
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		/* normal case: insert at tail, preserving LRU order */
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev
					&& !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x, dblkno;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
    loop:
	(void) spl0();
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev
					|| bp->b_flags & B_INVAL)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;
	/* INLINE EXPANSION OF bunhash(bp) */
#ifdef EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
	} else {
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");
	}
done:
	(void) spl0();
	/* END INLINE EXPANSION */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}
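
/*
 * Illustrative sketch (not in the original): getblk is the right
 * entry when a block is about to be written in its entirety, since
 * no read need be done; the caller must not trust the contents
 * unless B_DONE is set.  "bn" is an assumption for the example only.
 *
 *	bp = getblk(dev, bn);
 *	clrbuf(bp);
 *	... fill in bp->b_un.b_addr ...
 *	bwrite(bp);
 */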

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV) {
#ifdef EPAWNJ
		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
		bunhash(bp);
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;
	return(bp);
}

/*
 * Remove a buffer from its hash chain.
 */
bunhash(bp)
	register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifndef FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
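
/*
 * Added commentary (not in the original): iodone is the completion
 * half of the strategy protocol; a device driver calls it, normally
 * from its interrupt routine once a transfer finishes, after setting
 * b_resid and, on failure, B_ERROR.  Schematically, a driver's
 * completion path might end:
 *
 *	bp->b_resid = 0;
 *	iodone(bp);
 */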

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * Swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int rdflg, flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);	/* at most 120 clicks per transfer */
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}
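
/*
 * Added commentary (not in the original): swap() thus has two modes.
 * For ordinary swapping of process segments it loops synchronously,
 * sleeping on each chunk until B_DONE; for a dirty page push
 * (B_DIRTY) it queues a single asynchronous transfer and returns at
 * once, leaving iodone() to hand the header to the pageout daemon
 * via bclnlist.
 */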

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure there is no looping (e.g. in vmsched trying to
	 * swap out) mark the process locked in core (as though
	 * done by the user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

loop:
	(void) spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	(void) spl0();
}

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
dev_t dev;
int rw;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	(void) spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error==0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		(void) spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*
 * Trim a raw transfer to at most 60 Kbytes; the result is passed
 * back in b_bcount (the declared return value is never used).
 */
/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}
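
/*
 * Illustrative sketch (not in the original): a character device's
 * read entry conventionally funnels through physio; "xxread",
 * "xxstrategy" and "rxxbuf" are hypothetical names for the example
 * only.
 *
 *	xxread(dev)
 *	dev_t dev;
 *	{
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 */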

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness.						... kre
 */
binval(dev)
dev_t dev;
{
	register struct buf *bp, *dp;

	dp = bdevsw[major(dev)].d_tab;

	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_dev == dev)
			bp->b_flags |= B_INVAL;
}
822