xref: /csrg-svn/sys/kern/vfs_cluster.c (revision 2706)
1 /*	vfs_cluster.c	4.13	02/26/81	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/dir.h"
6 #include "../h/user.h"
7 #include "../h/buf.h"
8 #include "../h/conf.h"
9 #include "../h/proc.h"
10 #include "../h/seg.h"
11 #include "../h/pte.h"
12 #include "../h/vm.h"
13 #include "../h/trace.h"
14 
15 /*
16  * The following several routines allocate and free
17  * buffers with various side effects.  In general the
18  * arguments to an allocate routine are a device and
19  * a block number, and the value is a pointer
20  * to the buffer header; the buffer is marked "busy"
21  * so that no one else can touch it.  If the block was
22  * already in core, no I/O need be done; if it is
23  * already busy, the process waits until it becomes free.
24  * The following routines allocate a buffer:
25  *	getblk
26  *	bread
27  *	breada
28  *	baddr	(if it is incore)
29  * Eventually the buffer must be released, possibly with the
30  * side effect of writing it out, by using one of
31  *	bwrite
32  *	bdwrite
33  *	bawrite
34  *	brelse
35  */
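/*
 * Editorial note (not part of the original source): a minimal sketch of
 * the usual calling pattern, assuming a caller that already has a device
 * and logical block number.  A block that was only partially rewritten
 * is released with bdwrite() so a following write of the same block can
 * be absorbed; a block that was merely examined goes back via brelse().
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno);
 *	if ((bp->b_flags & B_ERROR) == 0) {
 *		... use or modify bp->b_un.b_addr ...
 *		bdwrite(bp);
 *	} else
 *		brelse(bp);
 */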
36 
37 #define	BUFHSZ	63
38 struct	bufhd bufhash[BUFHSZ];
39 #define	BUFHASH(dev, dblkno)	\
40 		((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])
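/*
 * Editorial note (not part of the original source): buffers live on
 * doubly-linked hash chains headed by bufhash[]; BUFHASH picks a chain
 * by summing the device and disk block number modulo BUFHSZ.  The cast
 * to (struct buf *) lets a struct bufhd header stand in for a buffer at
 * the head of its chain, so the b_forw/b_back links can be followed
 * uniformly (see incore(), getblk() and binval() below).
 */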
41 
42 /*
43  * Initialize hash links for buffers.
44  */
45 bhinit()
46 {
47 	register int i;
48 	register struct bufhd *bp;
49 
50 	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
51 		bp->b_forw = bp->b_back = (struct buf *)bp;
52 }
53 
54 /* #define	DISKMON	1 */
55 
56 #ifdef	DISKMON
57 struct {
58 	int	nbuf;
59 	long	nread;
60 	long	nreada;
61 	long	ncache;
62 	long	nwrite;
63 	long	bufcount[NBUF];
64 } io_info;
65 #endif
66 
67 /*
68  * Swap IO headers -
69  * They contain the necessary information for the swap I/O.
70  * At any given time, a swap header can be on one of three
71  * different lists: when free it is on the free list;
72  * when allocated and the I/O queued, it is on the swap
73  * device list; and finally, if the operation was a dirty
74  * page push, when the I/O completes, it is inserted
75  * in a list of cleaned pages to be processed by the pageout daemon.
76  */
77 struct	buf swbuf[NSWBUF];
78 short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
79 int	swpf[NSWBUF];
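/*
 * Editorial note (not part of the original source): swsize[] and swpf[]
 * are parallel to swbuf[] and are indexed by (bp - swbuf).  swap() fills
 * them in for dirty-page pushes, and iodone() copies them back into the
 * buffer header (b_bcount, b_pfcent) before linking the header onto the
 * cleaned list for the pageout daemon.
 */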
80 
81 
82 #ifndef	UNFAST
83 #define	notavail(bp) \
84 { \
85 	int s = spl6(); \
86 	(bp)->av_back->av_forw = (bp)->av_forw; \
87 	(bp)->av_forw->av_back = (bp)->av_back; \
88 	(bp)->b_flags |= B_BUSY; \
89 	splx(s); \
90 }
91 #endif
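/*
 * Editorial note (not part of the original source): this is the normal,
 * inline form of notavail(); the function version below under
 * "#ifdef UNFAST" is equivalent.  Both unlink the buffer from whichever
 * free list it is on (via its av_forw/av_back links) at elevated
 * priority and mark it B_BUSY.
 */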
92 
93 /*
94  * Read in (if necessary) the block and return a buffer pointer.
95  */
96 struct buf *
97 bread(dev, blkno)
98 dev_t dev;
99 daddr_t blkno;
100 {
101 	register struct buf *bp;
102 
103 	bp = getblk(dev, blkno);
104 	if (bp->b_flags&B_DONE) {
105 #ifdef	EPAWNJ
106 		trace(TR_BREAD|TR_HIT, dev, blkno);
107 #endif
108 #ifdef	DISKMON
109 		io_info.ncache++;
110 #endif
111 		return(bp);
112 	}
113 	bp->b_flags |= B_READ;
114 	bp->b_bcount = BSIZE;
115 	(*bdevsw[major(dev)].d_strategy)(bp);
116 #ifdef	EPAWNJ
117 	trace(TR_BREAD|TR_MISS, dev, blkno);
118 #endif
119 #ifdef	DISKMON
120 	io_info.nread++;
121 #endif
122 	u.u_vm.vm_inblk++;		/* pay for read */
123 	iowait(bp);
124 	return(bp);
125 }
126 
127 /*
128  * Read in the block, like bread, but also start I/O on the
129  * read-ahead block (which is not allocated to the caller).
130  */
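/*
 * Editorial note (not part of the original source): an illustrative
 * call, assuming the caller has computed the next logical block as the
 * read-ahead candidate (a hypothetical "nextblk"):
 *
 *	bp = breada(dev, blkno, nextblk);
 *
 * If the read-ahead block is already in core the read-ahead is skipped,
 * and a read-ahead block number of 0 disables it entirely.
 */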
131 struct buf *
132 breada(dev, blkno, rablkno)
133 dev_t dev;
134 daddr_t blkno, rablkno;
135 {
136 	register struct buf *bp, *rabp;
137 
138 	bp = NULL;
139 	if (!incore(dev, blkno)) {
140 		bp = getblk(dev, blkno);
141 		if ((bp->b_flags&B_DONE) == 0) {
142 			bp->b_flags |= B_READ;
143 			bp->b_bcount = BSIZE;
144 			(*bdevsw[major(dev)].d_strategy)(bp);
145 #ifdef	EPAWNJ
146 			trace(TR_BREAD|TR_MISS, dev, blkno);
147 #endif
148 #ifdef	DISKMON
149 			io_info.nread++;
150 #endif
151 			u.u_vm.vm_inblk++;		/* pay for read */
152 		}
153 #ifdef	EPAWNJ
154 		else
155 			trace(TR_BREAD|TR_HIT, dev, blkno);
156 #endif
157 	}
158 	if (rablkno && !incore(dev, rablkno)) {
159 		rabp = getblk(dev, rablkno);
160 		if (rabp->b_flags & B_DONE) {
161 			brelse(rabp);
162 #ifdef	EPAWNJ
163 			trace(TR_BREAD|TR_HIT|TR_RA, dev, rablkno);
164 #endif
165 		} else {
166 			rabp->b_flags |= B_READ|B_ASYNC;
167 			rabp->b_bcount = BSIZE;
168 			(*bdevsw[major(dev)].d_strategy)(rabp);
169 #ifdef	EPAWNJ
170 			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
171 #endif
172 #ifdef	DISKMON
173 			io_info.nreada++;
174 #endif
175 			u.u_vm.vm_inblk++;		/* pay in advance */
176 		}
177 	}
178 	if(bp == NULL)
179 		return(bread(dev, blkno));
180 	iowait(bp);
181 	return(bp);
182 }
183 
184 /*
185  * Write the buffer, waiting for completion.
186  * Then release the buffer.
187  */
188 bwrite(bp)
189 register struct buf *bp;
190 {
191 	register flag;
192 
193 	flag = bp->b_flags;
194 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
195 	bp->b_bcount = BSIZE;
196 #ifdef	DISKMON
197 	io_info.nwrite++;
198 #endif
199 	if ((flag&B_DELWRI) == 0)
200 		u.u_vm.vm_oublk++;		/* no one paid yet */
201 #ifdef	EPAWNJ
202 	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
203 #endif
204 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
205 	if ((flag&B_ASYNC) == 0) {
206 		iowait(bp);
207 		brelse(bp);
208 	} else if (flag & B_DELWRI)
209 		bp->b_flags |= B_AGE;
210 	else
211 		geterror(bp);
212 }
213 
214 /*
215  * Release the buffer, marking it so that if it is grabbed
216  * for another purpose it will be written out before being
217  * given up (e.g. when writing a partial block where it is
218  * assumed that another write for the same block will soon follow).
219  * This can't be done for magtape, since writes must be done
220  * in the same order as requested.
221  */
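/*
 * Editorial note (not part of the original source): of the three write
 * paths, bwrite() writes and waits, bawrite() starts the write and
 * returns, and bdwrite() only marks the buffer B_DELWRI so the actual
 * write happens later, when the buffer is reclaimed by getblk()/geteblk()
 * or flushed by bflush().  The partial-block case mentioned above (in
 * the write path) is the typical bdwrite() caller.
 */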
222 bdwrite(bp)
223 register struct buf *bp;
224 {
225 	register int flags;
226 
227 	if ((bp->b_flags&B_DELWRI) == 0)
228 		u.u_vm.vm_oublk++;		/* no one paid yet */
229 	flags = bdevsw[major(bp->b_dev)].d_flags;
230 	if(flags & B_TAPE)
231 		bawrite(bp);
232 	else {
233 		bp->b_flags |= B_DELWRI | B_DONE;
234 		brelse(bp);
235 	}
236 }
237 
238 /*
239  * Release the buffer, start I/O on it, but don't wait for completion.
240  */
241 bawrite(bp)
242 register struct buf *bp;
243 {
244 
245 	bp->b_flags |= B_ASYNC;
246 	bwrite(bp);
247 }
248 
249 /*
250  * release the buffer, with no I/O implied.
251  */
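/*
 * Editorial note (not part of the original source): buffers whose
 * contents are no longer valid (B_ERROR or B_INVAL) are put at the front
 * of the "most free" list so they are reused first; valid buffers go to
 * the locked, age, or LRU list according to their flags.  getblk() and
 * geteblk() reclaim by scanning bfreelist[] from the top down, stopping
 * before the first list, so buffers left there are never stolen.
 */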
252 brelse(bp)
253 register struct buf *bp;
254 {
255 	register struct buf *flist;
256 	register s;
257 
258 	if (bp->b_flags&B_WANTED)
259 		wakeup((caddr_t)bp);
260 	if (bfreelist[0].b_flags&B_WANTED) {
261 		bfreelist[0].b_flags &= ~B_WANTED;
262 		wakeup((caddr_t)bfreelist);
263 	}
264 	if (bp->b_flags&B_ERROR)
265 		if (bp->b_flags & B_LOCKED)
266 			bp->b_flags &= ~B_ERROR;	/* try again later */
267 		else
268 			bp->b_dev = NODEV;  		/* no assoc */
269 	s = spl6();
270 	if (bp->b_flags & (B_ERROR|B_INVAL)) {
271 		/* block has no info ... put at front of most free list */
272 		flist = &bfreelist[BQUEUES-1];
273 		flist->av_forw->av_back = bp;
274 		bp->av_forw = flist->av_forw;
275 		flist->av_forw = bp;
276 		bp->av_back = flist;
277 	} else {
278 		if (bp->b_flags & B_LOCKED)
279 			flist = &bfreelist[BQ_LOCKED];
280 		else if (bp->b_flags & B_AGE)
281 			flist = &bfreelist[BQ_AGE];
282 		else
283 			flist = &bfreelist[BQ_LRU];
284 		flist->av_back->av_forw = bp;
285 		bp->av_back = flist->av_back;
286 		flist->av_back = bp;
287 		bp->av_forw = flist;
288 	}
289 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
290 	splx(s);
291 }
292 
293 /*
294  * See if the block is associated with some buffer
295  * (mainly to avoid getting hung up on a wait in breada)
296  */
297 incore(dev, blkno)
298 dev_t dev;
299 daddr_t blkno;
300 {
301 	register struct buf *bp;
302 	register struct buf *dp;
303 	register int dblkno = fsbtodb(blkno);
304 
305 	dp = BUFHASH(dev, dblkno);
306 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
307 		if (bp->b_blkno == dblkno && bp->b_dev == dev &&
308 		    !(bp->b_flags & B_INVAL))
309 			return (1);
310 	return (0);
311 }
312 
313 struct buf *
314 baddr(dev, blkno)
315 dev_t dev;
316 daddr_t blkno;
317 {
318 
319 	if (incore(dev, blkno))
320 		return (bread(dev, blkno));
321 	return (0);
322 }
323 
324 /*
325  * Assign a buffer for the given block.  If the appropriate
326  * block is already associated, return it; otherwise search
327  * for the oldest non-busy buffer and reassign it.
328  */
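/*
 * Editorial note (not part of the original source): the search runs in
 * two stages.  First the hash chain chosen by BUFHASH(dev, dblkno) is
 * walked; a busy match is waited for and the search restarted, an idle
 * match is pulled off its free list and returned with B_CACHE set.
 * Failing that, the oldest reusable buffer is taken from the free lists;
 * if it holds delayed-write data it is first written asynchronously and
 * the whole search retried, otherwise it is moved onto the new hash
 * chain and re-keyed with the new dev and block number.
 */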
329 struct buf *
330 getblk(dev, blkno)
331 dev_t dev;
332 daddr_t blkno;
333 {
334 	register struct buf *bp, *dp, *ep;
335 	register int dblkno = fsbtodb(blkno);
336 #ifdef	DISKMON
337 	register int i;
338 #endif
339 
340 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
341 		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
342 	dblkno = fsbtodb(blkno);
343 	dp = BUFHASH(dev, dblkno);
344     loop:
345 	(void) spl0();
346 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
347 		if (bp->b_blkno != dblkno || bp->b_dev != dev ||
348 		    bp->b_flags&B_INVAL)
349 			continue;
350 		(void) spl6();
351 		if (bp->b_flags&B_BUSY) {
352 			bp->b_flags |= B_WANTED;
353 			sleep((caddr_t)bp, PRIBIO+1);
354 			goto loop;
355 		}
356 		(void) spl0();
357 #ifdef	DISKMON
358 		i = 0;
359 		dp = bp->av_forw;
360 		while ((dp->b_flags & B_HEAD) == 0) {
361 			i++;
362 			dp = dp->av_forw;
363 		}
364 		if (i<NBUF)
365 			io_info.bufcount[i]++;
366 #endif
367 		notavail(bp);
368 		bp->b_flags |= B_CACHE;
369 		return(bp);
370 	}
371 	if (major(dev) >= nblkdev)
372 		panic("blkdev");
373 	(void) spl6();
374 	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
375 		if (ep->av_forw != ep)
376 			break;
377 	if (ep == bfreelist) {		/* no free blocks at all */
378 		ep->b_flags |= B_WANTED;
379 		sleep((caddr_t)ep, PRIBIO+1);
380 		goto loop;
381 	}
382 	(void) spl0();
383 	bp = ep->av_forw;
384 	notavail(bp);
385 	if (bp->b_flags & B_DELWRI) {
386 		bp->b_flags |= B_ASYNC;
387 		bwrite(bp);
388 		goto loop;
389 	}
390 #ifdef EPAWNJ
391 	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
392 #endif
393 	bp->b_flags = B_BUSY;
394 	bp->b_back->b_forw = bp->b_forw;
395 	bp->b_forw->b_back = bp->b_back;
396 	bp->b_forw = dp->b_forw;
397 	bp->b_back = dp;
398 	dp->b_forw->b_back = bp;
399 	dp->b_forw = bp;
400 	bp->b_dev = dev;
401 	bp->b_blkno = dblkno;
402 	return(bp);
403 }
404 
405 /*
406  * get an empty block,
407  * not assigned to any particular device
408  */
409 struct buf *
410 geteblk()
411 {
412 	register struct buf *bp, *dp;
413 
414 loop:
415 	(void) spl6();
416 	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
417 		if (dp->av_forw != dp)
418 			break;
419 	if (dp == bfreelist) {		/* no free blocks */
420 		dp->b_flags |= B_WANTED;
421 		sleep((caddr_t)dp, PRIBIO+1);
422 		goto loop;
423 	}
424 	(void) spl0();
425 	bp = dp->av_forw;
426 	notavail(bp);
427 	if (bp->b_flags & B_DELWRI) {
428 		bp->b_flags |= B_ASYNC;
429 		bwrite(bp);
430 		goto loop;
431 	}
432 #ifdef EPAWNJ
433 	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
434 #endif
435 	bp->b_flags = B_BUSY|B_INVAL;
436 	bp->b_back->b_forw = bp->b_forw;
437 	bp->b_forw->b_back = bp->b_back;
438 	bp->b_forw = dp->b_forw;
439 	bp->b_back = dp;
440 	dp->b_forw->b_back = bp;
441 	dp->b_forw = bp;
442 	bp->b_dev = (dev_t)NODEV;
443 	return(bp);
444 }
445 
446 /*
447  * Wait for I/O completion on the buffer; return errors
448  * to the user.
449  */
450 iowait(bp)
451 register struct buf *bp;
452 {
453 
454 	(void) spl6();
455 	while ((bp->b_flags&B_DONE)==0)
456 		sleep((caddr_t)bp, PRIBIO);
457 	(void) spl0();
458 	geterror(bp);
459 }
460 
461 #ifdef UNFAST
462 /*
463  * Unlink a buffer from the available list and mark it busy.
464  * (internal interface)
465  */
466 notavail(bp)
467 register struct buf *bp;
468 {
469 	register s;
470 
471 	s = spl6();
472 	bp->av_back->av_forw = bp->av_forw;
473 	bp->av_forw->av_back = bp->av_back;
474 	bp->b_flags |= B_BUSY;
475 	splx(s);
476 }
477 #endif
478 
479 /*
480  * Mark I/O complete on a buffer. If the header
481  * indicates a dirty page push completion, the
482  * header is inserted into the ``cleaned'' list
483  * to be processed by the pageout daemon. Otherwise
484  * release it if I/O is asynchronous, and wake
485  * up anyone waiting for it.
486  */
487 iodone(bp)
488 register struct buf *bp;
489 {
490 	register int s;
491 
492 	if (bp->b_flags & B_DONE)
493 		panic("dup iodone");
494 	bp->b_flags |= B_DONE;
495 	if (bp->b_flags & B_DIRTY) {
496 		if (bp->b_flags & B_ERROR)
497 			panic("IO err in push");
498 		s = spl6();
499 		cnt.v_pgout++;
500 		bp->av_forw = bclnlist;
501 		bp->b_bcount = swsize[bp - swbuf];
502 		bp->b_pfcent = swpf[bp - swbuf];
503 		bclnlist = bp;
504 		if (bswlist.b_flags & B_WANTED)
505 			wakeup((caddr_t)&proc[2]);
506 		splx(s);
507 		return;
508 	}
509 	if (bp->b_flags&B_ASYNC)
510 		brelse(bp);
511 	else {
512 		bp->b_flags &= ~B_WANTED;
513 		wakeup((caddr_t)bp);
514 	}
515 }
516 
517 /*
518  * Zero the core associated with a buffer.
519  */
520 clrbuf(bp)
521 struct buf *bp;
522 {
523 	register *p;
524 	register c;
525 
526 	p = bp->b_un.b_words;
527 	c = BSIZE/sizeof(int);
528 	do
529 		*p++ = 0;
530 	while (--c);
531 	bp->b_resid = 0;
532 }
533 
534 /*
535  * swap I/O -
536  *
537  * If the flag indicates a dirty page push initiated
538  * by the pageout daemon, we map the page into the i'th
539  * virtual page of process 2 (the daemon itself) where i is
540  * the index of the swap header that has been allocated.
541  * We simply initialize the header and queue the I/O but
542  * do not wait for completion. When the I/O completes,
543  * iodone() will link the header to a list of cleaned
544  * pages to be processed by the pageout daemon.
545  */
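/*
 * Editorial note (not part of the original source): for a dirty-page
 * push, the page table entries for the pages being pushed are copied
 * into the page tables of process 2 (the pageout daemon), so the I/O is
 * done through the daemon's address space.  swsize[] and swpf[] record
 * the transfer size and page-frame information for iodone() to hand to
 * the pageout daemon via the cleaned list.
 */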
546 swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
547 	struct proc *p;
548 	swblk_t dblkno;
549 	caddr_t addr;
550 	int flag, nbytes;
551 	dev_t dev;
552 	unsigned pfcent;
553 {
554 	register struct buf *bp;
555 	register int c;
556 	int p2dp;
557 	register struct pte *dpte, *vpte;
558 
559 	(void) spl6();
560 	while (bswlist.av_forw == NULL) {
561 		bswlist.b_flags |= B_WANTED;
562 		sleep((caddr_t)&bswlist, PSWP+1);
563 	}
564 	bp = bswlist.av_forw;
565 	bswlist.av_forw = bp->av_forw;
566 	(void) spl0();
567 
568 	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
569 	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
570 		if (rdflg == B_READ)
571 			sum.v_pswpin += btoc(nbytes);
572 		else
573 			sum.v_pswpout += btoc(nbytes);
574 	bp->b_proc = p;
575 	if (flag & B_DIRTY) {
576 		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
577 		dpte = dptopte(&proc[2], p2dp);
578 		vpte = vtopte(p, btop(addr));
579 		for (c = 0; c < nbytes; c += NBPG) {
580 			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
581 				panic("swap bad pte");
582 			*dpte++ = *vpte++;
583 		}
584 		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
585 	} else
586 		bp->b_un.b_addr = addr;
587 	while (nbytes > 0) {
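		/*
		 * Editorial note (not part of the original source): each pass
		 * moves at most ctob(120) bytes; a dirty-page push must fit in
		 * a single transfer (see the "big push" panic below).
		 */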
588 		c = imin(ctob(120), nbytes);
589 		bp->b_bcount = c;
590 		bp->b_blkno = dblkno;
591 		bp->b_dev = dev;
592 		if (flag & B_DIRTY) {
593 			swpf[bp - swbuf] = pfcent;
594 			swsize[bp - swbuf] = nbytes;
595 		}
596 		(*bdevsw[major(dev)].d_strategy)(bp);
597 		if (flag & B_DIRTY) {
598 			if (c < nbytes)
599 				panic("big push");
600 			return;
601 		}
602 		(void) spl6();
603 		while((bp->b_flags&B_DONE)==0)
604 			sleep((caddr_t)bp, PSWP);
605 		(void) spl0();
606 		bp->b_un.b_addr += c;
607 		bp->b_flags &= ~B_DONE;
608 		if (bp->b_flags & B_ERROR) {
609 			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
610 				panic("hard IO err in swap");
611 			swkill(p, (char *)0);
612 		}
613 		nbytes -= c;
614 		dblkno += btoc(c);
615 	}
616 	(void) spl6();
617 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
618 	bp->av_forw = bswlist.av_forw;
619 	bswlist.av_forw = bp;
620 	if (bswlist.b_flags & B_WANTED) {
621 		bswlist.b_flags &= ~B_WANTED;
622 		wakeup((caddr_t)&bswlist);
623 		wakeup((caddr_t)&proc[2]);
624 	}
625 	(void) spl0();
626 }
627 
628 /*
629  * If rout == 0 then killed on swap error, else
630  * rout is the name of the routine where we ran out of
631  * swap space.
632  */
633 swkill(p, rout)
634 	struct proc *p;
635 	char *rout;
636 {
637 
638 	printf("%d: ", p->p_pid);
639 	if (rout)
640 		printf("out of swap space in %s\n", rout);
641 	else
642 		printf("killed on swap error\n");
643 	/*
644 	 * To be sure there is no looping (e.g. in vmsched trying to
645 	 * swap out), mark the process locked in core (as though
646 	 * done by the user) after killing it so no one will try
647 	 * to swap it out.
648 	 */
649 	psignal(p, SIGKILL);
650 	p->p_flag |= SULOCK;
651 }
652 
653 /*
654  * make sure all write-behind blocks
655  * on dev (or NODEV for all)
656  * are flushed out.
657  * (from umount and update)
658  */
659 bflush(dev)
660 dev_t dev;
661 {
662 	register struct buf *bp;
663 	register struct buf *flist;
664 
665 loop:
666 	(void) spl6();
667 	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
668 	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
669 		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
670 			bp->b_flags |= B_ASYNC;
671 			notavail(bp);
672 			bwrite(bp);
673 			goto loop;
674 		}
675 	}
676 	(void) spl0();
677 }
678 
679 /*
680  * Raw I/O. The arguments are
681  *	The strategy routine for the device
682  *	A buffer, which will always be a special buffer
683  *	  header owned exclusively by the device for this purpose
684  *	The device number
685  *	Read/write flag
686  * Essentially all the work is computing physical addresses and
687  * validating them.
688  * If the user has the proper access privileges, the process is
689  * marked 'delayed unlock' and the pages involved in the I/O are
690  * faulted and locked. After the completion of the I/O, the above pages
691  * are unlocked.
692  */
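/*
 * Editorial note (not part of the original source): a sketch of how a
 * raw (character) device read routine might call this, using a
 * hypothetical driver strategy routine "xxstrategy" and raw buffer
 * header "rxxbuf":
 *
 *	xxread(dev)
 *	dev_t dev;
 *	{
 *
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 *
 * The mincnt routine (minphys here) bounds each transfer; the loop below
 * repeats until the whole user request is done or an error occurs.
 */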
693 physio(strat, bp, dev, rw, mincnt)
694 int (*strat)();
695 register struct buf *bp;
696 unsigned (*mincnt)();
697 {
698 	register int c;
699 	char *a;
700 
701 	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
702 		u.u_error = EFAULT;
703 		return;
704 	}
705 	(void) spl6();
706 	while (bp->b_flags&B_BUSY) {
707 		bp->b_flags |= B_WANTED;
708 		sleep((caddr_t)bp, PRIBIO+1);
709 	}
710 	bp->b_error = 0;
711 	bp->b_proc = u.u_procp;
712 	bp->b_un.b_addr = u.u_base;
713 	while (u.u_count != 0 && bp->b_error==0) {
714 		bp->b_flags = B_BUSY | B_PHYS | rw;
715 		bp->b_dev = dev;
716 		bp->b_blkno = u.u_offset >> PGSHIFT;
717 		bp->b_bcount = u.u_count;
718 		(*mincnt)(bp);
719 		c = bp->b_bcount;
720 		u.u_procp->p_flag |= SPHYSIO;
721 		vslock(a = bp->b_un.b_addr, c);
722 		(*strat)(bp);
723 		(void) spl6();
724 		while ((bp->b_flags&B_DONE) == 0)
725 			sleep((caddr_t)bp, PRIBIO);
726 		vsunlock(a, c, rw);
727 		u.u_procp->p_flag &= ~SPHYSIO;
728 		if (bp->b_flags&B_WANTED)
729 			wakeup((caddr_t)bp);
730 		(void) spl0();
731 		bp->b_un.b_addr += c;
732 		u.u_count -= c;
733 		u.u_offset += c;
734 	}
735 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
736 	u.u_count = bp->b_resid;
737 	geterror(bp);
738 }
739 
740 /*ARGSUSED*/
741 unsigned
742 minphys(bp)
743 struct buf *bp;
744 {
745 
746 	if (bp->b_bcount > 60 * 1024)
747 		bp->b_bcount = 60 * 1024;
748 }
749 
750 /*
751  * Pick up the device's error number and pass it to the user;
752  * if there is an error but the number is 0 set a generalized
753  * code.  Actually the latter is always true because devices
754  * don't yet return specific errors.
755  */
756 geterror(bp)
757 register struct buf *bp;
758 {
759 
760 	if (bp->b_flags&B_ERROR)
761 		if ((u.u_error = bp->b_error)==0)
762 			u.u_error = EIO;
763 }
764 
765 /*
766  * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
767  *
768  * This is not nicely done at all - the buffer ought to be removed from the
769  * hash chains & have its dev/blkno fields clobbered, but unfortunately we
770  * can't do that here, as it is quite possible that the block is still
771  * being used for i/o. Eventually, all disc drivers should be forced to
772  * have a close routine, which ought to ensure that the queue is empty, then
773  * properly flush the queues. Until that happy day, this suffices for
774  * correctness.						... kre
775  */
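/*
 * Editorial note (not part of the original source): binval() only sets
 * B_INVAL on matching buffers; incore() and getblk() skip buffers with
 * that flag, and brelse() sends them to the "most free" list, so the
 * stale contents are simply never handed out again.
 */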
776 binval(dev)
777 dev_t dev;
778 {
779 	register struct buf *bp;
780 	register struct bufhd *hp;
781 #define dp ((struct buf *)hp)
782 
783 	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
784 		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
785 			if (bp->b_dev == dev)
786 				bp->b_flags |= B_INVAL;
787 }
788