xref: /openbsd-src/sys/nfs/nfs_bio.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: nfs_bio.c,v 1.22 2001/06/27 04:58:46 art Exp $	*/
2 /*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Rick Macklem at The University of Guelph.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
40  */
41 
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/resourcevar.h>
46 #include <sys/signalvar.h>
47 #include <sys/proc.h>
48 #include <sys/buf.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51 #include <sys/kernel.h>
52 #include <sys/namei.h>
53 
54 #include <vm/vm.h>
55 
56 #include <nfs/rpcv2.h>
57 #include <nfs/nfsproto.h>
58 #include <nfs/nfs.h>
59 #include <nfs/nfsmount.h>
60 #include <nfs/nfsnode.h>
61 #include <nfs/nfs_var.h>
62 
63 extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
64 extern int nfs_numasync;
65 struct nfsstats nfsstats;
66 
/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 *
 * Reads through the buffer cache: validates cache consistency against
 * the server's mtime, optionally fires read-ahead rpcs at idle nfsiods,
 * then copies the requested range out of cache blocks with uiomove().
 * Returns 0 on success or an errno (EINTR if an interruptible mount's
 * block wait was signalled).
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	/* V3 mount that hasn't fetched fsinfo yet: get transfer sizes now. */
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	/* All cache blocks for a mount are nm_rsize bytes. */
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	/*
	 * There is no way to modify a symbolic link via NFS or via
	 * VFS, so we don't check if the link was modified
	 */
	if (vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			/* We wrote; force fresh attributes and record mtime. */
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				/* Server-side change: dump our cached data. */
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {
	    /* Root-of-mount symlink: bypass the cache, rpc directly. */
	    if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
		    return (nfs_readlinkrpc(vp, uio, cred));
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;	/* logical block */
		on = uio->uio_offset & (biosize - 1);	/* offset within it */
		bn = lbn * (biosize / DEV_BSIZE);	/* device block number */
		/* Set until we actually issue a read rpc for this block. */
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				/* No idle nfsiod: drop the block instead. */
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    } else
				brelse(rabp);
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
		if ((bp = incore(vp, bn)) &&
		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
		    (B_BUSY | B_WRITEINPROG))
			/* Write in progress owns the buf; don't brelse it. */
			got_buf = 0;
		else {
again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				error = nfs_doio(bp, cred, p);
				if (error) {
				    brelse(bp);
				    return (error);
				}
			}
		}
		/* Clip the transfer to the block and to EOF. */
		n = min((unsigned)(biosize - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			/*
			 * We were handed a cached block without reading it;
			 * if the wanted range falls outside its valid region
			 * we must push any dirty data and re-read.
			 */
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				if (!got_buf) {
				    bp = nfs_getcacheblk(vp, bn, biosize, p);
				    if (!bp)
					return (EINTR);
				    got_buf = 1;
				}
				bp->b_flags |= B_INVAFTERWRITE;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
		/* Finally clip to the valid region of the buffer. */
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		/* Whole link target is cached in block 0. */
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			error = nfs_doio(bp, cred, p);
			if (error) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
		break;
	    };

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		/* A symlink is read in one shot; force loop exit. */
		n = 0;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
	    }
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}
269 
/*
 * Vnode op for write using bio
 *
 * Copies user data into cache blocks, tracking the dirty and valid
 * sub-ranges of each block so partial-block writes can later be pushed
 * with a single rpc. IO_SYNC writes go out immediately via VOP_BWRITE();
 * full blocks are started asynchronously; partial blocks are delayed.
 * Returns 0 or an errno (EINTR, EFBIG on rlimit, server write errors).
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	/* Report (once) any async write error noted by nfs_doio(). */
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	/* V3 mount that hasn't fetched fsinfo yet: get transfer sizes now. */
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			/* Flush cached writes so size/attrs are current. */
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			/* Append: re-fetch attrs and start at server EOF. */
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;	/* logical block */
		on = uio->uio_offset & (biosize-1);	/* offset within it */
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);	/* device block number */
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		/* Stash a held write credential for later async pushes. */
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			/* Extending the file: grow our size and tell UVM. */
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		/* Merge the new range into the block's dirty region. */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		/* Keep the valid region a superset of the dirty region. */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			/* Block now full: start the write, don't wait. */
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			/* Partial block: leave it delayed-write. */
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}
429 
430 /*
431  * Get an nfs cache block.
432  * Allocate a new one if the block isn't currently in the cache
433  * and return the block marked busy. If the calling process is
434  * interrupted by a signal for an interruptible mount point, return
435  * NULL.
436  */
437 struct buf *
438 nfs_getcacheblk(vp, bn, size, p)
439 	struct vnode *vp;
440 	daddr_t bn;
441 	int size;
442 	struct proc *p;
443 {
444 	register struct buf *bp;
445 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
446 
447 	if (nmp->nm_flag & NFSMNT_INT) {
448 		bp = getblk(vp, bn, size, PCATCH, 0);
449 		while (bp == (struct buf *)0) {
450 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
451 				return ((struct buf *)0);
452 			bp = getblk(vp, bn, size, 0, 2 * hz);
453 		}
454 	} else
455 		bp = getblk(vp, bn, size, 0, 0);
456 	return (bp);
457 }
458 
/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 *
 * Serializes concurrent flushers via the NFLUSHINPROG/NFLUSHWANT flags
 * on the nfsnode, then calls vinvalbuf() until it succeeds. On an
 * interruptible mount a pending signal aborts with EINTR. Clears
 * NMODIFIED on success.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	/* Only NFSMNT_INT mounts honor the caller's interruptible request. */
	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			/* Signalled: release the flush lock before bailing. */
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		/* Retry uninterruptibly but with a timeout. */
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}
518 
/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 *
 * On success the buffer is queued on nfs_bufq and an idle nfsiod is
 * woken to process it; returns 0. Returns EIO when the caller should
 * fall back to a synchronous nfs_doio(). A write with no idle iod may
 * instead be converted to a delayed write (also returns 0).
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	int i,s;

	if (nfs_numasync == 0)
		return (EIO);
	/* Look for an idle nfsiod to hand the buffer to. */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
	    if (nfs_iodwant[i]) {
		if (bp->b_flags & B_READ) {
			/* Stash a held credential for the iod's read rpc. */
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			/* Stash a held credential for the iod's write rpc. */
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
		nfs_iodwant[i] = (struct proc *)0;
		wakeup((caddr_t)&nfs_iodwant[i]);
		return (0);
	    }

	/*
	 * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
	 * return EIO so the process will call nfs_doio() and do it
	 * synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing in synchronously. Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	splx(s);
	biodone(bp);
	return (0);
}
574 
/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 *
 * Builds a kernel uio over the buffer and issues the appropriate rpc:
 * physio-style (B_PHYS), cached read (B_READ, per vnode type), or a
 * write of the buffer's dirty region. Maintains b_validoff/b_validend
 * on reads and b_dirtyoff/b_dirtyend plus the V3 B_NEEDCOMMIT state on
 * writes, then completes the buffer with biodone(). Returns the rpc
 * error, if any.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
	    /*
	     * ...though reading /dev/drum still gets us here.
	     */
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    /* mapping was done by vmapbuf() */
	    io.iov_base = bp->b_data;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
	    if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop, cr);
	    } else {
		iomode = NFSV3WRITE_DATASYNC;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else if (bp->b_flags & B_READ) {
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	    case VREG:
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop, cr);
		if (!error) {
		    bp->b_validoff = 0;
		    if (uiop->uio_resid) {
			/*
			 * If len > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet.
			 * Just zero fill the rest of the valid area.
			 */
			diff = bp->b_bcount - uiop->uio_resid;
			len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
				+ diff);
			if (len > 0) {
			    len = min(len, uiop->uio_resid);
			    bzero((char *)bp->b_data + diff, len);
			    bp->b_validend = diff + len;
			} else
			    bp->b_validend = diff;
		    } else
			bp->b_validend = bp->b_bcount;
		}
		/*
		 * Executable's text changed on the server while it is
		 * running here: kill the process rather than let it run
		 * on inconsistent pages.
		 */
		if (p && (vp->v_flag & VTEXT) &&
		    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
			uprintf("Process killed due to text file modification\n");
			psignal(p, SIGKILL);
			p->p_holdcnt++;
		}
		break;
	    case VLNK:
		uiop->uio_offset = (off_t)0;
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, cr);
		break;
	    default:
		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
		break;
	    };
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else {
	    /* Write case: push only the dirty sub-range of the block. */
	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
		- bp->b_dirtyoff;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		+ bp->b_dirtyoff;
	    io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
	    uiop->uio_rw = UIO_WRITE;
	    nfsstats.write_bios++;
	    /* Async, not-yet-committed writes may go out unstable (V3). */
	    if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
		iomode = NFSV3WRITE_UNSTABLE;
	    else
		iomode = NFSV3WRITE_FILESYNC;
	    bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
	    printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
	    error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
	    if (!error && iomode == NFSV3WRITE_UNSTABLE)
		bp->b_flags |= B_NEEDCOMMIT;
	    else
		bp->b_flags &= ~B_NEEDCOMMIT;
	    bp->b_flags &= ~B_WRITEINPROG;

	    /*
	     * For an interrupted write, the buffer is still valid and the
	     * write hasn't been pushed to the server yet, so we can't set
	     * B_ERROR and report the interruption by setting B_EINTR. For
	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
	     * is essentially a noop.
	     * For the case of a V3 write rpc not being committed to stable
	     * storage, the block is still dirty and requires either a commit
	     * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
	     * before the block is reused. This is indicated by setting the
	     * B_DELWRI and B_NEEDCOMMIT flags.
	     */
	    if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
		    s = splbio();
		    buf_dirty(bp);
		    splx(s);

		    if (!(bp->b_flags & B_ASYNC) && error)
			    bp->b_flags |= B_EINTR;
	    } else {
		if (error) {
		    /* Remember the error on the nfsnode for nfs_write(). */
		    bp->b_flags |= B_ERROR;
		    bp->b_error = np->n_error = error;
		    np->n_flag |= NWRITEERR;
		}
		bp->b_dirtyoff = bp->b_dirtyend = 0;
	    }
	}
	bp->b_resid = uiop->uio_resid;
	/* Server rebooted (new write verifier): invalidate all commits. */
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}
735