xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 57783)
138882Smacklem /*
238882Smacklem  * Copyright (c) 1989 The Regents of the University of California.
338882Smacklem  * All rights reserved.
438882Smacklem  *
538882Smacklem  * This code is derived from software contributed to Berkeley by
638882Smacklem  * Rick Macklem at The University of Guelph.
738882Smacklem  *
844509Sbostic  * %sccs.include.redist.c%
938882Smacklem  *
10*57783Smckusick  *	@(#)nfs_bio.c	7.36 (Berkeley) 02/02/93
1138882Smacklem  */
1238882Smacklem 
1353322Smckusick #include <sys/param.h>
1455063Spendry #include <sys/systm.h>
1553322Smckusick #include <sys/resourcevar.h>
1653322Smckusick #include <sys/proc.h>
1753322Smckusick #include <sys/buf.h>
1853322Smckusick #include <sys/vnode.h>
1953322Smckusick #include <sys/trace.h>
2053322Smckusick #include <sys/mount.h>
2153322Smckusick #include <sys/kernel.h>
2256535Sbostic 
2353322Smckusick #include <vm/vm.h>
2456535Sbostic 
2553322Smckusick #include <nfs/nfsnode.h>
2653322Smckusick #include <nfs/rpcv2.h>
2753322Smckusick #include <nfs/nfsv2.h>
2853322Smckusick #include <nfs/nfs.h>
2953322Smckusick #include <nfs/nfsmount.h>
3053322Smckusick #include <nfs/nqnfs.h>
3138882Smacklem 
/*
 * Forward declarations of buffer-cache helpers used below, and externs
 * for the nfsiod async-daemon rendezvous state (nfs_bufq holds buffers
 * queued for the daemons; nfs_iodwant/nfs_numasync track idle daemons).
 * NOTE(review): the externs are presumably defined in the nfs syscall/
 * daemon code -- confirm their home before relying on this comment.
 */
32*57783Smckusick struct buf *nfsincore(), *nfs_getcacheblk(), *nfsgetblk();
33*57783Smckusick extern struct queue_entry nfs_bufq;
34*57783Smckusick extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
35*57783Smckusick extern int nfs_numasync;
3638882Smacklem 
3738882Smacklem /*
3838882Smacklem  * Vnode op for read using bio
3938882Smacklem  * Any similarity to readip() is purely coincidental
4038882Smacklem  */
4141897Smckusick nfs_bioread(vp, uio, ioflag, cred)
4238882Smacklem 	register struct vnode *vp;
4343348Smckusick 	register struct uio *uio;
4438882Smacklem 	int ioflag;
4538882Smacklem 	struct ucred *cred;
4638882Smacklem {
4738882Smacklem 	register struct nfsnode *np = VTONFS(vp);
48*57783Smckusick 	register int biosize, diff;
49*57783Smckusick 	struct buf *bp, *rabp;
5038882Smacklem 	struct vattr vattr;
51*57783Smckusick 	struct proc *p;
5252196Smckusick 	struct nfsmount *nmp;
53*57783Smckusick 	daddr_t lbn, bn, rabn;
54*57783Smckusick 	caddr_t baddr;
55*57783Smckusick 	int got_buf, len, nra, error = 0, n, on, not_readin;
	/*
	 * got_buf: nonzero when bp is held busy by us and must be
	 * brelse()'d before looping or returning.
	 * not_readin: nonzero when no read rpc was issued for this block,
	 * so b_validoff/b_validend may not cover the requested range.
	 */
5638882Smacklem 
5742241Smckusick #ifdef lint
5842241Smckusick 	ioflag = ioflag;
5942241Smckusick #endif /* lint */
6048047Smckusick #ifdef DIAGNOSTIC
6138882Smacklem 	if (uio->uio_rw != UIO_READ)
6238882Smacklem 		panic("nfs_read mode");
6348047Smckusick #endif
6438882Smacklem 	if (uio->uio_resid == 0)
6539584Smckusick 		return (0);
6641897Smckusick 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
6739584Smckusick 		return (EINVAL);
6852196Smckusick 	nmp = VFSTONFS(vp->v_mount);
6952196Smckusick 	biosize = nmp->nm_rsize;
70*57783Smckusick 	p = uio->uio_procp;
7138882Smacklem 	/*
7252196Smckusick 	 * For nfs, cache consistency can only be maintained approximately.
7352196Smckusick 	 * Although RFC1094 does not specify the criteria, the following is
7452196Smckusick 	 * believed to be compatible with the reference port.
7552196Smckusick 	 * For nqnfs, full cache consistency is maintained within the loop.
7652196Smckusick 	 * For nfs:
7738882Smacklem 	 * If the file's modify time on the server has changed since the
7838882Smacklem 	 * last read rpc or you have written to the file,
7938882Smacklem 	 * you may have lost data cache consistency with the
8038882Smacklem 	 * server, so flush all of the file's data out of the cache.
8141897Smckusick 	 * Then force a getattr rpc to ensure that you have up to date
8241897Smckusick 	 * attributes.
8352196Smckusick 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
8452196Smckusick 	 * the ones changing the modify time."
8538882Smacklem 	 * NB: This implies that cache data can be read when up to
8638882Smacklem 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
8738882Smacklem 	 * attributes this could be forced by setting n_attrstamp to 0 before
8853550Sheideman 	 * the VOP_GETATTR() call.
8938882Smacklem 	 */
9052196Smckusick 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
9141897Smckusick 		if (np->n_flag & NMODIFIED) {
9252196Smckusick 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
93*57783Smckusick 			     vp->v_type != VREG) {
94*57783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
95*57783Smckusick 					return (error);
96*57783Smckusick 			}
9741897Smckusick 			np->n_attrstamp = 0;
9841897Smckusick 			np->n_direofoffset = 0;
99*57783Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
10039750Smckusick 				return (error);
10154106Smckusick 			np->n_mtime = vattr.va_mtime.ts_sec;
10241897Smckusick 		} else {
103*57783Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
10441897Smckusick 				return (error);
10554106Smckusick 			if (np->n_mtime != vattr.va_mtime.ts_sec) {
10641897Smckusick 				np->n_direofoffset = 0;
107*57783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
108*57783Smckusick 					return (error);
10954106Smckusick 				np->n_mtime = vattr.va_mtime.ts_sec;
11041897Smckusick 			}
11139750Smckusick 		}
11238882Smacklem 	}
11338882Smacklem 	do {
11452196Smckusick 
11552196Smckusick 	    /*
11652196Smckusick 	     * Get a valid lease. If cached data is stale, flush it.
11752196Smckusick 	     */
118*57783Smckusick 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
119*57783Smckusick 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
120*57783Smckusick 		    do {
121*57783Smckusick 			error = nqnfs_getlease(vp, NQL_READ, cred, p);
122*57783Smckusick 		    } while (error == NQNFS_EXPIRED);
123*57783Smckusick 		    if (error)
12452196Smckusick 			return (error);
125*57783Smckusick 		    if (np->n_lrev != np->n_brev ||
126*57783Smckusick 			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
12752196Smckusick 			if (vp->v_type == VDIR) {
128*57783Smckusick 			    np->n_direofoffset = 0;
129*57783Smckusick 			    cache_purge(vp);
13052196Smckusick 			}
131*57783Smckusick 			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
132*57783Smckusick 			    return (error);
13352196Smckusick 			np->n_brev = np->n_lrev;
134*57783Smckusick 		    }
135*57783Smckusick 		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
136*57783Smckusick 		    np->n_direofoffset = 0;
137*57783Smckusick 		    cache_purge(vp);
138*57783Smckusick 		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
139*57783Smckusick 			return (error);
14052196Smckusick 		}
14152196Smckusick 	    }
	    /*
	     * A non-cachable lease bypasses the buffer cache entirely;
	     * do the rpc straight from/to the uio and return.
	     */
14252196Smckusick 	    if (np->n_flag & NQNFSNONCACHE) {
14352196Smckusick 		switch (vp->v_type) {
14452196Smckusick 		case VREG:
14552196Smckusick 			error = nfs_readrpc(vp, uio, cred);
14652196Smckusick 			break;
14752196Smckusick 		case VLNK:
14852196Smckusick 			error = nfs_readlinkrpc(vp, uio, cred);
14952196Smckusick 			break;
15052196Smckusick 		case VDIR:
15152196Smckusick 			error = nfs_readdirrpc(vp, uio, cred);
15252196Smckusick 			break;
15352196Smckusick 		};
15452196Smckusick 		return (error);
15552196Smckusick 	    }
156*57783Smckusick 	    baddr = (caddr_t)0;
15741897Smckusick 	    switch (vp->v_type) {
15841897Smckusick 	    case VREG:
15939750Smckusick 		nfsstats.biocache_reads++;
16043348Smckusick 		lbn = uio->uio_offset / biosize;
16143348Smckusick 		on = uio->uio_offset & (biosize-1);
162*57783Smckusick 		bn = lbn * (biosize / DEV_BSIZE);
163*57783Smckusick 		not_readin = 1;
164*57783Smckusick 
165*57783Smckusick 		/*
166*57783Smckusick 		 * Start the read ahead(s), as required.
167*57783Smckusick 		 */
168*57783Smckusick 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
169*57783Smckusick 		    lbn == vp->v_lastr + 1) {
170*57783Smckusick 		    for (nra = 0; nra < nmp->nm_readahead &&
171*57783Smckusick 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
172*57783Smckusick 			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
173*57783Smckusick 			if (!nfsincore(vp, rabn)) {
174*57783Smckusick 			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
175*57783Smckusick 			    if (!rabp)
176*57783Smckusick 				return (EINTR);
177*57783Smckusick 			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
178*57783Smckusick 				rabp->b_flags |= (B_READ | B_ASYNC);
179*57783Smckusick 				if (nfs_asyncio(rabp, cred)) {
180*57783Smckusick 				    rabp->b_flags |= B_INVAL;
181*57783Smckusick 				    brelse(rabp);
182*57783Smckusick 				}
183*57783Smckusick 			    }
184*57783Smckusick 			}
185*57783Smckusick 		    }
186*57783Smckusick 		}
187*57783Smckusick 
188*57783Smckusick 		/*
189*57783Smckusick 		 * If the block is in the cache and has the required data
190*57783Smckusick 		 * in a valid region, just copy it out.
191*57783Smckusick 		 * Otherwise, get the block and write back/read in,
192*57783Smckusick 		 * as required.
193*57783Smckusick 		 * A block that is busy doing a write rpc
194*57783Smckusick 		 * (B_BUSY|B_WRITEINPROG both set) is copied from directly
195*57783Smckusick 		 * without being held busy by us (got_buf == 0).
196*57783Smckusick 		 * NOTE(review): that unheld access is assumed safe for the
197*57783Smckusick 		 * duration of the write rpc -- confirm.
198*57783Smckusick 		 */
194*57783Smckusick 		if ((bp = nfsincore(vp, bn)) &&
195*57783Smckusick 		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
196*57783Smckusick 		    (B_BUSY | B_WRITEINPROG))
197*57783Smckusick 			got_buf = 0;
198*57783Smckusick 		else {
199*57783Smckusick again:
200*57783Smckusick 			bp = nfs_getcacheblk(vp, bn, biosize, p);
201*57783Smckusick 			if (!bp)
202*57783Smckusick 				return (EINTR);
203*57783Smckusick 			got_buf = 1;
204*57783Smckusick 			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
205*57783Smckusick 				bp->b_flags |= B_READ;
206*57783Smckusick 				not_readin = 0;
207*57783Smckusick 				if (error = nfs_doio(bp, cred, p)) {
208*57783Smckusick 				    brelse(bp);
209*57783Smckusick 				    return (error);
210*57783Smckusick 				}
211*57783Smckusick 			}
212*57783Smckusick 		}
		/* clamp n to the block remainder and to the file size */
21355057Spendry 		n = min((unsigned)(biosize - on), uio->uio_resid);
21438882Smacklem 		diff = np->n_size - uio->uio_offset;
21538882Smacklem 		if (diff < n)
21638882Smacklem 			n = diff;
		/*
		 * No read rpc was done for this block, so the valid region
		 * may not cover [on, on+n): invalidate the block (pushing
		 * any dirty data first) and retry from "again".
		 */
217*57783Smckusick 		if (not_readin && n > 0) {
218*57783Smckusick 			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
219*57783Smckusick 				if (!got_buf) {
220*57783Smckusick 				    bp = nfs_getcacheblk(vp, bn, biosize, p);
221*57783Smckusick 				    if (!bp)
222*57783Smckusick 					return (EINTR);
223*57783Smckusick 				    got_buf = 1;
224*57783Smckusick 				}
22552196Smckusick 				bp->b_flags |= B_INVAL;
22652196Smckusick 				if (bp->b_dirtyend > 0) {
227*57783Smckusick 				    if ((bp->b_flags & B_DELWRI) == 0)
228*57783Smckusick 					panic("nfsbioread");
229*57783Smckusick 				    if (VOP_BWRITE(bp) == EINTR)
230*57783Smckusick 					return (EINTR);
23152196Smckusick 				} else
232*57783Smckusick 				    brelse(bp);
23352196Smckusick 				goto again;
23452196Smckusick 			}
23552196Smckusick 		}
23639901Smckusick 		vp->v_lastr = lbn;
		/* never copy out past the valid region */
237*57783Smckusick 		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
238*57783Smckusick 		if (diff < n)
239*57783Smckusick 			n = diff;
24041897Smckusick 		break;
24141897Smckusick 	    case VLNK:
24241897Smckusick 		nfsstats.biocache_readlinks++;
243*57783Smckusick 		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
244*57783Smckusick 		if (!bp)
245*57783Smckusick 			return (EINTR);
246*57783Smckusick 		if ((bp->b_flags & B_DONE) == 0) {
247*57783Smckusick 			bp->b_flags |= B_READ;
248*57783Smckusick 			if (error = nfs_doio(bp, cred, p)) {
249*57783Smckusick 				brelse(bp);
250*57783Smckusick 				return (error);
251*57783Smckusick 			}
252*57783Smckusick 		}
253*57783Smckusick 		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
254*57783Smckusick 		got_buf = 1;
25541897Smckusick 		on = 0;
25641897Smckusick 		break;
25741897Smckusick 	    case VDIR:
25841897Smckusick 		nfsstats.biocache_readdirs++;
		/* the uio offset is the server's directory cookie */
259*57783Smckusick 		bn = (daddr_t)uio->uio_offset;
260*57783Smckusick 		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
261*57783Smckusick 		if (!bp)
262*57783Smckusick 			return (EINTR);
263*57783Smckusick 		if ((bp->b_flags & B_DONE) == 0) {
264*57783Smckusick 			bp->b_flags |= B_READ;
265*57783Smckusick 			if (error = nfs_doio(bp, cred, p)) {
26652196Smckusick 				brelse(bp);
26752196Smckusick 				return (error);
26852196Smckusick 			}
269*57783Smckusick 		}
270*57783Smckusick 
271*57783Smckusick 		/*
272*57783Smckusick 		 * If not eof and read aheads are enabled, start one.
273*57783Smckusick 		 * (You need the current block first, so that you have the
274*57783Smckusick 		 *  directory offset cookie of the next block.)
275*57783Smckusick 		 */
276*57783Smckusick 		rabn = bp->b_blkno;
277*57783Smckusick 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
278*57783Smckusick 		    rabn != 0 && rabn != np->n_direofoffset &&
279*57783Smckusick 		    !nfsincore(vp, rabn)) {
280*57783Smckusick 			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
281*57783Smckusick 			if (rabp) {
282*57783Smckusick 			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
283*57783Smckusick 				rabp->b_flags |= (B_READ | B_ASYNC);
284*57783Smckusick 				if (nfs_asyncio(rabp, cred)) {
285*57783Smckusick 				    rabp->b_flags |= B_INVAL;
286*57783Smckusick 				    brelse(rabp);
28752196Smckusick 				}
288*57783Smckusick 			    }
28952196Smckusick 			}
29052196Smckusick 		}
291*57783Smckusick 		on = 0;
292*57783Smckusick 		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
293*57783Smckusick 		got_buf = 1;
294*57783Smckusick 		break;
295*57783Smckusick 	    };
296*57783Smckusick 
	    /* copy the selected range out to the caller's uio */
297*57783Smckusick 	    if (n > 0) {
298*57783Smckusick 		if (!baddr)
299*57783Smckusick 			baddr = bp->b_un.b_addr;
300*57783Smckusick 		error = uiomove(baddr + on, (int)n, uio);
30152196Smckusick 	    }
30241897Smckusick 	    switch (vp->v_type) {
30341897Smckusick 	    case VREG:
304*57783Smckusick 		if (n + on == biosize || uio->uio_offset == np->n_size)
30538882Smacklem 			bp->b_flags |= B_AGE;
30641897Smckusick 		break;
30741897Smckusick 	    case VLNK:
		/* a symlink is read in one shot; force loop exit */
30841897Smckusick 		n = 0;
30941897Smckusick 		break;
31041897Smckusick 	    case VDIR:
		/* advance the uio to the next directory cookie */
31141897Smckusick 		uio->uio_offset = bp->b_blkno;
31241897Smckusick 		break;
31341897Smckusick 	    };
314*57783Smckusick 	    if (got_buf)
315*57783Smckusick 		brelse(bp);
316*57783Smckusick 	} while (error == 0 && uio->uio_resid > 0 && n > 0);
31738882Smacklem 	return (error);
31838882Smacklem }
31938882Smacklem 
32038882Smacklem /*
32138882Smacklem  * Vnode op for write using bio
32238882Smacklem  */
32354669Smckusick nfs_write(ap)
32454448Smckusick 	struct vop_write_args /* {
32554448Smckusick 		struct vnode *a_vp;
32654448Smckusick 		struct uio *a_uio;
32754448Smckusick 		int  a_ioflag;
32854448Smckusick 		struct ucred *a_cred;
32954448Smckusick 	} */ *ap;
33038882Smacklem {
33152196Smckusick 	register int biosize;
33254448Smckusick 	register struct uio *uio = ap->a_uio;
33354448Smckusick 	struct proc *p = uio->uio_procp;
33454448Smckusick 	register struct vnode *vp = ap->a_vp;
33554448Smckusick 	struct nfsnode *np = VTONFS(vp);
33654448Smckusick 	register struct ucred *cred = ap->a_cred;
33754448Smckusick 	int ioflag = ap->a_ioflag;
33838882Smacklem 	struct buf *bp;
33941897Smckusick 	struct vattr vattr;
34052196Smckusick 	struct nfsmount *nmp;
34138882Smacklem 	daddr_t lbn, bn;
34240220Smckusick 	int n, on, error = 0;
34338882Smacklem 
34448047Smckusick #ifdef DIAGNOSTIC
34554448Smckusick 	if (uio->uio_rw != UIO_WRITE)
34641897Smckusick 		panic("nfs_write mode");
34754448Smckusick 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
34848047Smckusick 		panic("nfs_write proc");
34948047Smckusick #endif
35054448Smckusick 	if (vp->v_type != VREG)
35141897Smckusick 		return (EIO);
	/*
	 * Report (once) an error left behind by an earlier asynchronous
	 * write rpc; see nfs_doio() which sets NWRITEERR/n_error.
	 */
35253627Smckusick 	if (np->n_flag & NWRITEERR) {
35353627Smckusick 		np->n_flag &= ~NWRITEERR;
35453627Smckusick 		return (np->n_error);
35553627Smckusick 	}
35654448Smckusick 	if (ioflag & (IO_APPEND | IO_SYNC)) {
35752986Smckusick 		if (np->n_flag & NMODIFIED) {
35856282Smckusick 			np->n_attrstamp = 0;
359*57783Smckusick 			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
360*57783Smckusick 				return (error);
36152986Smckusick 		}
36254448Smckusick 		if (ioflag & IO_APPEND) {
			/* refetch attributes so n_size is the server's size */
36352986Smckusick 			np->n_attrstamp = 0;
36454448Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
36552986Smckusick 				return (error);
36654448Smckusick 			uio->uio_offset = np->n_size;
36752986Smckusick 		}
36852986Smckusick 	}
36954448Smckusick 	nmp = VFSTONFS(vp->v_mount);
37054448Smckusick 	if (uio->uio_offset < 0)
37139584Smckusick 		return (EINVAL);
37254448Smckusick 	if (uio->uio_resid == 0)
37339584Smckusick 		return (0);
37438882Smacklem 	/*
37538882Smacklem 	 * Maybe this should be above the vnode op call, but so long as
37638882Smacklem 	 * file servers have no limits, i don't think it matters
37738882Smacklem 	 */
37854448Smckusick 	if (p && uio->uio_offset + uio->uio_resid >
37947572Skarels 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
38047572Skarels 		psignal(p, SIGXFSZ);
38139584Smckusick 		return (EFBIG);
38238882Smacklem 	}
38343348Smckusick 	/*
38443348Smckusick 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
38543348Smckusick 	 * will be the same size within a filesystem. nfs_writerpc will
38643348Smckusick 	 * still use nm_wsize when sizing the rpc's.
38743348Smckusick 	 */
38852196Smckusick 	biosize = nmp->nm_rsize;
	/* one buffer-cache block per iteration until the uio is drained */
38938882Smacklem 	do {
39052196Smckusick 
39152196Smckusick 		/*
39252196Smckusick 		 * Check for a valid write lease.
39352196Smckusick 		 * If non-cachable, just do the rpc
39452196Smckusick 		 */
39552196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
39654448Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
39752196Smckusick 			do {
39854448Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
39952196Smckusick 			} while (error == NQNFS_EXPIRED);
40052196Smckusick 			if (error)
40152196Smckusick 				return (error);
40254448Smckusick 			if (np->n_lrev != np->n_brev ||
40352196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
404*57783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
405*57783Smckusick 					return (error);
40652196Smckusick 				np->n_brev = np->n_lrev;
40752196Smckusick 			}
40852196Smckusick 		}
40952196Smckusick 		if (np->n_flag & NQNFSNONCACHE)
410*57783Smckusick 			return (nfs_writerpc(vp, uio, cred, ioflag));
41139750Smckusick 		nfsstats.biocache_writes++;
41254448Smckusick 		lbn = uio->uio_offset / biosize;
41354448Smckusick 		on = uio->uio_offset & (biosize-1);
41455057Spendry 		n = min((unsigned)(biosize - on), uio->uio_resid);
41552196Smckusick 		bn = lbn * (biosize / DEV_BSIZE);
41640037Smckusick again:
417*57783Smckusick 		bp = nfs_getcacheblk(vp, bn, biosize, p);
418*57783Smckusick 		if (!bp)
419*57783Smckusick 			return (EINTR);
42038882Smacklem 		if (bp->b_wcred == NOCRED) {
42154448Smckusick 			crhold(cred);
42254448Smckusick 			bp->b_wcred = cred;
42338882Smacklem 		}
42457783Smckusick 		np->n_flag |= NMODIFIED;
		/* growing the file: keep the vm pager's notion of size in step */
425*57783Smckusick 		if (uio->uio_offset + n > np->n_size) {
426*57783Smckusick 			np->n_size = uio->uio_offset + n;
427*57783Smckusick 			vnode_pager_setsize(vp, (u_long)np->n_size);
428*57783Smckusick 		}
42952196Smckusick 
43052196Smckusick 		/*
43152196Smckusick 		 * If the new write will leave a contiguous dirty
43252196Smckusick 		 * area, just update the b_dirtyoff and b_dirtyend,
43352196Smckusick 		 * otherwise force a write rpc of the old dirty area.
43452196Smckusick 		 */
43552196Smckusick 		if (bp->b_dirtyend > 0 &&
43652196Smckusick 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
43752196Smckusick 			bp->b_proc = p;
438*57783Smckusick 			if (VOP_BWRITE(bp) == EINTR)
439*57783Smckusick 				return (EINTR);
44052196Smckusick 			goto again;
44152196Smckusick 		}
44252196Smckusick 
44352196Smckusick 		/*
44452196Smckusick 		 * Check for valid write lease and get one as required.
44552196Smckusick 		 * In case getblk() and/or bwrite() delayed us.
44652196Smckusick 		 */
44752196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
44854448Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
44952196Smckusick 			do {
45054448Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
45152196Smckusick 			} while (error == NQNFS_EXPIRED);
45252196Smckusick 			if (error) {
45352196Smckusick 				brelse(bp);
45452196Smckusick 				return (error);
45538882Smacklem 			}
45654448Smckusick 			if (np->n_lrev != np->n_brev ||
45752196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
45856282Smckusick 				brelse(bp);
459*57783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
460*57783Smckusick 					return (error);
46152196Smckusick 				np->n_brev = np->n_lrev;
46256282Smckusick 				goto again;
46352196Smckusick 			}
46438882Smacklem 		}
46554448Smckusick 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
466*57783Smckusick 			bp->b_flags |= B_ERROR;
46740037Smckusick 			brelse(bp);
46839584Smckusick 			return (error);
46940037Smckusick 		}
		/* merge [on, on+n) into the block's dirty region */
47052196Smckusick 		if (bp->b_dirtyend > 0) {
47155057Spendry 			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
472*57783Smckusick 			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
47352196Smckusick 		} else {
47452196Smckusick 			bp->b_dirtyoff = on;
475*57783Smckusick 			bp->b_dirtyend = on + n;
47652196Smckusick 		}
		/*
		 * "notdef" is presumably never defined, so the first arm is
		 * the one compiled: it extends the valid region to cover the
		 * dirty region instead of just overwriting it -- confirm.
		 */
477*57783Smckusick #ifndef notdef
47852196Smckusick 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
47952196Smckusick 		    bp->b_validoff > bp->b_dirtyend) {
48052196Smckusick 			bp->b_validoff = bp->b_dirtyoff;
48152196Smckusick 			bp->b_validend = bp->b_dirtyend;
48252196Smckusick 		} else {
48355057Spendry 			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
48455057Spendry 			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
48552196Smckusick 		}
486*57783Smckusick #else
487*57783Smckusick 		bp->b_validoff = bp->b_dirtyoff;
488*57783Smckusick 		bp->b_validend = bp->b_dirtyend;
489*57783Smckusick #endif
490*57783Smckusick 		if (ioflag & IO_APPEND)
491*57783Smckusick 			bp->b_flags |= B_APPENDWRITE;
49252196Smckusick 
49352196Smckusick 		/*
49452196Smckusick 		 * If the lease is non-cachable or IO_SYNC do bwrite().
49552196Smckusick 		 */
49654448Smckusick 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
49752196Smckusick 			bp->b_proc = p;
498*57783Smckusick 			if (error = VOP_BWRITE(bp))
499*57783Smckusick 				return (error);
500*57783Smckusick 		} else if ((n + on) == biosize &&
501*57783Smckusick 			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			/* full block written: push it asynchronously */
50241897Smckusick 			bp->b_proc = (struct proc *)0;
50338882Smacklem 			bawrite(bp);
504*57783Smckusick 		} else
			/* partial block: leave it delayed-write */
50538882Smacklem 			bdwrite(bp);
506*57783Smckusick 	} while (uio->uio_resid > 0 && n > 0);
507*57783Smckusick 	return (0);
508*57783Smckusick }
509*57783Smckusick 
510*57783Smckusick /*
511*57783Smckusick  * Get an nfs cache block.
512*57783Smckusick  * Allocate a new one if the block isn't currently in the cache
513*57783Smckusick  * and return the block marked busy. If the calling process is
514*57783Smckusick  * interrupted by a signal for an interruptible mount point, return
515*57783Smckusick  * NULL.
516*57783Smckusick  */
517*57783Smckusick struct buf *
518*57783Smckusick nfs_getcacheblk(vp, bn, size, p)
519*57783Smckusick 	struct vnode *vp;
520*57783Smckusick 	daddr_t bn;
521*57783Smckusick 	int size;
522*57783Smckusick 	struct proc *p;
523*57783Smckusick {
524*57783Smckusick 	register struct buf *bp;
525*57783Smckusick 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
526*57783Smckusick 
527*57783Smckusick 	if (nmp->nm_flag & NFSMNT_INT) {
528*57783Smckusick 		bp = nfsgetblk(vp, bn, size, PCATCH, 0);
529*57783Smckusick 		while (bp == (struct buf *)0) {
530*57783Smckusick 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
531*57783Smckusick 				return ((struct buf *)0);
532*57783Smckusick 			bp = nfsgetblk(vp, bn, size, 0, 2 * hz);
53338882Smacklem 		}
534*57783Smckusick 	} else
535*57783Smckusick 		bp = nfsgetblk(vp, bn, size, 0, 0);
536*57783Smckusick 	return (bp);
537*57783Smckusick }
538*57783Smckusick 
539*57783Smckusick /*
540*57783Smckusick  * Flush and invalidate all dirty buffers. If another process is already
541*57783Smckusick  * doing the flush, just wait for completion.
542*57783Smckusick  */
543*57783Smckusick nfs_vinvalbuf(vp, flags, cred, p, intrflg)
544*57783Smckusick 	struct vnode *vp;
545*57783Smckusick 	int flags;
546*57783Smckusick 	struct ucred *cred;
547*57783Smckusick 	struct proc *p;
548*57783Smckusick 	int intrflg;
549*57783Smckusick {
	/*
	 * Returns 0 on success, or EINTR if intrflg was set, the mount is
	 * NFSMNT_INT, and a signal arrived while waiting/flushing.
	 * Clears NMODIFIED on successful completion.
	 */
550*57783Smckusick 	register struct nfsnode *np = VTONFS(vp);
551*57783Smckusick 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
552*57783Smckusick 	int error = 0, slpflag, slptimeo;
553*57783Smckusick 
	/* interruptibility only applies on NFSMNT_INT mounts */
554*57783Smckusick 	if ((nmp->nm_flag & NFSMNT_INT) == 0)
555*57783Smckusick 		intrflg = 0;
556*57783Smckusick 	if (intrflg) {
557*57783Smckusick 		slpflag = PCATCH;
558*57783Smckusick 		slptimeo = 2 * hz;
559*57783Smckusick 	} else {
560*57783Smckusick 		slpflag = 0;
561*57783Smckusick 		slptimeo = 0;
562*57783Smckusick 	}
563*57783Smckusick 	/*
564*57783Smckusick 	 * First wait for any other process doing a flush to complete.
	 * (The tsleep itself is not PCATCH; interruption is detected by
	 * the timeout plus the nfs_sigintr() check below.)
565*57783Smckusick 	 */
566*57783Smckusick 	while (np->n_flag & NFLUSHINPROG) {
567*57783Smckusick 		np->n_flag |= NFLUSHWANT;
568*57783Smckusick 		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
569*57783Smckusick 			slptimeo);
570*57783Smckusick 		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
571*57783Smckusick 			return (EINTR);
572*57783Smckusick 	}
573*57783Smckusick 
574*57783Smckusick 	/*
575*57783Smckusick 	 * Now, flush as required.
	 * slpflag (PCATCH) applies only to the first nfsvinvalbuf attempt;
	 * retries poll with slptimeo instead.  On EINTR, NFLUSHINPROG is
	 * dropped and any waiter woken before returning.
576*57783Smckusick 	 */
577*57783Smckusick 	np->n_flag |= NFLUSHINPROG;
578*57783Smckusick 	error = nfsvinvalbuf(vp, flags, cred, p, slpflag, 0);
579*57783Smckusick 	while (error) {
580*57783Smckusick 		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
581*57783Smckusick 			np->n_flag &= ~NFLUSHINPROG;
582*57783Smckusick 			if (np->n_flag & NFLUSHWANT) {
583*57783Smckusick 				np->n_flag &= ~NFLUSHWANT;
584*57783Smckusick 				wakeup((caddr_t)&np->n_flag);
585*57783Smckusick 			}
586*57783Smckusick 			return (EINTR);
587*57783Smckusick 		}
588*57783Smckusick 		error = nfsvinvalbuf(vp, flags, cred, p, 0, slptimeo);
589*57783Smckusick 	}
590*57783Smckusick 	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
591*57783Smckusick 	if (np->n_flag & NFLUSHWANT) {
592*57783Smckusick 		np->n_flag &= ~NFLUSHWANT;
593*57783Smckusick 		wakeup((caddr_t)&np->n_flag);
594*57783Smckusick 	}
595*57783Smckusick 	return (0);
596*57783Smckusick }
597*57783Smckusick 
598*57783Smckusick /*
599*57783Smckusick  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
600*57783Smckusick  * This is mainly to avoid queueing async I/O requests when the nfsiods
601*57783Smckusick  * are all hung on a dead server.
602*57783Smckusick  */
603*57783Smckusick nfs_asyncio(bp, cred)
604*57783Smckusick 	register struct buf *bp;
605*57783Smckusick 	struct ucred *cred;
606*57783Smckusick {
607*57783Smckusick 	register int i;
608*57783Smckusick 
609*57783Smckusick 	if (nfs_numasync == 0)
610*57783Smckusick 		return (EIO);
611*57783Smckusick 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
612*57783Smckusick 	    if (nfs_iodwant[i]) {
613*57783Smckusick 		if (bp->b_flags & B_READ) {
614*57783Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
615*57783Smckusick 				crhold(cred);
616*57783Smckusick 				bp->b_rcred = cred;
617*57783Smckusick 			}
618*57783Smckusick 		} else {
619*57783Smckusick 			if (bp->b_wcred == NOCRED && cred != NOCRED) {
620*57783Smckusick 				crhold(cred);
621*57783Smckusick 				bp->b_wcred = cred;
622*57783Smckusick 			}
623*57783Smckusick 		}
624*57783Smckusick 
625*57783Smckusick 		queue_enter_tail(&nfs_bufq, bp, struct buf *, b_freelist);
626*57783Smckusick 		nfs_iodwant[i] = (struct proc *)0;
627*57783Smckusick 		wakeup((caddr_t)&nfs_iodwant[i]);
628*57783Smckusick 		return (0);
629*57783Smckusick 	    }
630*57783Smckusick 	return (EIO);
631*57783Smckusick }
632*57783Smckusick 
633*57783Smckusick /*
634*57783Smckusick  * Do an I/O operation to/from a cache block. This may be called
635*57783Smckusick  * synchronously or from an nfsiod.
636*57783Smckusick  */
637*57783Smckusick int
638*57783Smckusick nfs_doio(bp, cr, p)
639*57783Smckusick 	register struct buf *bp;
640*57783Smckusick 	struct cred *cr;
641*57783Smckusick 	struct proc *p;
642*57783Smckusick {
643*57783Smckusick 	register struct uio *uiop;
644*57783Smckusick 	register struct vnode *vp;
645*57783Smckusick 	struct nfsnode *np;
646*57783Smckusick 	struct nfsmount *nmp;
647*57783Smckusick 	int error, diff, len;
648*57783Smckusick 	struct uio uio;
649*57783Smckusick 	struct iovec io;
650*57783Smckusick 
651*57783Smckusick 	vp = bp->b_vp;
652*57783Smckusick 	np = VTONFS(vp);
653*57783Smckusick 	nmp = VFSTONFS(vp->v_mount);
654*57783Smckusick 	uiop = &uio;
655*57783Smckusick 	uiop->uio_iov = &io;
656*57783Smckusick 	uiop->uio_iovcnt = 1;
657*57783Smckusick 	uiop->uio_segflg = UIO_SYSSPACE;
658*57783Smckusick 	uiop->uio_procp = p;
659*57783Smckusick 
660*57783Smckusick 	/*
661*57783Smckusick 	 * Historically, paging was done with physio, but no more.
662*57783Smckusick 	 */
663*57783Smckusick 	if (bp->b_flags & B_PHYS)
664*57783Smckusick 	    panic("doio phys");
665*57783Smckusick 	if (bp->b_flags & B_READ) {
666*57783Smckusick 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
667*57783Smckusick 	    io.iov_base = bp->b_un.b_addr;
668*57783Smckusick 	    uiop->uio_rw = UIO_READ;
669*57783Smckusick 	    switch (vp->v_type) {
670*57783Smckusick 	    case VREG:
671*57783Smckusick 		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
672*57783Smckusick 		nfsstats.read_bios++;
673*57783Smckusick 		error = nfs_readrpc(vp, uiop, cr);
674*57783Smckusick 		if (!error) {
675*57783Smckusick 		    bp->b_validoff = 0;
676*57783Smckusick 		    if (uiop->uio_resid) {
677*57783Smckusick 			/*
678*57783Smckusick 			 * If len > 0, there is a hole in the file and
679*57783Smckusick 			 * no writes after the hole have been pushed to
680*57783Smckusick 			 * the server yet.
681*57783Smckusick 			 * Just zero fill the rest of the valid area.
682*57783Smckusick 			 */
683*57783Smckusick 			diff = bp->b_bcount - uiop->uio_resid;
684*57783Smckusick 			len = np->n_size - (bp->b_blkno * DEV_BSIZE
685*57783Smckusick 				+ diff);
686*57783Smckusick 			if (len > 0) {
687*57783Smckusick 			    len = min(len, uiop->uio_resid);
688*57783Smckusick 			    bzero(bp->b_un.b_addr + diff, len);
689*57783Smckusick 			    bp->b_validend = diff + len;
690*57783Smckusick 			} else
691*57783Smckusick 			    bp->b_validend = diff;
692*57783Smckusick 		    } else
693*57783Smckusick 			bp->b_validend = bp->b_bcount;
694*57783Smckusick 		}
695*57783Smckusick 		if (p && (vp->v_flag & VTEXT) &&
696*57783Smckusick 			(((nmp->nm_flag & NFSMNT_NQNFS) &&
697*57783Smckusick 			  np->n_lrev != np->n_brev) ||
698*57783Smckusick 			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
699*57783Smckusick 			  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
700*57783Smckusick 			uprintf("Process killed due to text file modification\n");
701*57783Smckusick 			psignal(p, SIGKILL);
702*57783Smckusick 			p->p_flag |= SKEEP;
703*57783Smckusick 		}
704*57783Smckusick 		break;
705*57783Smckusick 	    case VLNK:
706*57783Smckusick 		uiop->uio_offset = 0;
707*57783Smckusick 		nfsstats.readlink_bios++;
708*57783Smckusick 		error = nfs_readlinkrpc(vp, uiop, cr);
709*57783Smckusick 		break;
710*57783Smckusick 	    case VDIR:
711*57783Smckusick 		uiop->uio_offset = bp->b_lblkno;
712*57783Smckusick 		nfsstats.readdir_bios++;
713*57783Smckusick 		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
714*57783Smckusick 		    error = nfs_readdirlookrpc(vp, uiop, cr);
715*57783Smckusick 		else
716*57783Smckusick 		    error = nfs_readdirrpc(vp, uiop, cr);
717*57783Smckusick 		/*
718*57783Smckusick 		 * Save offset cookie in b_blkno.
719*57783Smckusick 		 */
720*57783Smckusick 		bp->b_blkno = uiop->uio_offset;
721*57783Smckusick 		break;
722*57783Smckusick 	    };
723*57783Smckusick 	    if (error) {
724*57783Smckusick 		bp->b_flags |= B_ERROR;
725*57783Smckusick 		bp->b_error = error;
726*57783Smckusick 	    }
727*57783Smckusick 	} else {
728*57783Smckusick 	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
729*57783Smckusick 		- bp->b_dirtyoff;
730*57783Smckusick 	    uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
731*57783Smckusick 		+ bp->b_dirtyoff;
732*57783Smckusick 	    io.iov_base = bp->b_un.b_addr + bp->b_dirtyoff;
733*57783Smckusick 	    uiop->uio_rw = UIO_WRITE;
734*57783Smckusick 	    nfsstats.write_bios++;
735*57783Smckusick 	    if (bp->b_flags & B_APPENDWRITE)
736*57783Smckusick 		error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
737*57783Smckusick 	    else
738*57783Smckusick 		error = nfs_writerpc(vp, uiop, cr, 0);
739*57783Smckusick 	    bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
740*57783Smckusick 
741*57783Smckusick 	    /*
742*57783Smckusick 	     * For an interrupted write, the buffer is still valid and the
743*57783Smckusick 	     * write hasn't been pushed to the server yet, so we can't set
744*57783Smckusick 	     * B_ERROR and report the interruption by setting B_EINTR. For
745*57783Smckusick 	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
746*57783Smckusick 	     * is essentially a noop.
747*57783Smckusick 	     */
748*57783Smckusick 	    if (error == EINTR) {
749*57783Smckusick 		bp->b_flags &= ~B_INVAL;
750*57783Smckusick 		bp->b_flags |= B_DELWRI;
751*57783Smckusick 
752*57783Smckusick 		/*
753*57783Smckusick 		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
754*57783Smckusick 		 * buffer to the clean list, we have to reassign it back to the
755*57783Smckusick 		 * dirty one. Ugh.
756*57783Smckusick 		 */
757*57783Smckusick 		if (bp->b_flags & B_ASYNC)
758*57783Smckusick 		    reassignbuf(bp, vp);
759*57783Smckusick 		else
760*57783Smckusick 		    bp->b_flags |= B_EINTR;
761*57783Smckusick 	    } else {
762*57783Smckusick 		if (error) {
763*57783Smckusick 		    bp->b_flags |= B_ERROR;
764*57783Smckusick 		    bp->b_error = np->n_error = error;
765*57783Smckusick 		    np->n_flag |= NWRITEERR;
766*57783Smckusick 		}
767*57783Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
768*57783Smckusick 	    }
769*57783Smckusick 	}
770*57783Smckusick 	bp->b_resid = uiop->uio_resid;
771*57783Smckusick 	biodone(bp);
77238882Smacklem 	return (error);
77338882Smacklem }
774