xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 63233)
138882Smacklem /*
2*63233Sbostic  * Copyright (c) 1989, 1993
3*63233Sbostic  *	The Regents of the University of California.  All rights reserved.
438882Smacklem  *
538882Smacklem  * This code is derived from software contributed to Berkeley by
638882Smacklem  * Rick Macklem at The University of Guelph.
738882Smacklem  *
844509Sbostic  * %sccs.include.redist.c%
938882Smacklem  *
10*63233Sbostic  *	@(#)nfs_bio.c	8.1 (Berkeley) 06/10/93
1138882Smacklem  */
1238882Smacklem 
1353322Smckusick #include <sys/param.h>
1455063Spendry #include <sys/systm.h>
1553322Smckusick #include <sys/resourcevar.h>
1653322Smckusick #include <sys/proc.h>
1753322Smckusick #include <sys/buf.h>
1853322Smckusick #include <sys/vnode.h>
1953322Smckusick #include <sys/trace.h>
2053322Smckusick #include <sys/mount.h>
2153322Smckusick #include <sys/kernel.h>
2256535Sbostic 
2353322Smckusick #include <vm/vm.h>
2456535Sbostic 
2553322Smckusick #include <nfs/nfsnode.h>
2653322Smckusick #include <nfs/rpcv2.h>
2753322Smckusick #include <nfs/nfsv2.h>
2853322Smckusick #include <nfs/nfs.h>
2953322Smckusick #include <nfs/nfsmount.h>
3053322Smckusick #include <nfs/nqnfs.h>
3138882Smacklem 
3257808Smckusick struct buf *incore(), *nfs_getcacheblk();
3357783Smckusick extern struct queue_entry nfs_bufq;
3457783Smckusick extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
3557783Smckusick extern int nfs_numasync;
3638882Smacklem 
3738882Smacklem /*
3838882Smacklem  * Vnode op for read using bio
3938882Smacklem  * Any similarity to readip() is purely coincidental
4038882Smacklem  */
4141897Smckusick nfs_bioread(vp, uio, ioflag, cred)
4238882Smacklem 	register struct vnode *vp;
4343348Smckusick 	register struct uio *uio;
4438882Smacklem 	int ioflag;
4538882Smacklem 	struct ucred *cred;
4638882Smacklem {
4738882Smacklem 	register struct nfsnode *np = VTONFS(vp);
4857783Smckusick 	register int biosize, diff;
4957783Smckusick 	struct buf *bp, *rabp;
5038882Smacklem 	struct vattr vattr;
5157783Smckusick 	struct proc *p;
5252196Smckusick 	struct nfsmount *nmp;
5357783Smckusick 	daddr_t lbn, bn, rabn;
5457783Smckusick 	caddr_t baddr;
5557783Smckusick 	int got_buf, len, nra, error = 0, n, on, not_readin;
5638882Smacklem 
5742241Smckusick #ifdef lint
5842241Smckusick 	ioflag = ioflag;
5942241Smckusick #endif /* lint */
6048047Smckusick #ifdef DIAGNOSTIC
6138882Smacklem 	if (uio->uio_rw != UIO_READ)
6238882Smacklem 		panic("nfs_read mode");
6348047Smckusick #endif
6438882Smacklem 	if (uio->uio_resid == 0)
6539584Smckusick 		return (0);
6641897Smckusick 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
6739584Smckusick 		return (EINVAL);
6852196Smckusick 	nmp = VFSTONFS(vp->v_mount);
6952196Smckusick 	biosize = nmp->nm_rsize;
7057783Smckusick 	p = uio->uio_procp;
7138882Smacklem 	/*
7252196Smckusick 	 * For nfs, cache consistency can only be maintained approximately.
7352196Smckusick 	 * Although RFC1094 does not specify the criteria, the following is
7452196Smckusick 	 * believed to be compatible with the reference port.
7552196Smckusick 	 * For nqnfs, full cache consistency is maintained within the loop.
7652196Smckusick 	 * For nfs:
7738882Smacklem 	 * If the file's modify time on the server has changed since the
7838882Smacklem 	 * last read rpc or you have written to the file,
7938882Smacklem 	 * you may have lost data cache consistency with the
8038882Smacklem 	 * server, so flush all of the file's data out of the cache.
8141897Smckusick 	 * Then force a getattr rpc to ensure that you have up to date
8241897Smckusick 	 * attributes.
8352196Smckusick 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
8452196Smckusick 	 * the ones changing the modify time.
8538882Smacklem 	 * NB: This implies that cache data can be read when up to
8638882Smacklem 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
8738882Smacklem 	 * attributes this could be forced by setting n_attrstamp to 0 before
8853550Sheideman 	 * the VOP_GETATTR() call.
8938882Smacklem 	 */
9052196Smckusick 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
9141897Smckusick 		if (np->n_flag & NMODIFIED) {
9252196Smckusick 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
9357783Smckusick 			     vp->v_type != VREG) {
9457783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
9557783Smckusick 					return (error);
9657783Smckusick 			}
9741897Smckusick 			np->n_attrstamp = 0;
9841897Smckusick 			np->n_direofoffset = 0;
9957783Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
10039750Smckusick 				return (error);
10154106Smckusick 			np->n_mtime = vattr.va_mtime.ts_sec;
10241897Smckusick 		} else {
10357783Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
10441897Smckusick 				return (error);
10554106Smckusick 			if (np->n_mtime != vattr.va_mtime.ts_sec) {
10641897Smckusick 				np->n_direofoffset = 0;
10757783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
10857783Smckusick 					return (error);
10954106Smckusick 				np->n_mtime = vattr.va_mtime.ts_sec;
11041897Smckusick 			}
11139750Smckusick 		}
11238882Smacklem 	}
11338882Smacklem 	do {
11452196Smckusick 
11552196Smckusick 	    /*
11652196Smckusick 	     * Get a valid lease. If cached data is stale, flush it.
11752196Smckusick 	     */
11857783Smckusick 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
11957783Smckusick 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
12057783Smckusick 		    do {
12157783Smckusick 			error = nqnfs_getlease(vp, NQL_READ, cred, p);
12257783Smckusick 		    } while (error == NQNFS_EXPIRED);
12357783Smckusick 		    if (error)
12452196Smckusick 			return (error);
12557783Smckusick 		    if (np->n_lrev != np->n_brev ||
12659704Smckusick 			(np->n_flag & NQNFSNONCACHE) ||
12757783Smckusick 			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
12852196Smckusick 			if (vp->v_type == VDIR) {
12957783Smckusick 			    np->n_direofoffset = 0;
13057783Smckusick 			    cache_purge(vp);
13152196Smckusick 			}
13257783Smckusick 			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
13357783Smckusick 			    return (error);
13452196Smckusick 			np->n_brev = np->n_lrev;
13557783Smckusick 		    }
13657783Smckusick 		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
13757783Smckusick 		    np->n_direofoffset = 0;
13857783Smckusick 		    cache_purge(vp);
13957783Smckusick 		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
14057783Smckusick 			return (error);
14152196Smckusick 		}
14252196Smckusick 	    }
14352196Smckusick 	    if (np->n_flag & NQNFSNONCACHE) {
14452196Smckusick 		switch (vp->v_type) {
14552196Smckusick 		case VREG:
14652196Smckusick 			error = nfs_readrpc(vp, uio, cred);
14752196Smckusick 			break;
14852196Smckusick 		case VLNK:
14952196Smckusick 			error = nfs_readlinkrpc(vp, uio, cred);
15052196Smckusick 			break;
15152196Smckusick 		case VDIR:
15252196Smckusick 			error = nfs_readdirrpc(vp, uio, cred);
15352196Smckusick 			break;
15452196Smckusick 		};
15552196Smckusick 		return (error);
15652196Smckusick 	    }
15757783Smckusick 	    baddr = (caddr_t)0;
15841897Smckusick 	    switch (vp->v_type) {
15941897Smckusick 	    case VREG:
16039750Smckusick 		nfsstats.biocache_reads++;
16143348Smckusick 		lbn = uio->uio_offset / biosize;
16243348Smckusick 		on = uio->uio_offset & (biosize-1);
16357783Smckusick 		bn = lbn * (biosize / DEV_BSIZE);
16457783Smckusick 		not_readin = 1;
16557783Smckusick 
16657783Smckusick 		/*
16757783Smckusick 		 * Start the read ahead(s), as required.
16857783Smckusick 		 */
16957783Smckusick 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
17057783Smckusick 		    lbn == vp->v_lastr + 1) {
17157783Smckusick 		    for (nra = 0; nra < nmp->nm_readahead &&
17257783Smckusick 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
17357783Smckusick 			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
17457808Smckusick 			if (!incore(vp, rabn)) {
17557783Smckusick 			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
17657783Smckusick 			    if (!rabp)
17757783Smckusick 				return (EINTR);
17857783Smckusick 			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
17957783Smckusick 				rabp->b_flags |= (B_READ | B_ASYNC);
18057783Smckusick 				if (nfs_asyncio(rabp, cred)) {
18157783Smckusick 				    rabp->b_flags |= B_INVAL;
18257783Smckusick 				    brelse(rabp);
18357783Smckusick 				}
18457783Smckusick 			    }
18557783Smckusick 			}
18657783Smckusick 		    }
18757783Smckusick 		}
18857783Smckusick 
18957783Smckusick 		/*
19057783Smckusick 		 * If the block is in the cache and has the required data
19157783Smckusick 		 * in a valid region, just copy it out.
19257783Smckusick 		 * Otherwise, get the block and write back/read in,
19357783Smckusick 		 * as required.
19457783Smckusick 		 */
19557808Smckusick 		if ((bp = incore(vp, bn)) &&
19657783Smckusick 		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
19757783Smckusick 		    (B_BUSY | B_WRITEINPROG))
19857783Smckusick 			got_buf = 0;
19957783Smckusick 		else {
20057783Smckusick again:
20157783Smckusick 			bp = nfs_getcacheblk(vp, bn, biosize, p);
20257783Smckusick 			if (!bp)
20357783Smckusick 				return (EINTR);
20457783Smckusick 			got_buf = 1;
20557783Smckusick 			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
20657783Smckusick 				bp->b_flags |= B_READ;
20757783Smckusick 				not_readin = 0;
20857783Smckusick 				if (error = nfs_doio(bp, cred, p)) {
20957783Smckusick 				    brelse(bp);
21057783Smckusick 				    return (error);
21157783Smckusick 				}
21257783Smckusick 			}
21357783Smckusick 		}
21455057Spendry 		n = min((unsigned)(biosize - on), uio->uio_resid);
21538882Smacklem 		diff = np->n_size - uio->uio_offset;
21638882Smacklem 		if (diff < n)
21738882Smacklem 			n = diff;
21857783Smckusick 		if (not_readin && n > 0) {
21957783Smckusick 			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
22057783Smckusick 				if (!got_buf) {
22157783Smckusick 				    bp = nfs_getcacheblk(vp, bn, biosize, p);
22257783Smckusick 				    if (!bp)
22357783Smckusick 					return (EINTR);
22457783Smckusick 				    got_buf = 1;
22557783Smckusick 				}
22652196Smckusick 				bp->b_flags |= B_INVAL;
22752196Smckusick 				if (bp->b_dirtyend > 0) {
22857783Smckusick 				    if ((bp->b_flags & B_DELWRI) == 0)
22957783Smckusick 					panic("nfsbioread");
23057783Smckusick 				    if (VOP_BWRITE(bp) == EINTR)
23157783Smckusick 					return (EINTR);
23252196Smckusick 				} else
23357783Smckusick 				    brelse(bp);
23452196Smckusick 				goto again;
23552196Smckusick 			}
23652196Smckusick 		}
23739901Smckusick 		vp->v_lastr = lbn;
23857783Smckusick 		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
23957783Smckusick 		if (diff < n)
24057783Smckusick 			n = diff;
24141897Smckusick 		break;
24241897Smckusick 	    case VLNK:
24341897Smckusick 		nfsstats.biocache_readlinks++;
24457783Smckusick 		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
24557783Smckusick 		if (!bp)
24657783Smckusick 			return (EINTR);
24757783Smckusick 		if ((bp->b_flags & B_DONE) == 0) {
24857783Smckusick 			bp->b_flags |= B_READ;
24957783Smckusick 			if (error = nfs_doio(bp, cred, p)) {
25057783Smckusick 				brelse(bp);
25157783Smckusick 				return (error);
25257783Smckusick 			}
25357783Smckusick 		}
25457783Smckusick 		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
25557783Smckusick 		got_buf = 1;
25641897Smckusick 		on = 0;
25741897Smckusick 		break;
25841897Smckusick 	    case VDIR:
25941897Smckusick 		nfsstats.biocache_readdirs++;
26057783Smckusick 		bn = (daddr_t)uio->uio_offset;
26157783Smckusick 		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
26257783Smckusick 		if (!bp)
26357783Smckusick 			return (EINTR);
26457783Smckusick 		if ((bp->b_flags & B_DONE) == 0) {
26557783Smckusick 			bp->b_flags |= B_READ;
26657783Smckusick 			if (error = nfs_doio(bp, cred, p)) {
26752196Smckusick 				brelse(bp);
26852196Smckusick 				return (error);
26952196Smckusick 			}
27057783Smckusick 		}
27157783Smckusick 
27257783Smckusick 		/*
27357783Smckusick 		 * If not eof and read aheads are enabled, start one.
27457783Smckusick 		 * (You need the current block first, so that you have the
27557783Smckusick 		 *  directory offset cookie of the next block.
27657783Smckusick 		 */
27757783Smckusick 		rabn = bp->b_blkno;
27857783Smckusick 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
27957783Smckusick 		    rabn != 0 && rabn != np->n_direofoffset &&
28057808Smckusick 		    !incore(vp, rabn)) {
28157783Smckusick 			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
28257783Smckusick 			if (rabp) {
28357783Smckusick 			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
28457783Smckusick 				rabp->b_flags |= (B_READ | B_ASYNC);
28557783Smckusick 				if (nfs_asyncio(rabp, cred)) {
28657783Smckusick 				    rabp->b_flags |= B_INVAL;
28757783Smckusick 				    brelse(rabp);
28852196Smckusick 				}
28957783Smckusick 			    }
29052196Smckusick 			}
29152196Smckusick 		}
29257783Smckusick 		on = 0;
29357783Smckusick 		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
29457783Smckusick 		got_buf = 1;
29557783Smckusick 		break;
29657783Smckusick 	    };
29757783Smckusick 
29857783Smckusick 	    if (n > 0) {
29957783Smckusick 		if (!baddr)
30057783Smckusick 			baddr = bp->b_un.b_addr;
30157783Smckusick 		error = uiomove(baddr + on, (int)n, uio);
30252196Smckusick 	    }
30341897Smckusick 	    switch (vp->v_type) {
30441897Smckusick 	    case VREG:
30557783Smckusick 		if (n + on == biosize || uio->uio_offset == np->n_size)
30638882Smacklem 			bp->b_flags |= B_AGE;
30741897Smckusick 		break;
30841897Smckusick 	    case VLNK:
30941897Smckusick 		n = 0;
31041897Smckusick 		break;
31141897Smckusick 	    case VDIR:
31241897Smckusick 		uio->uio_offset = bp->b_blkno;
31341897Smckusick 		break;
31441897Smckusick 	    };
31557783Smckusick 	    if (got_buf)
31657783Smckusick 		brelse(bp);
31757783Smckusick 	} while (error == 0 && uio->uio_resid > 0 && n > 0);
31838882Smacklem 	return (error);
31938882Smacklem }
32038882Smacklem 
32138882Smacklem /*
32238882Smacklem  * Vnode op for write using bio
32338882Smacklem  */
32454669Smckusick nfs_write(ap)
32554448Smckusick 	struct vop_write_args /* {
32654448Smckusick 		struct vnode *a_vp;
32754448Smckusick 		struct uio *a_uio;
32854448Smckusick 		int  a_ioflag;
32954448Smckusick 		struct ucred *a_cred;
33054448Smckusick 	} */ *ap;
33138882Smacklem {
33252196Smckusick 	register int biosize;
33354448Smckusick 	register struct uio *uio = ap->a_uio;
33454448Smckusick 	struct proc *p = uio->uio_procp;
33554448Smckusick 	register struct vnode *vp = ap->a_vp;
33654448Smckusick 	struct nfsnode *np = VTONFS(vp);
33754448Smckusick 	register struct ucred *cred = ap->a_cred;
33854448Smckusick 	int ioflag = ap->a_ioflag;
33938882Smacklem 	struct buf *bp;
34041897Smckusick 	struct vattr vattr;
34152196Smckusick 	struct nfsmount *nmp;
34238882Smacklem 	daddr_t lbn, bn;
34340220Smckusick 	int n, on, error = 0;
34438882Smacklem 
34548047Smckusick #ifdef DIAGNOSTIC
34654448Smckusick 	if (uio->uio_rw != UIO_WRITE)
34741897Smckusick 		panic("nfs_write mode");
34854448Smckusick 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
34948047Smckusick 		panic("nfs_write proc");
35048047Smckusick #endif
35154448Smckusick 	if (vp->v_type != VREG)
35241897Smckusick 		return (EIO);
35353627Smckusick 	if (np->n_flag & NWRITEERR) {
35453627Smckusick 		np->n_flag &= ~NWRITEERR;
35553627Smckusick 		return (np->n_error);
35653627Smckusick 	}
35754448Smckusick 	if (ioflag & (IO_APPEND | IO_SYNC)) {
35852986Smckusick 		if (np->n_flag & NMODIFIED) {
35956282Smckusick 			np->n_attrstamp = 0;
36057783Smckusick 			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
36157783Smckusick 				return (error);
36252986Smckusick 		}
36354448Smckusick 		if (ioflag & IO_APPEND) {
36452986Smckusick 			np->n_attrstamp = 0;
36554448Smckusick 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
36652986Smckusick 				return (error);
36754448Smckusick 			uio->uio_offset = np->n_size;
36852986Smckusick 		}
36952986Smckusick 	}
37054448Smckusick 	nmp = VFSTONFS(vp->v_mount);
37154448Smckusick 	if (uio->uio_offset < 0)
37239584Smckusick 		return (EINVAL);
37354448Smckusick 	if (uio->uio_resid == 0)
37439584Smckusick 		return (0);
37538882Smacklem 	/*
37638882Smacklem 	 * Maybe this should be above the vnode op call, but so long as
37738882Smacklem 	 * file servers have no limits, i don't think it matters
37838882Smacklem 	 */
37954448Smckusick 	if (p && uio->uio_offset + uio->uio_resid >
38047572Skarels 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
38147572Skarels 		psignal(p, SIGXFSZ);
38239584Smckusick 		return (EFBIG);
38338882Smacklem 	}
38443348Smckusick 	/*
38543348Smckusick 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
38643348Smckusick 	 * will be the same size within a filesystem. nfs_writerpc will
38743348Smckusick 	 * still use nm_wsize when sizing the rpc's.
38843348Smckusick 	 */
38952196Smckusick 	biosize = nmp->nm_rsize;
39038882Smacklem 	do {
39152196Smckusick 
39252196Smckusick 		/*
39352196Smckusick 		 * Check for a valid write lease.
39452196Smckusick 		 * If non-cachable, just do the rpc
39552196Smckusick 		 */
39652196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
39754448Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
39852196Smckusick 			do {
39954448Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
40052196Smckusick 			} while (error == NQNFS_EXPIRED);
40152196Smckusick 			if (error)
40252196Smckusick 				return (error);
40354448Smckusick 			if (np->n_lrev != np->n_brev ||
40452196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
40557783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
40657783Smckusick 					return (error);
40752196Smckusick 				np->n_brev = np->n_lrev;
40852196Smckusick 			}
40952196Smckusick 		}
41052196Smckusick 		if (np->n_flag & NQNFSNONCACHE)
41157783Smckusick 			return (nfs_writerpc(vp, uio, cred, ioflag));
41239750Smckusick 		nfsstats.biocache_writes++;
41354448Smckusick 		lbn = uio->uio_offset / biosize;
41454448Smckusick 		on = uio->uio_offset & (biosize-1);
41555057Spendry 		n = min((unsigned)(biosize - on), uio->uio_resid);
41652196Smckusick 		bn = lbn * (biosize / DEV_BSIZE);
41740037Smckusick again:
41857783Smckusick 		bp = nfs_getcacheblk(vp, bn, biosize, p);
41957783Smckusick 		if (!bp)
42057783Smckusick 			return (EINTR);
42138882Smacklem 		if (bp->b_wcred == NOCRED) {
42254448Smckusick 			crhold(cred);
42354448Smckusick 			bp->b_wcred = cred;
42438882Smacklem 		}
42557783Smckusick 		np->n_flag |= NMODIFIED;
42657783Smckusick 		if (uio->uio_offset + n > np->n_size) {
42757783Smckusick 			np->n_size = uio->uio_offset + n;
42857783Smckusick 			vnode_pager_setsize(vp, (u_long)np->n_size);
42957783Smckusick 		}
43052196Smckusick 
43152196Smckusick 		/*
43252196Smckusick 		 * If the new write will leave a contiguous dirty
43352196Smckusick 		 * area, just update the b_dirtyoff and b_dirtyend,
43452196Smckusick 		 * otherwise force a write rpc of the old dirty area.
43552196Smckusick 		 */
43652196Smckusick 		if (bp->b_dirtyend > 0 &&
43752196Smckusick 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
43852196Smckusick 			bp->b_proc = p;
43957783Smckusick 			if (VOP_BWRITE(bp) == EINTR)
44057783Smckusick 				return (EINTR);
44152196Smckusick 			goto again;
44252196Smckusick 		}
44352196Smckusick 
44452196Smckusick 		/*
44552196Smckusick 		 * Check for valid write lease and get one as required.
44652196Smckusick 		 * In case getblk() and/or bwrite() delayed us.
44752196Smckusick 		 */
44852196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
44954448Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
45052196Smckusick 			do {
45154448Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
45252196Smckusick 			} while (error == NQNFS_EXPIRED);
45352196Smckusick 			if (error) {
45452196Smckusick 				brelse(bp);
45552196Smckusick 				return (error);
45638882Smacklem 			}
45754448Smckusick 			if (np->n_lrev != np->n_brev ||
45852196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
45956282Smckusick 				brelse(bp);
46057783Smckusick 				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
46157783Smckusick 					return (error);
46252196Smckusick 				np->n_brev = np->n_lrev;
46356282Smckusick 				goto again;
46452196Smckusick 			}
46538882Smacklem 		}
46654448Smckusick 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
46757783Smckusick 			bp->b_flags |= B_ERROR;
46840037Smckusick 			brelse(bp);
46939584Smckusick 			return (error);
47040037Smckusick 		}
47152196Smckusick 		if (bp->b_dirtyend > 0) {
47255057Spendry 			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
47357783Smckusick 			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
47452196Smckusick 		} else {
47552196Smckusick 			bp->b_dirtyoff = on;
47657783Smckusick 			bp->b_dirtyend = on + n;
47752196Smckusick 		}
47857783Smckusick #ifndef notdef
47952196Smckusick 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
48052196Smckusick 		    bp->b_validoff > bp->b_dirtyend) {
48152196Smckusick 			bp->b_validoff = bp->b_dirtyoff;
48252196Smckusick 			bp->b_validend = bp->b_dirtyend;
48352196Smckusick 		} else {
48455057Spendry 			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
48555057Spendry 			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
48652196Smckusick 		}
48757783Smckusick #else
48857783Smckusick 		bp->b_validoff = bp->b_dirtyoff;
48957783Smckusick 		bp->b_validend = bp->b_dirtyend;
49057783Smckusick #endif
49157783Smckusick 		if (ioflag & IO_APPEND)
49257783Smckusick 			bp->b_flags |= B_APPENDWRITE;
49352196Smckusick 
49452196Smckusick 		/*
49552196Smckusick 		 * If the lease is non-cachable or IO_SYNC do bwrite().
49652196Smckusick 		 */
49754448Smckusick 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
49852196Smckusick 			bp->b_proc = p;
49957783Smckusick 			if (error = VOP_BWRITE(bp))
50057783Smckusick 				return (error);
50157783Smckusick 		} else if ((n + on) == biosize &&
50257783Smckusick 			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
50341897Smckusick 			bp->b_proc = (struct proc *)0;
50438882Smacklem 			bawrite(bp);
50557783Smckusick 		} else
50638882Smacklem 			bdwrite(bp);
50757783Smckusick 	} while (uio->uio_resid > 0 && n > 0);
50857783Smckusick 	return (0);
50957783Smckusick }
51057783Smckusick 
51157783Smckusick /*
51257783Smckusick  * Get an nfs cache block.
51357783Smckusick  * Allocate a new one if the block isn't currently in the cache
51457783Smckusick  * and return the block marked busy. If the calling process is
51557783Smckusick  * interrupted by a signal for an interruptible mount point, return
51657783Smckusick  * NULL.
51757783Smckusick  */
51857783Smckusick struct buf *
51957783Smckusick nfs_getcacheblk(vp, bn, size, p)
52057783Smckusick 	struct vnode *vp;
52157783Smckusick 	daddr_t bn;
52257783Smckusick 	int size;
52357783Smckusick 	struct proc *p;
52457783Smckusick {
52557783Smckusick 	register struct buf *bp;
52657783Smckusick 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
52757783Smckusick 
52857783Smckusick 	if (nmp->nm_flag & NFSMNT_INT) {
52957808Smckusick 		bp = getblk(vp, bn, size, PCATCH, 0);
53057783Smckusick 		while (bp == (struct buf *)0) {
53157783Smckusick 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
53257783Smckusick 				return ((struct buf *)0);
53357808Smckusick 			bp = getblk(vp, bn, size, 0, 2 * hz);
53438882Smacklem 		}
53557783Smckusick 	} else
53657808Smckusick 		bp = getblk(vp, bn, size, 0, 0);
53757783Smckusick 	return (bp);
53857783Smckusick }
53957783Smckusick 
54057783Smckusick /*
54157783Smckusick  * Flush and invalidate all dirty buffers. If another process is already
54257783Smckusick  * doing the flush, just wait for completion.
54357783Smckusick  */
54457783Smckusick nfs_vinvalbuf(vp, flags, cred, p, intrflg)
54557783Smckusick 	struct vnode *vp;
54657783Smckusick 	int flags;
54757783Smckusick 	struct ucred *cred;
54857783Smckusick 	struct proc *p;
54957783Smckusick 	int intrflg;
55057783Smckusick {
55157783Smckusick 	register struct nfsnode *np = VTONFS(vp);
55257783Smckusick 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
55357783Smckusick 	int error = 0, slpflag, slptimeo;
55457783Smckusick 
55557783Smckusick 	if ((nmp->nm_flag & NFSMNT_INT) == 0)
55657783Smckusick 		intrflg = 0;
55757783Smckusick 	if (intrflg) {
55857783Smckusick 		slpflag = PCATCH;
55957783Smckusick 		slptimeo = 2 * hz;
56057783Smckusick 	} else {
56157783Smckusick 		slpflag = 0;
56257783Smckusick 		slptimeo = 0;
56357783Smckusick 	}
56457783Smckusick 	/*
56557783Smckusick 	 * First wait for any other process doing a flush to complete.
56657783Smckusick 	 */
56757783Smckusick 	while (np->n_flag & NFLUSHINPROG) {
56857783Smckusick 		np->n_flag |= NFLUSHWANT;
56957783Smckusick 		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
57057783Smckusick 			slptimeo);
57157783Smckusick 		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
57257783Smckusick 			return (EINTR);
57357783Smckusick 	}
57457783Smckusick 
57557783Smckusick 	/*
57657783Smckusick 	 * Now, flush as required.
57757783Smckusick 	 */
57857783Smckusick 	np->n_flag |= NFLUSHINPROG;
57957808Smckusick 	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
58057783Smckusick 	while (error) {
58157783Smckusick 		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
58257783Smckusick 			np->n_flag &= ~NFLUSHINPROG;
58357783Smckusick 			if (np->n_flag & NFLUSHWANT) {
58457783Smckusick 				np->n_flag &= ~NFLUSHWANT;
58557783Smckusick 				wakeup((caddr_t)&np->n_flag);
58657783Smckusick 			}
58757783Smckusick 			return (EINTR);
58857783Smckusick 		}
58957808Smckusick 		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
59057783Smckusick 	}
59157783Smckusick 	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
59257783Smckusick 	if (np->n_flag & NFLUSHWANT) {
59357783Smckusick 		np->n_flag &= ~NFLUSHWANT;
59457783Smckusick 		wakeup((caddr_t)&np->n_flag);
59557783Smckusick 	}
59657783Smckusick 	return (0);
59757783Smckusick }
59857783Smckusick 
59957783Smckusick /*
60057783Smckusick  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
60157783Smckusick  * This is mainly to avoid queueing async I/O requests when the nfsiods
60257783Smckusick  * are all hung on a dead server.
60357783Smckusick  */
60457783Smckusick nfs_asyncio(bp, cred)
60557783Smckusick 	register struct buf *bp;
60657783Smckusick 	struct ucred *cred;
60757783Smckusick {
60857783Smckusick 	register int i;
60957783Smckusick 
61057783Smckusick 	if (nfs_numasync == 0)
61157783Smckusick 		return (EIO);
61257783Smckusick 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
61357783Smckusick 	    if (nfs_iodwant[i]) {
61457783Smckusick 		if (bp->b_flags & B_READ) {
61557783Smckusick 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
61657783Smckusick 				crhold(cred);
61757783Smckusick 				bp->b_rcred = cred;
61857783Smckusick 			}
61957783Smckusick 		} else {
62057783Smckusick 			if (bp->b_wcred == NOCRED && cred != NOCRED) {
62157783Smckusick 				crhold(cred);
62257783Smckusick 				bp->b_wcred = cred;
62357783Smckusick 			}
62457783Smckusick 		}
62557783Smckusick 
62657783Smckusick 		queue_enter_tail(&nfs_bufq, bp, struct buf *, b_freelist);
62757783Smckusick 		nfs_iodwant[i] = (struct proc *)0;
62857783Smckusick 		wakeup((caddr_t)&nfs_iodwant[i]);
62957783Smckusick 		return (0);
63057783Smckusick 	    }
63157783Smckusick 	return (EIO);
63257783Smckusick }
63357783Smckusick 
63457783Smckusick /*
63557783Smckusick  * Do an I/O operation to/from a cache block. This may be called
63657783Smckusick  * synchronously or from an nfsiod.
63757783Smckusick  */
63857783Smckusick int
63957783Smckusick nfs_doio(bp, cr, p)
64057783Smckusick 	register struct buf *bp;
64157783Smckusick 	struct cred *cr;
64257783Smckusick 	struct proc *p;
64357783Smckusick {
64457783Smckusick 	register struct uio *uiop;
64557783Smckusick 	register struct vnode *vp;
64657783Smckusick 	struct nfsnode *np;
64757783Smckusick 	struct nfsmount *nmp;
64857783Smckusick 	int error, diff, len;
64957783Smckusick 	struct uio uio;
65057783Smckusick 	struct iovec io;
65157783Smckusick 
65257783Smckusick 	vp = bp->b_vp;
65357783Smckusick 	np = VTONFS(vp);
65457783Smckusick 	nmp = VFSTONFS(vp->v_mount);
65557783Smckusick 	uiop = &uio;
65657783Smckusick 	uiop->uio_iov = &io;
65757783Smckusick 	uiop->uio_iovcnt = 1;
65857783Smckusick 	uiop->uio_segflg = UIO_SYSSPACE;
65957783Smckusick 	uiop->uio_procp = p;
66057783Smckusick 
66157783Smckusick 	/*
66257783Smckusick 	 * Historically, paging was done with physio, but no more.
66357783Smckusick 	 */
66457783Smckusick 	if (bp->b_flags & B_PHYS)
66557783Smckusick 	    panic("doio phys");
66657783Smckusick 	if (bp->b_flags & B_READ) {
66757783Smckusick 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
66857783Smckusick 	    io.iov_base = bp->b_un.b_addr;
66957783Smckusick 	    uiop->uio_rw = UIO_READ;
67057783Smckusick 	    switch (vp->v_type) {
67157783Smckusick 	    case VREG:
67257783Smckusick 		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
67357783Smckusick 		nfsstats.read_bios++;
67457783Smckusick 		error = nfs_readrpc(vp, uiop, cr);
67557783Smckusick 		if (!error) {
67657783Smckusick 		    bp->b_validoff = 0;
67757783Smckusick 		    if (uiop->uio_resid) {
67857783Smckusick 			/*
67957783Smckusick 			 * If len > 0, there is a hole in the file and
68057783Smckusick 			 * no writes after the hole have been pushed to
68157783Smckusick 			 * the server yet.
68257783Smckusick 			 * Just zero fill the rest of the valid area.
68357783Smckusick 			 */
68457783Smckusick 			diff = bp->b_bcount - uiop->uio_resid;
68557783Smckusick 			len = np->n_size - (bp->b_blkno * DEV_BSIZE
68657783Smckusick 				+ diff);
68757783Smckusick 			if (len > 0) {
68857783Smckusick 			    len = min(len, uiop->uio_resid);
68957783Smckusick 			    bzero(bp->b_un.b_addr + diff, len);
69057783Smckusick 			    bp->b_validend = diff + len;
69157783Smckusick 			} else
69257783Smckusick 			    bp->b_validend = diff;
69357783Smckusick 		    } else
69457783Smckusick 			bp->b_validend = bp->b_bcount;
69557783Smckusick 		}
69657783Smckusick 		if (p && (vp->v_flag & VTEXT) &&
69757783Smckusick 			(((nmp->nm_flag & NFSMNT_NQNFS) &&
69857783Smckusick 			  np->n_lrev != np->n_brev) ||
69957783Smckusick 			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
70057783Smckusick 			  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
70157783Smckusick 			uprintf("Process killed due to text file modification\n");
70257783Smckusick 			psignal(p, SIGKILL);
70357783Smckusick 			p->p_flag |= SKEEP;
70457783Smckusick 		}
70557783Smckusick 		break;
70657783Smckusick 	    case VLNK:
70757783Smckusick 		uiop->uio_offset = 0;
70857783Smckusick 		nfsstats.readlink_bios++;
70957783Smckusick 		error = nfs_readlinkrpc(vp, uiop, cr);
71057783Smckusick 		break;
71157783Smckusick 	    case VDIR:
71257783Smckusick 		uiop->uio_offset = bp->b_lblkno;
71357783Smckusick 		nfsstats.readdir_bios++;
71457783Smckusick 		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
71557783Smckusick 		    error = nfs_readdirlookrpc(vp, uiop, cr);
71657783Smckusick 		else
71757783Smckusick 		    error = nfs_readdirrpc(vp, uiop, cr);
71857783Smckusick 		/*
71957783Smckusick 		 * Save offset cookie in b_blkno.
72057783Smckusick 		 */
72157783Smckusick 		bp->b_blkno = uiop->uio_offset;
72257783Smckusick 		break;
72357783Smckusick 	    };
72457783Smckusick 	    if (error) {
72557783Smckusick 		bp->b_flags |= B_ERROR;
72657783Smckusick 		bp->b_error = error;
72757783Smckusick 	    }
72857783Smckusick 	} else {
72957783Smckusick 	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
73057783Smckusick 		- bp->b_dirtyoff;
73157783Smckusick 	    uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
73257783Smckusick 		+ bp->b_dirtyoff;
73357783Smckusick 	    io.iov_base = bp->b_un.b_addr + bp->b_dirtyoff;
73457783Smckusick 	    uiop->uio_rw = UIO_WRITE;
73557783Smckusick 	    nfsstats.write_bios++;
73657783Smckusick 	    if (bp->b_flags & B_APPENDWRITE)
73757783Smckusick 		error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
73857783Smckusick 	    else
73957783Smckusick 		error = nfs_writerpc(vp, uiop, cr, 0);
74057783Smckusick 	    bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
74157783Smckusick 
74257783Smckusick 	    /*
74357783Smckusick 	     * For an interrupted write, the buffer is still valid and the
74457783Smckusick 	     * write hasn't been pushed to the server yet, so we can't set
74557783Smckusick 	     * B_ERROR and report the interruption by setting B_EINTR. For
74657783Smckusick 	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
74757783Smckusick 	     * is essentially a noop.
74857783Smckusick 	     */
74957783Smckusick 	    if (error == EINTR) {
75057783Smckusick 		bp->b_flags &= ~B_INVAL;
75157783Smckusick 		bp->b_flags |= B_DELWRI;
75257783Smckusick 
75357783Smckusick 		/*
75457783Smckusick 		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
75557783Smckusick 		 * buffer to the clean list, we have to reassign it back to the
75657783Smckusick 		 * dirty one. Ugh.
75757783Smckusick 		 */
75857783Smckusick 		if (bp->b_flags & B_ASYNC)
75957783Smckusick 		    reassignbuf(bp, vp);
76057783Smckusick 		else
76157783Smckusick 		    bp->b_flags |= B_EINTR;
76257783Smckusick 	    } else {
76357783Smckusick 		if (error) {
76457783Smckusick 		    bp->b_flags |= B_ERROR;
76557783Smckusick 		    bp->b_error = np->n_error = error;
76657783Smckusick 		    np->n_flag |= NWRITEERR;
76757783Smckusick 		}
76857783Smckusick 		bp->b_dirtyoff = bp->b_dirtyend = 0;
76957783Smckusick 	    }
77057783Smckusick 	}
77157783Smckusick 	bp->b_resid = uiop->uio_resid;
77257783Smckusick 	biodone(bp);
77338882Smacklem 	return (error);
77438882Smacklem }
775