xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 52986)
138882Smacklem /*
238882Smacklem  * Copyright (c) 1989 The Regents of the University of California.
338882Smacklem  * All rights reserved.
438882Smacklem  *
538882Smacklem  * This code is derived from software contributed to Berkeley by
638882Smacklem  * Rick Macklem at The University of Guelph.
738882Smacklem  *
844509Sbostic  * %sccs.include.redist.c%
938882Smacklem  *
10*52986Smckusick  *	@(#)nfs_bio.c	7.22 (Berkeley) 03/17/92
1138882Smacklem  */
1238882Smacklem 
1338882Smacklem #include "param.h"
1452196Smckusick #include "resourcevar.h"
1547572Skarels #include "proc.h"
1638882Smacklem #include "buf.h"
1738882Smacklem #include "vnode.h"
1838882Smacklem #include "trace.h"
1938882Smacklem #include "mount.h"
2052196Smckusick #include "kernel.h"
2152196Smckusick #include "machine/endian.h"
2252196Smckusick #include "nfsnode.h"
2352196Smckusick #include "rpcv2.h"
2439750Smckusick #include "nfsv2.h"
2539750Smckusick #include "nfs.h"
2641897Smckusick #include "nfsmount.h"
2752196Smckusick #include "nqnfs.h"
2838882Smacklem 
/*
 * True and false, how exciting.
 * TRUE is what this file passes as the second argument of vinvalbuf().
 * Each definition is guarded so that a header which already supplies the
 * name does not cause a redefinition.
 */
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif
3238882Smacklem 
3338882Smacklem /*
3438882Smacklem  * Vnode op for read using bio
3538882Smacklem  * Any similarity to readip() is purely coincidental
3638882Smacklem  */
3741897Smckusick nfs_bioread(vp, uio, ioflag, cred)
3838882Smacklem 	register struct vnode *vp;
3943348Smckusick 	register struct uio *uio;
4038882Smacklem 	int ioflag;
4138882Smacklem 	struct ucred *cred;
4238882Smacklem {
4338882Smacklem 	register struct nfsnode *np = VTONFS(vp);
4443348Smckusick 	register int biosize;
4538882Smacklem 	struct buf *bp;
4638882Smacklem 	struct vattr vattr;
4752196Smckusick 	struct nfsmount *nmp;
4852196Smckusick 	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
4952196Smckusick 	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
5052196Smckusick 	int n, on;
5138882Smacklem 
5242241Smckusick #ifdef lint
5342241Smckusick 	ioflag = ioflag;
5442241Smckusick #endif /* lint */
5548047Smckusick #ifdef DIAGNOSTIC
5638882Smacklem 	if (uio->uio_rw != UIO_READ)
5738882Smacklem 		panic("nfs_read mode");
5848047Smckusick #endif
5938882Smacklem 	if (uio->uio_resid == 0)
6039584Smckusick 		return (0);
6141897Smckusick 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
6239584Smckusick 		return (EINVAL);
6352196Smckusick 	nmp = VFSTONFS(vp->v_mount);
6452196Smckusick 	biosize = nmp->nm_rsize;
6538882Smacklem 	/*
6652196Smckusick 	 * For nfs, cache consistency can only be maintained approximately.
6752196Smckusick 	 * Although RFC1094 does not specify the criteria, the following is
6852196Smckusick 	 * believed to be compatible with the reference port.
6952196Smckusick 	 * For nqnfs, full cache consistency is maintained within the loop.
7052196Smckusick 	 * For nfs:
7138882Smacklem 	 * If the file's modify time on the server has changed since the
7238882Smacklem 	 * last read rpc or you have written to the file,
7338882Smacklem 	 * you may have lost data cache consistency with the
7438882Smacklem 	 * server, so flush all of the file's data out of the cache.
7541897Smckusick 	 * Then force a getattr rpc to ensure that you have up to date
7641897Smckusick 	 * attributes.
7752196Smckusick 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
7852196Smckusick 	 * the ones changing the modify time.
7938882Smacklem 	 * NB: This implies that cache data can be read when up to
8038882Smacklem 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
8138882Smacklem 	 * attributes this could be forced by setting n_attrstamp to 0 before
8252196Smckusick 	 * the nfs_getattr() call.
8338882Smacklem 	 */
8452196Smckusick 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
8541897Smckusick 		if (np->n_flag & NMODIFIED) {
8641897Smckusick 			np->n_flag &= ~NMODIFIED;
8752196Smckusick 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
8852196Smckusick 			     vp->v_type != VREG)
8952196Smckusick 				vinvalbuf(vp, TRUE);
9041897Smckusick 			np->n_attrstamp = 0;
9141897Smckusick 			np->n_direofoffset = 0;
9252196Smckusick 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
9339750Smckusick 				return (error);
9439750Smckusick 			np->n_mtime = vattr.va_mtime.tv_sec;
9541897Smckusick 		} else {
9652196Smckusick 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
9741897Smckusick 				return (error);
9841897Smckusick 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
9941897Smckusick 				np->n_direofoffset = 0;
10041897Smckusick 				vinvalbuf(vp, TRUE);
10141897Smckusick 				np->n_mtime = vattr.va_mtime.tv_sec;
10241897Smckusick 			}
10339750Smckusick 		}
10438882Smacklem 	}
10538882Smacklem 	do {
10652196Smckusick 
10752196Smckusick 	    /*
10852196Smckusick 	     * Get a valid lease. If cached data is stale, flush it.
10952196Smckusick 	     */
11052196Smckusick 	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
11152196Smckusick 		NQNFS_CKINVALID(vp, np, NQL_READ)) {
11252196Smckusick 		do {
11352196Smckusick 			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
11452196Smckusick 		} while (error == NQNFS_EXPIRED);
11552196Smckusick 		if (error)
11652196Smckusick 			return (error);
11752196Smckusick 		if (QUADNE(np->n_lrev, np->n_brev) ||
11852196Smckusick 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
11952196Smckusick 			if (vp->v_type == VDIR) {
12052196Smckusick 				np->n_direofoffset = 0;
12152196Smckusick 				cache_purge(vp);
12252196Smckusick 			}
12352196Smckusick 			np->n_flag &= ~NMODIFIED;
12452196Smckusick 			vinvalbuf(vp, TRUE);
12552196Smckusick 			np->n_brev = np->n_lrev;
12652196Smckusick 		}
12752196Smckusick 	    }
12852196Smckusick 	    if (np->n_flag & NQNFSNONCACHE) {
12952196Smckusick 		switch (vp->v_type) {
13052196Smckusick 		case VREG:
13152196Smckusick 			error = nfs_readrpc(vp, uio, cred);
13252196Smckusick 			break;
13352196Smckusick 		case VLNK:
13452196Smckusick 			error = nfs_readlinkrpc(vp, uio, cred);
13552196Smckusick 			break;
13652196Smckusick 		case VDIR:
13752196Smckusick 			error = nfs_readdirrpc(vp, uio, cred);
13852196Smckusick 			break;
13952196Smckusick 		};
14052196Smckusick 		return (error);
14152196Smckusick 	    }
14241897Smckusick 	    switch (vp->v_type) {
14341897Smckusick 	    case VREG:
14439750Smckusick 		nfsstats.biocache_reads++;
14543348Smckusick 		lbn = uio->uio_offset / biosize;
14643348Smckusick 		on = uio->uio_offset & (biosize-1);
14743348Smckusick 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
14838882Smacklem 		diff = np->n_size - uio->uio_offset;
14938882Smacklem 		if (diff <= 0)
15039584Smckusick 			return (error);
15138882Smacklem 		if (diff < n)
15238882Smacklem 			n = diff;
15343348Smckusick 		bn = lbn*(biosize/DEV_BSIZE);
15452196Smckusick 		for (nra = 0; nra < nmp->nm_readahead &&
15552196Smckusick 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
15652196Smckusick 			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
15752196Smckusick 			rasize[nra] = biosize;
15852196Smckusick 		}
15952196Smckusick again:
16052196Smckusick 		if (nra > 0 && lbn >= vp->v_lastr)
16152196Smckusick 			error = breadn(vp, bn, biosize, rablock, rasize, nra,
16238882Smacklem 				cred, &bp);
16338882Smacklem 		else
16443348Smckusick 			error = bread(vp, bn, biosize, cred, &bp);
16552196Smckusick 		if (bp->b_validend > 0) {
16652196Smckusick 			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
16752196Smckusick 				bp->b_flags |= B_INVAL;
16852196Smckusick 				if (bp->b_dirtyend > 0) {
16952196Smckusick 					if ((bp->b_flags & B_DELWRI) == 0)
17052196Smckusick 						panic("nfsbioread");
17152196Smckusick 					(void) bwrite(bp);
17252196Smckusick 				} else
17352196Smckusick 					brelse(bp);
17452196Smckusick 				goto again;
17552196Smckusick 			}
17652196Smckusick 		} else {
17752196Smckusick 			bp->b_validoff = 0;
17852196Smckusick 			bp->b_validend = biosize - bp->b_resid;
17952196Smckusick 		}
18039901Smckusick 		vp->v_lastr = lbn;
18138882Smacklem 		if (bp->b_resid) {
18243348Smckusick 		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
18343348Smckusick 			(biosize-bp->b_resid-on);
18441897Smckusick 		   n = MIN(n, diff);
18538882Smacklem 		}
18641897Smckusick 		break;
18741897Smckusick 	    case VLNK:
18841897Smckusick 		nfsstats.biocache_readlinks++;
18941897Smckusick 		on = 0;
19041897Smckusick 		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
19141897Smckusick 		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
19241897Smckusick 		break;
19341897Smckusick 	    case VDIR:
19441897Smckusick 		nfsstats.biocache_readdirs++;
19541897Smckusick 		on = 0;
19648047Smckusick 		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
19748047Smckusick 		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
19841897Smckusick 		break;
19941897Smckusick 	    };
20041897Smckusick 	    if (error) {
20141897Smckusick 		brelse(bp);
20241897Smckusick 		return (error);
20341897Smckusick 	    }
20452196Smckusick 
20552196Smckusick 	    /*
20652196Smckusick 	     * For nqnfs:
20752196Smckusick 	     * Must check for valid lease, since it may have expired while in
20852196Smckusick 	     * bread(). If expired, get a lease.
20952196Smckusick 	     * If data is stale, flush and try again.
21052196Smckusick 	     * nb: If a read rpc is done by bread() or breada() and there is
21152196Smckusick 	     *     no valid lease, a get_lease request will be piggy backed.
21252196Smckusick 	     */
21352196Smckusick 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
21452196Smckusick 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
21552196Smckusick 			do {
21652196Smckusick 				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
21752196Smckusick 			} while (error == NQNFS_EXPIRED);
21852196Smckusick 			if (error) {
21952196Smckusick 				brelse(bp);
22052196Smckusick 				return (error);
22152196Smckusick 			}
22252196Smckusick 			if ((np->n_flag & NQNFSNONCACHE) ||
22352196Smckusick 			    QUADNE(np->n_lrev, np->n_brev) ||
22452196Smckusick 			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
22552196Smckusick 				if (vp->v_type == VDIR) {
22652196Smckusick 					np->n_direofoffset = 0;
22752196Smckusick 					cache_purge(vp);
22852196Smckusick 				}
22952196Smckusick 				brelse(bp);
23052196Smckusick 				np->n_flag &= ~NMODIFIED;
23152196Smckusick 				vinvalbuf(vp, TRUE);
23252196Smckusick 				np->n_brev = np->n_lrev;
23352196Smckusick 				continue;
23452196Smckusick 			}
23552196Smckusick 		} else if ((np->n_flag & NQNFSNONCACHE) ||
23652196Smckusick 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
23752196Smckusick 			np->n_direofoffset = 0;
23852196Smckusick 			brelse(bp);
23952196Smckusick 			np->n_flag &= ~NMODIFIED;
24052196Smckusick 			vinvalbuf(vp, TRUE);
24152196Smckusick 			np->n_brev = np->n_lrev;
24252196Smckusick 			continue;
24352196Smckusick 		}
24452196Smckusick 	    }
24541897Smckusick 	    if (n > 0)
24641897Smckusick 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
24741897Smckusick 	    switch (vp->v_type) {
24841897Smckusick 	    case VREG:
24943348Smckusick 		if (n+on == biosize || uio->uio_offset == np->n_size)
25038882Smacklem 			bp->b_flags |= B_AGE;
25141897Smckusick 		break;
25241897Smckusick 	    case VLNK:
25341897Smckusick 		n = 0;
25441897Smckusick 		break;
25541897Smckusick 	    case VDIR:
25641897Smckusick 		uio->uio_offset = bp->b_blkno;
25741897Smckusick 		break;
25841897Smckusick 	    };
25941897Smckusick 	    brelse(bp);
26038882Smacklem 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
26138882Smacklem 	return (error);
26238882Smacklem }
26338882Smacklem 
26438882Smacklem /*
26538882Smacklem  * Vnode op for write using bio
26638882Smacklem  */
26739584Smckusick nfs_write(vp, uio, ioflag, cred)
26838882Smacklem 	register struct vnode *vp;
26938882Smacklem 	register struct uio *uio;
27038882Smacklem 	int ioflag;
27138882Smacklem 	struct ucred *cred;
27238882Smacklem {
27352196Smckusick 	register int biosize;
27448047Smckusick 	struct proc *p = uio->uio_procp;
27538882Smacklem 	struct buf *bp;
27638882Smacklem 	struct nfsnode *np = VTONFS(vp);
27741897Smckusick 	struct vattr vattr;
27852196Smckusick 	struct nfsmount *nmp;
27938882Smacklem 	daddr_t lbn, bn;
28040220Smckusick 	int n, on, error = 0;
28138882Smacklem 
28248047Smckusick #ifdef DIAGNOSTIC
28341897Smckusick 	if (uio->uio_rw != UIO_WRITE)
28441897Smckusick 		panic("nfs_write mode");
28548047Smckusick 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
28648047Smckusick 		panic("nfs_write proc");
28748047Smckusick #endif
28841897Smckusick 	if (vp->v_type != VREG)
28941897Smckusick 		return (EIO);
290*52986Smckusick 	if (ioflag & (IO_APPEND | IO_SYNC)) {
291*52986Smckusick 		if (np->n_flag & NMODIFIED) {
292*52986Smckusick 			np->n_flag &= ~NMODIFIED;
293*52986Smckusick 			vinvalbuf(vp, TRUE);
294*52986Smckusick 		}
295*52986Smckusick 		if (ioflag & IO_APPEND) {
296*52986Smckusick 			np->n_attrstamp = 0;
297*52986Smckusick 			if (error = nfs_getattr(vp, &vattr, cred, p))
298*52986Smckusick 				return (error);
299*52986Smckusick 			uio->uio_offset = np->n_size;
300*52986Smckusick 		}
301*52986Smckusick 	}
30252196Smckusick 	nmp = VFSTONFS(vp->v_mount);
30339584Smckusick 	if (uio->uio_offset < 0)
30439584Smckusick 		return (EINVAL);
30538882Smacklem 	if (uio->uio_resid == 0)
30639584Smckusick 		return (0);
30738882Smacklem 	/*
30838882Smacklem 	 * Maybe this should be above the vnode op call, but so long as
30938882Smacklem 	 * file servers have no limits, i don't think it matters
31038882Smacklem 	 */
31152196Smckusick 	if (p && uio->uio_offset + uio->uio_resid >
31247572Skarels 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
31347572Skarels 		psignal(p, SIGXFSZ);
31439584Smckusick 		return (EFBIG);
31538882Smacklem 	}
31643348Smckusick 	/*
31743348Smckusick 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
31843348Smckusick 	 * will be the same size within a filesystem. nfs_writerpc will
31943348Smckusick 	 * still use nm_wsize when sizing the rpc's.
32043348Smckusick 	 */
32152196Smckusick 	biosize = nmp->nm_rsize;
32241897Smckusick 	np->n_flag |= NMODIFIED;
32338882Smacklem 	do {
32452196Smckusick 
32552196Smckusick 		/*
32652196Smckusick 		 * Check for a valid write lease.
32752196Smckusick 		 * If non-cachable, just do the rpc
32852196Smckusick 		 */
32952196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
33052196Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
33152196Smckusick 			do {
33252196Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
33352196Smckusick 			} while (error == NQNFS_EXPIRED);
33452196Smckusick 			if (error)
33552196Smckusick 				return (error);
33652196Smckusick 			if (QUADNE(np->n_lrev, np->n_brev) ||
33752196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
33852196Smckusick 				vinvalbuf(vp, TRUE);
33952196Smckusick 				np->n_brev = np->n_lrev;
34052196Smckusick 			}
34152196Smckusick 		}
34252196Smckusick 		if (np->n_flag & NQNFSNONCACHE)
34352196Smckusick 			return (nfs_writerpc(vp, uio, cred));
34439750Smckusick 		nfsstats.biocache_writes++;
34543348Smckusick 		lbn = uio->uio_offset / biosize;
34643348Smckusick 		on = uio->uio_offset & (biosize-1);
34743348Smckusick 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
34852196Smckusick 		if (uio->uio_offset + n > np->n_size) {
34952196Smckusick 			np->n_size = uio->uio_offset + n;
35045714Smckusick 			vnode_pager_setsize(vp, np->n_size);
35145714Smckusick 		}
35252196Smckusick 		bn = lbn * (biosize / DEV_BSIZE);
35340037Smckusick again:
35443348Smckusick 		bp = getblk(vp, bn, biosize);
35538882Smacklem 		if (bp->b_wcred == NOCRED) {
35638882Smacklem 			crhold(cred);
35738882Smacklem 			bp->b_wcred = cred;
35838882Smacklem 		}
35952196Smckusick 
36052196Smckusick 		/*
36152196Smckusick 		 * If the new write will leave a contiguous dirty
36252196Smckusick 		 * area, just update the b_dirtyoff and b_dirtyend,
36352196Smckusick 		 * otherwise force a write rpc of the old dirty area.
36452196Smckusick 		 */
36552196Smckusick 		if (bp->b_dirtyend > 0 &&
36652196Smckusick 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
36752196Smckusick 			bp->b_proc = p;
36852196Smckusick 			if (error = bwrite(bp))
36952196Smckusick 				return (error);
37052196Smckusick 			goto again;
37152196Smckusick 		}
37252196Smckusick 
37352196Smckusick 		/*
37452196Smckusick 		 * Check for valid write lease and get one as required.
37552196Smckusick 		 * In case getblk() and/or bwrite() delayed us.
37652196Smckusick 		 */
37752196Smckusick 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
37852196Smckusick 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
37952196Smckusick 			do {
38052196Smckusick 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
38152196Smckusick 			} while (error == NQNFS_EXPIRED);
38252196Smckusick 			if (error) {
38352196Smckusick 				brelse(bp);
38452196Smckusick 				return (error);
38538882Smacklem 			}
38652196Smckusick 			if (QUADNE(np->n_lrev, np->n_brev) ||
38752196Smckusick 			    (np->n_flag & NQNFSNONCACHE)) {
38852196Smckusick 				vinvalbuf(vp, TRUE);
38952196Smckusick 				np->n_brev = np->n_lrev;
39052196Smckusick 			}
39138882Smacklem 		}
39240037Smckusick 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
39340037Smckusick 			brelse(bp);
39439584Smckusick 			return (error);
39540037Smckusick 		}
39652196Smckusick 		if (bp->b_dirtyend > 0) {
39752196Smckusick 			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
39852196Smckusick 			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
39952196Smckusick 		} else {
40052196Smckusick 			bp->b_dirtyoff = on;
40152196Smckusick 			bp->b_dirtyend = on+n;
40252196Smckusick 		}
40352196Smckusick 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
40452196Smckusick 		    bp->b_validoff > bp->b_dirtyend) {
40552196Smckusick 			bp->b_validoff = bp->b_dirtyoff;
40652196Smckusick 			bp->b_validend = bp->b_dirtyend;
40752196Smckusick 		} else {
40852196Smckusick 			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
40952196Smckusick 			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
41052196Smckusick 		}
41152196Smckusick 
41252196Smckusick 		/*
41352196Smckusick 		 * If the lease is non-cachable or IO_SYNC do bwrite().
41452196Smckusick 		 */
41552196Smckusick 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
41652196Smckusick 			bp->b_proc = p;
41752196Smckusick 			bwrite(bp);
41852196Smckusick 		} else if ((n+on) == biosize &&
41952196Smckusick 			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
42038882Smacklem 			bp->b_flags |= B_AGE;
42141897Smckusick 			bp->b_proc = (struct proc *)0;
42238882Smacklem 			bawrite(bp);
42338882Smacklem 		} else {
42441897Smckusick 			bp->b_proc = (struct proc *)0;
42538882Smacklem 			bdwrite(bp);
42638882Smacklem 		}
42738882Smacklem 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
42838882Smacklem 	return (error);
42938882Smacklem }
430