138882Smacklem /* 238882Smacklem * Copyright (c) 1989 The Regents of the University of California. 338882Smacklem * All rights reserved. 438882Smacklem * 538882Smacklem * This code is derived from software contributed to Berkeley by 638882Smacklem * Rick Macklem at The University of Guelph. 738882Smacklem * 844509Sbostic * %sccs.include.redist.c% 938882Smacklem * 10*54669Smckusick * @(#)nfs_bio.c 7.29 (Berkeley) 07/03/92 1138882Smacklem */ 1238882Smacklem 1353322Smckusick #include <sys/param.h> 1453322Smckusick #include <sys/resourcevar.h> 1553322Smckusick #include <sys/proc.h> 1653322Smckusick #include <sys/buf.h> 1753322Smckusick #include <sys/vnode.h> 1853322Smckusick #include <sys/trace.h> 1953322Smckusick #include <sys/mount.h> 2053322Smckusick #include <sys/kernel.h> 2153322Smckusick #include <machine/endian.h> 2253322Smckusick #include <vm/vm.h> 2353322Smckusick #include <nfs/nfsnode.h> 2453322Smckusick #include <nfs/rpcv2.h> 2553322Smckusick #include <nfs/nfsv2.h> 2653322Smckusick #include <nfs/nfs.h> 2753322Smckusick #include <nfs/nfsmount.h> 2853322Smckusick #include <nfs/nqnfs.h> 2938882Smacklem 3038882Smacklem /* True and false, how exciting */ 3138882Smacklem #define TRUE 1 3238882Smacklem #define FALSE 0 3338882Smacklem 3438882Smacklem /* 3538882Smacklem * Vnode op for read using bio 3638882Smacklem * Any similarity to readip() is purely coincidental 3738882Smacklem */ 3841897Smckusick nfs_bioread(vp, uio, ioflag, cred) 3938882Smacklem register struct vnode *vp; 4043348Smckusick register struct uio *uio; 4138882Smacklem int ioflag; 4238882Smacklem struct ucred *cred; 4338882Smacklem { 4438882Smacklem register struct nfsnode *np = VTONFS(vp); 4543348Smckusick register int biosize; 4638882Smacklem struct buf *bp; 4738882Smacklem struct vattr vattr; 4852196Smckusick struct nfsmount *nmp; 4952196Smckusick daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; 5052196Smckusick int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; 5152196Smckusick int n, on; 5238882Smacklem 5342241Smckusick #ifdef lint 5442241Smckusick ioflag = ioflag; 5542241Smckusick #endif /* lint */ 5648047Smckusick #ifdef DIAGNOSTIC 5738882Smacklem if (uio->uio_rw != UIO_READ) 5838882Smacklem panic("nfs_read mode"); 5948047Smckusick #endif 6038882Smacklem if (uio->uio_resid == 0) 6139584Smckusick return (0); 6241897Smckusick if (uio->uio_offset < 0 && vp->v_type != VDIR) 6339584Smckusick return (EINVAL); 6452196Smckusick nmp = VFSTONFS(vp->v_mount); 6552196Smckusick biosize = nmp->nm_rsize; 6638882Smacklem /* 6752196Smckusick * For nfs, cache consistency can only be maintained approximately. 6852196Smckusick * Although RFC1094 does not specify the criteria, the following is 6952196Smckusick * believed to be compatible with the reference port. 7052196Smckusick * For nqnfs, full cache consistency is maintained within the loop. 7152196Smckusick * For nfs: 7238882Smacklem * If the file's modify time on the server has changed since the 7338882Smacklem * last read rpc or you have written to the file, 7438882Smacklem * you may have lost data cache consistency with the 7538882Smacklem * server, so flush all of the file's data out of the cache. 7641897Smckusick * Then force a getattr rpc to ensure that you have up to date 7741897Smckusick * attributes. 7852196Smckusick * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 7952196Smckusick * the ones changing the modify time. 8038882Smacklem * NB: This implies that cache data can be read when up to 8138882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current 8238882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before 8353550Sheideman * the VOP_GETATTR() call. 8438882Smacklem */ 8552196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 8641897Smckusick if (np->n_flag & NMODIFIED) { 8741897Smckusick np->n_flag &= ~NMODIFIED; 8852196Smckusick if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 8952196Smckusick vp->v_type != VREG) 9054448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 9141897Smckusick np->n_attrstamp = 0; 9241897Smckusick np->n_direofoffset = 0; 9353550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 9439750Smckusick return (error); 9554106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 9641897Smckusick } else { 9753550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 9841897Smckusick return (error); 9954106Smckusick if (np->n_mtime != vattr.va_mtime.ts_sec) { 10041897Smckusick np->n_direofoffset = 0; 10154448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 10254106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 10341897Smckusick } 10439750Smckusick } 10538882Smacklem } 10638882Smacklem do { 10752196Smckusick 10852196Smckusick /* 10952196Smckusick * Get a valid lease. If cached data is stale, flush it. 11052196Smckusick */ 11152196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 11252196Smckusick NQNFS_CKINVALID(vp, np, NQL_READ)) { 11352196Smckusick do { 11452196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 11552196Smckusick } while (error == NQNFS_EXPIRED); 11652196Smckusick if (error) 11752196Smckusick return (error); 11854448Smckusick if (np->n_lrev != np->n_brev || 11952196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 12052196Smckusick if (vp->v_type == VDIR) { 12152196Smckusick np->n_direofoffset = 0; 12252196Smckusick cache_purge(vp); 12352196Smckusick } 12452196Smckusick np->n_flag &= ~NMODIFIED; 12554448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 12652196Smckusick np->n_brev = np->n_lrev; 12752196Smckusick } 12852196Smckusick } 12952196Smckusick if (np->n_flag & NQNFSNONCACHE) { 13052196Smckusick switch (vp->v_type) { 13152196Smckusick case VREG: 13252196Smckusick error = nfs_readrpc(vp, uio, cred); 13352196Smckusick break; 13452196Smckusick case VLNK: 13552196Smckusick error = nfs_readlinkrpc(vp, uio, cred); 13652196Smckusick break; 13752196Smckusick case VDIR: 13852196Smckusick error = nfs_readdirrpc(vp, uio, cred); 13952196Smckusick break; 14052196Smckusick }; 14152196Smckusick return (error); 14252196Smckusick } 14341897Smckusick switch (vp->v_type) { 14441897Smckusick case VREG: 14539750Smckusick nfsstats.biocache_reads++; 14643348Smckusick lbn = uio->uio_offset / biosize; 14743348Smckusick on = uio->uio_offset & (biosize-1); 14843348Smckusick n = MIN((unsigned)(biosize - on), uio->uio_resid); 14938882Smacklem diff = np->n_size - uio->uio_offset; 15038882Smacklem if (diff <= 0) 15139584Smckusick return (error); 15238882Smacklem if (diff < n) 15338882Smacklem n = diff; 15443348Smckusick bn = lbn*(biosize/DEV_BSIZE); 15552196Smckusick for (nra = 0; nra < nmp->nm_readahead && 15652196Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) { 15752196Smckusick rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 15852196Smckusick rasize[nra] = biosize; 15952196Smckusick } 16052196Smckusick again: 16152196Smckusick if (nra > 0 && lbn >= vp->v_lastr) 16252196Smckusick error = breadn(vp, bn, biosize, rablock, rasize, nra, 16338882Smacklem cred, &bp); 16438882Smacklem else 16543348Smckusick error = bread(vp, bn, biosize, cred, &bp); 16652196Smckusick if (bp->b_validend > 0) { 16752196Smckusick if (on < bp->b_validoff || (on+n) > bp->b_validend) { 16852196Smckusick bp->b_flags |= B_INVAL; 16952196Smckusick if (bp->b_dirtyend > 0) { 17052196Smckusick if ((bp->b_flags & B_DELWRI) == 0) 17152196Smckusick panic("nfsbioread"); 17252196Smckusick (void) bwrite(bp); 17352196Smckusick } else 17452196Smckusick brelse(bp); 17552196Smckusick goto again; 17652196Smckusick } 17752196Smckusick } else { 17852196Smckusick bp->b_validoff = 0; 17952196Smckusick bp->b_validend = biosize - bp->b_resid; 18052196Smckusick } 18139901Smckusick vp->v_lastr = lbn; 18238882Smacklem if (bp->b_resid) { 18343348Smckusick diff = (on >= (biosize-bp->b_resid)) ? 0 : 18443348Smckusick (biosize-bp->b_resid-on); 18541897Smckusick n = MIN(n, diff); 18638882Smacklem } 18741897Smckusick break; 18841897Smckusick case VLNK: 18941897Smckusick nfsstats.biocache_readlinks++; 19041897Smckusick on = 0; 19141897Smckusick error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); 19241897Smckusick n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 19341897Smckusick break; 19441897Smckusick case VDIR: 19541897Smckusick nfsstats.biocache_readdirs++; 19641897Smckusick on = 0; 19748047Smckusick error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); 19848047Smckusick n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 19941897Smckusick break; 20041897Smckusick }; 20141897Smckusick if (error) { 20241897Smckusick brelse(bp); 20341897Smckusick return (error); 20441897Smckusick } 20552196Smckusick 20652196Smckusick /* 20752196Smckusick * For nqnfs: 20852196Smckusick * Must check for valid lease, since it may have expired while in 20952196Smckusick * bread(). If expired, get a lease. 21052196Smckusick * If data is stale, flush and try again. 21152196Smckusick * nb: If a read rpc is done by bread() or breada() and there is 21252196Smckusick * no valid lease, a get_lease request will be piggy backed. 21352196Smckusick */ 21452196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 21552196Smckusick if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 21652196Smckusick do { 21752196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 21852196Smckusick } while (error == NQNFS_EXPIRED); 21952196Smckusick if (error) { 22052196Smckusick brelse(bp); 22152196Smckusick return (error); 22252196Smckusick } 22352196Smckusick if ((np->n_flag & NQNFSNONCACHE) || 22454448Smckusick np->n_lrev != np->n_brev || 22552196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 22652196Smckusick if (vp->v_type == VDIR) { 22752196Smckusick np->n_direofoffset = 0; 22852196Smckusick cache_purge(vp); 22952196Smckusick } 23052196Smckusick brelse(bp); 23152196Smckusick np->n_flag &= ~NMODIFIED; 23254448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 23352196Smckusick np->n_brev = np->n_lrev; 23452196Smckusick continue; 23552196Smckusick } 23652196Smckusick } else if ((np->n_flag & NQNFSNONCACHE) || 23752196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 23852196Smckusick np->n_direofoffset = 0; 23952196Smckusick brelse(bp); 24052196Smckusick np->n_flag &= ~NMODIFIED; 24154448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 24252196Smckusick np->n_brev = np->n_lrev; 24352196Smckusick continue; 24452196Smckusick } 24552196Smckusick } 24641897Smckusick if (n > 0) 24741897Smckusick error = uiomove(bp->b_un.b_addr + on, (int)n, uio); 24841897Smckusick switch (vp->v_type) { 24941897Smckusick case VREG: 25043348Smckusick if (n+on == biosize || uio->uio_offset == np->n_size) 25138882Smacklem bp->b_flags |= B_AGE; 25241897Smckusick break; 25341897Smckusick case VLNK: 25441897Smckusick n = 0; 25541897Smckusick break; 25641897Smckusick case VDIR: 25741897Smckusick uio->uio_offset = bp->b_blkno; 25841897Smckusick break; 25941897Smckusick }; 26041897Smckusick brelse(bp); 26138882Smacklem } while (error == 0 && uio->uio_resid > 0 && n != 0); 26238882Smacklem return (error); 26338882Smacklem } 26438882Smacklem 26538882Smacklem /* 26638882Smacklem * Vnode op for write using bio 26738882Smacklem */ 268*54669Smckusick nfs_write(ap) 26954448Smckusick struct vop_write_args /* { 27054448Smckusick struct vnode *a_vp; 27154448Smckusick struct uio *a_uio; 27254448Smckusick int a_ioflag; 27354448Smckusick struct ucred *a_cred; 27454448Smckusick } */ *ap; 27538882Smacklem { 27652196Smckusick register int biosize; 27754448Smckusick register struct uio *uio = ap->a_uio; 27854448Smckusick struct proc *p = uio->uio_procp; 27954448Smckusick register struct vnode *vp = ap->a_vp; 28054448Smckusick struct nfsnode *np = VTONFS(vp); 28154448Smckusick register struct ucred *cred = ap->a_cred; 28254448Smckusick int ioflag = ap->a_ioflag; 28338882Smacklem struct buf *bp; 28441897Smckusick struct vattr vattr; 28552196Smckusick struct nfsmount *nmp; 28638882Smacklem daddr_t lbn, bn; 28740220Smckusick int n, on, error = 0; 28838882Smacklem 28948047Smckusick #ifdef DIAGNOSTIC 29054448Smckusick if (uio->uio_rw != UIO_WRITE) 29141897Smckusick panic("nfs_write mode"); 29254448Smckusick if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 29348047Smckusick panic("nfs_write proc"); 29448047Smckusick #endif 29554448Smckusick if (vp->v_type != VREG) 29641897Smckusick return (EIO); 29753627Smckusick if (np->n_flag & NWRITEERR) { 29853627Smckusick np->n_flag &= ~NWRITEERR; 29953627Smckusick return (np->n_error); 30053627Smckusick } 30154448Smckusick if (ioflag & (IO_APPEND | IO_SYNC)) { 30252986Smckusick if (np->n_flag & NMODIFIED) { 30352986Smckusick np->n_flag &= ~NMODIFIED; 30454448Smckusick vinvalbuf(vp, TRUE, cred, p); 30552986Smckusick } 30654448Smckusick if (ioflag & IO_APPEND) { 30752986Smckusick np->n_attrstamp = 0; 30854448Smckusick if (error = VOP_GETATTR(vp, &vattr, cred, p)) 30952986Smckusick return (error); 31054448Smckusick uio->uio_offset = np->n_size; 31152986Smckusick } 31252986Smckusick } 31354448Smckusick nmp = VFSTONFS(vp->v_mount); 31454448Smckusick if (uio->uio_offset < 0) 31539584Smckusick return (EINVAL); 31654448Smckusick if (uio->uio_resid == 0) 31739584Smckusick return (0); 31838882Smacklem /* 31938882Smacklem * Maybe this should be above the vnode op call, but so long as 32038882Smacklem * file servers have no limits, i don't think it matters 32138882Smacklem */ 32254448Smckusick if (p && uio->uio_offset + uio->uio_resid > 32347572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 32447572Skarels psignal(p, SIGXFSZ); 32539584Smckusick return (EFBIG); 32638882Smacklem } 32743348Smckusick /* 32843348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks 32943348Smckusick * will be the same size within a filesystem. nfs_writerpc will 33043348Smckusick * still use nm_wsize when sizing the rpc's. 33143348Smckusick */ 33252196Smckusick biosize = nmp->nm_rsize; 33341897Smckusick np->n_flag |= NMODIFIED; 33438882Smacklem do { 33552196Smckusick 33652196Smckusick /* 33752196Smckusick * Check for a valid write lease. 33852196Smckusick * If non-cachable, just do the rpc 33952196Smckusick */ 34052196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 34154448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 34252196Smckusick do { 34354448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 34452196Smckusick } while (error == NQNFS_EXPIRED); 34552196Smckusick if (error) 34652196Smckusick return (error); 34754448Smckusick if (np->n_lrev != np->n_brev || 34852196Smckusick (np->n_flag & NQNFSNONCACHE)) { 34954448Smckusick vinvalbuf(vp, TRUE, cred, p); 35052196Smckusick np->n_brev = np->n_lrev; 35152196Smckusick } 35252196Smckusick } 35352196Smckusick if (np->n_flag & NQNFSNONCACHE) 35454448Smckusick return (nfs_writerpc(vp, uio, cred)); 35539750Smckusick nfsstats.biocache_writes++; 35654448Smckusick lbn = uio->uio_offset / biosize; 35754448Smckusick on = uio->uio_offset & (biosize-1); 35854448Smckusick n = MIN((unsigned)(biosize - on), uio->uio_resid); 35954448Smckusick if (uio->uio_offset + n > np->n_size) { 36054448Smckusick np->n_size = uio->uio_offset + n; 36154448Smckusick vnode_pager_setsize(vp, (u_long)np->n_size); 36245714Smckusick } 36352196Smckusick bn = lbn * (biosize / DEV_BSIZE); 36440037Smckusick again: 36554448Smckusick bp = getblk(vp, bn, biosize); 36638882Smacklem if (bp->b_wcred == NOCRED) { 36754448Smckusick crhold(cred); 36854448Smckusick bp->b_wcred = cred; 36938882Smacklem } 37052196Smckusick 37152196Smckusick /* 37252196Smckusick * If the new write will leave a contiguous dirty 37352196Smckusick * area, just update the b_dirtyoff and b_dirtyend, 37452196Smckusick * otherwise force a write rpc of the old dirty area. 37552196Smckusick */ 37652196Smckusick if (bp->b_dirtyend > 0 && 37752196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 37852196Smckusick bp->b_proc = p; 37952196Smckusick if (error = bwrite(bp)) 38052196Smckusick return (error); 38152196Smckusick goto again; 38252196Smckusick } 38352196Smckusick 38452196Smckusick /* 38552196Smckusick * Check for valid write lease and get one as required. 38652196Smckusick * In case getblk() and/or bwrite() delayed us. 38752196Smckusick */ 38852196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 38954448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 39052196Smckusick do { 39154448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 39252196Smckusick } while (error == NQNFS_EXPIRED); 39352196Smckusick if (error) { 39452196Smckusick brelse(bp); 39552196Smckusick return (error); 39638882Smacklem } 39754448Smckusick if (np->n_lrev != np->n_brev || 39852196Smckusick (np->n_flag & NQNFSNONCACHE)) { 39954448Smckusick vinvalbuf(vp, TRUE, cred, p); 40052196Smckusick np->n_brev = np->n_lrev; 40152196Smckusick } 40238882Smacklem } 40354448Smckusick if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { 40440037Smckusick brelse(bp); 40539584Smckusick return (error); 40640037Smckusick } 40752196Smckusick if (bp->b_dirtyend > 0) { 40852196Smckusick bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); 40952196Smckusick bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); 41052196Smckusick } else { 41152196Smckusick bp->b_dirtyoff = on; 41252196Smckusick bp->b_dirtyend = on+n; 41352196Smckusick } 41452196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 41552196Smckusick bp->b_validoff > bp->b_dirtyend) { 41652196Smckusick bp->b_validoff = bp->b_dirtyoff; 41752196Smckusick bp->b_validend = bp->b_dirtyend; 41852196Smckusick } else { 41952196Smckusick bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); 42052196Smckusick bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); 42152196Smckusick } 42252196Smckusick 42352196Smckusick /* 42452196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite(). 42552196Smckusick */ 42654448Smckusick if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 42752196Smckusick bp->b_proc = p; 42852196Smckusick bwrite(bp); 42952196Smckusick } else if ((n+on) == biosize && 43052196Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 43138882Smacklem bp->b_flags |= B_AGE; 43241897Smckusick bp->b_proc = (struct proc *)0; 43338882Smacklem bawrite(bp); 43438882Smacklem } else { 43541897Smckusick bp->b_proc = (struct proc *)0; 43638882Smacklem bdwrite(bp); 43738882Smacklem } 43854448Smckusick } while (error == 0 && uio->uio_resid > 0 && n != 0); 43938882Smacklem return (error); 44038882Smacklem } 441