138882Smacklem /* 238882Smacklem * Copyright (c) 1989 The Regents of the University of California. 338882Smacklem * All rights reserved. 438882Smacklem * 538882Smacklem * This code is derived from software contributed to Berkeley by 638882Smacklem * Rick Macklem at The University of Guelph. 738882Smacklem * 844509Sbostic * %sccs.include.redist.c% 938882Smacklem * 10*53627Smckusick * @(#)nfs_bio.c 7.26 (Berkeley) 05/20/92 1138882Smacklem */ 1238882Smacklem 1353322Smckusick #include <sys/param.h> 1453322Smckusick #include <sys/resourcevar.h> 1553322Smckusick #include <sys/proc.h> 1653322Smckusick #include <sys/buf.h> 1753322Smckusick #include <sys/vnode.h> 1853322Smckusick #include <sys/trace.h> 1953322Smckusick #include <sys/mount.h> 2053322Smckusick #include <sys/kernel.h> 2153322Smckusick #include <machine/endian.h> 2253322Smckusick #include <vm/vm.h> 2353322Smckusick #include <nfs/nfsnode.h> 2453322Smckusick #include <nfs/rpcv2.h> 2553322Smckusick #include <nfs/nfsv2.h> 2653322Smckusick #include <nfs/nfs.h> 2753322Smckusick #include <nfs/nfsmount.h> 2853322Smckusick #include <nfs/nqnfs.h> 2938882Smacklem 3038882Smacklem /* True and false, how exciting */ 3138882Smacklem #define TRUE 1 3238882Smacklem #define FALSE 0 3338882Smacklem 3438882Smacklem /* 3538882Smacklem * Vnode op for read using bio 3638882Smacklem * Any similarity to readip() is purely coincidental 3738882Smacklem */ 3841897Smckusick nfs_bioread(vp, uio, ioflag, cred) 3938882Smacklem register struct vnode *vp; 4043348Smckusick register struct uio *uio; 4138882Smacklem int ioflag; 4238882Smacklem struct ucred *cred; 4338882Smacklem { 4453550Sheideman USES_VOP_GETATTR; 4538882Smacklem register struct nfsnode *np = VTONFS(vp); 4643348Smckusick register int biosize; 4738882Smacklem struct buf *bp; 4838882Smacklem struct vattr vattr; 4952196Smckusick struct nfsmount *nmp; 5052196Smckusick daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; 5152196Smckusick int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; 5252196Smckusick int n, on; 5338882Smacklem 5442241Smckusick #ifdef lint 5542241Smckusick ioflag = ioflag; 5642241Smckusick #endif /* lint */ 5748047Smckusick #ifdef DIAGNOSTIC 5838882Smacklem if (uio->uio_rw != UIO_READ) 5938882Smacklem panic("nfs_read mode"); 6048047Smckusick #endif 6138882Smacklem if (uio->uio_resid == 0) 6239584Smckusick return (0); 6341897Smckusick if (uio->uio_offset < 0 && vp->v_type != VDIR) 6439584Smckusick return (EINVAL); 6552196Smckusick nmp = VFSTONFS(vp->v_mount); 6652196Smckusick biosize = nmp->nm_rsize; 6738882Smacklem /* 6852196Smckusick * For nfs, cache consistency can only be maintained approximately. 6952196Smckusick * Although RFC1094 does not specify the criteria, the following is 7052196Smckusick * believed to be compatible with the reference port. 7152196Smckusick * For nqnfs, full cache consistency is maintained within the loop. 7252196Smckusick * For nfs: 7338882Smacklem * If the file's modify time on the server has changed since the 7438882Smacklem * last read rpc or you have written to the file, 7538882Smacklem * you may have lost data cache consistency with the 7638882Smacklem * server, so flush all of the file's data out of the cache. 7741897Smckusick * Then force a getattr rpc to ensure that you have up to date 7841897Smckusick * attributes. 7952196Smckusick * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 8052196Smckusick * the ones changing the modify time. 8138882Smacklem * NB: This implies that cache data can be read when up to 8238882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current 8338882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before 8453550Sheideman * the VOP_GETATTR() call. 8538882Smacklem */ 8652196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 8741897Smckusick if (np->n_flag & NMODIFIED) { 8841897Smckusick np->n_flag &= ~NMODIFIED; 8952196Smckusick if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 9052196Smckusick vp->v_type != VREG) 9152196Smckusick vinvalbuf(vp, TRUE); 9241897Smckusick np->n_attrstamp = 0; 9341897Smckusick np->n_direofoffset = 0; 9453550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 9539750Smckusick return (error); 9639750Smckusick np->n_mtime = vattr.va_mtime.tv_sec; 9741897Smckusick } else { 9853550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 9941897Smckusick return (error); 10041897Smckusick if (np->n_mtime != vattr.va_mtime.tv_sec) { 10141897Smckusick np->n_direofoffset = 0; 10241897Smckusick vinvalbuf(vp, TRUE); 10341897Smckusick np->n_mtime = vattr.va_mtime.tv_sec; 10441897Smckusick } 10539750Smckusick } 10638882Smacklem } 10738882Smacklem do { 10852196Smckusick 10952196Smckusick /* 11052196Smckusick * Get a valid lease. If cached data is stale, flush it. 11152196Smckusick */ 11252196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 11352196Smckusick NQNFS_CKINVALID(vp, np, NQL_READ)) { 11452196Smckusick do { 11552196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 11652196Smckusick } while (error == NQNFS_EXPIRED); 11752196Smckusick if (error) 11852196Smckusick return (error); 11952196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 12052196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 12152196Smckusick if (vp->v_type == VDIR) { 12252196Smckusick np->n_direofoffset = 0; 12352196Smckusick cache_purge(vp); 12452196Smckusick } 12552196Smckusick np->n_flag &= ~NMODIFIED; 12652196Smckusick vinvalbuf(vp, TRUE); 12752196Smckusick np->n_brev = np->n_lrev; 12852196Smckusick } 12952196Smckusick } 13052196Smckusick if (np->n_flag & NQNFSNONCACHE) { 13152196Smckusick switch (vp->v_type) { 13252196Smckusick case VREG: 13352196Smckusick error = nfs_readrpc(vp, uio, cred); 13452196Smckusick break; 13552196Smckusick case VLNK: 13652196Smckusick error = nfs_readlinkrpc(vp, uio, cred); 13752196Smckusick break; 13852196Smckusick case VDIR: 13952196Smckusick error = nfs_readdirrpc(vp, uio, cred); 14052196Smckusick break; 14152196Smckusick }; 14252196Smckusick return (error); 14352196Smckusick } 14441897Smckusick switch (vp->v_type) { 14541897Smckusick case VREG: 14639750Smckusick nfsstats.biocache_reads++; 14743348Smckusick lbn = uio->uio_offset / biosize; 14843348Smckusick on = uio->uio_offset & (biosize-1); 14943348Smckusick n = MIN((unsigned)(biosize - on), uio->uio_resid); 15038882Smacklem diff = np->n_size - uio->uio_offset; 15138882Smacklem if (diff <= 0) 15239584Smckusick return (error); 15338882Smacklem if (diff < n) 15438882Smacklem n = diff; 15543348Smckusick bn = lbn*(biosize/DEV_BSIZE); 15652196Smckusick for (nra = 0; nra < nmp->nm_readahead && 15752196Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) { 15852196Smckusick rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 15952196Smckusick rasize[nra] = biosize; 16052196Smckusick } 16152196Smckusick again: 16252196Smckusick if (nra > 0 && lbn >= vp->v_lastr) 16352196Smckusick error = breadn(vp, bn, biosize, rablock, rasize, nra, 16438882Smacklem cred, &bp); 16538882Smacklem else 16643348Smckusick error = bread(vp, bn, biosize, cred, &bp); 16752196Smckusick if (bp->b_validend > 0) { 16852196Smckusick if (on < bp->b_validoff || (on+n) > bp->b_validend) { 16952196Smckusick bp->b_flags |= B_INVAL; 17052196Smckusick if (bp->b_dirtyend > 0) { 17152196Smckusick if ((bp->b_flags & B_DELWRI) == 0) 17252196Smckusick panic("nfsbioread"); 17352196Smckusick (void) bwrite(bp); 17452196Smckusick } else 17552196Smckusick brelse(bp); 17652196Smckusick goto again; 17752196Smckusick } 17852196Smckusick } else { 17952196Smckusick bp->b_validoff = 0; 18052196Smckusick bp->b_validend = biosize - bp->b_resid; 18152196Smckusick } 18239901Smckusick vp->v_lastr = lbn; 18338882Smacklem if (bp->b_resid) { 18443348Smckusick diff = (on >= (biosize-bp->b_resid)) ? 0 : 18543348Smckusick (biosize-bp->b_resid-on); 18641897Smckusick n = MIN(n, diff); 18738882Smacklem } 18841897Smckusick break; 18941897Smckusick case VLNK: 19041897Smckusick nfsstats.biocache_readlinks++; 19141897Smckusick on = 0; 19241897Smckusick error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); 19341897Smckusick n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 19441897Smckusick break; 19541897Smckusick case VDIR: 19641897Smckusick nfsstats.biocache_readdirs++; 19741897Smckusick on = 0; 19848047Smckusick error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); 19948047Smckusick n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 20041897Smckusick break; 20141897Smckusick }; 20241897Smckusick if (error) { 20341897Smckusick brelse(bp); 20441897Smckusick return (error); 20541897Smckusick } 20652196Smckusick 20752196Smckusick /* 20852196Smckusick * For nqnfs: 20952196Smckusick * Must check for valid lease, since it may have expired while in 21052196Smckusick * bread(). If expired, get a lease. 21152196Smckusick * If data is stale, flush and try again. 21252196Smckusick * nb: If a read rpc is done by bread() or breada() and there is 21352196Smckusick * no valid lease, a get_lease request will be piggy backed. 21452196Smckusick */ 21552196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 21652196Smckusick if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 21752196Smckusick do { 21852196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 21952196Smckusick } while (error == NQNFS_EXPIRED); 22052196Smckusick if (error) { 22152196Smckusick brelse(bp); 22252196Smckusick return (error); 22352196Smckusick } 22452196Smckusick if ((np->n_flag & NQNFSNONCACHE) || 22552196Smckusick QUADNE(np->n_lrev, np->n_brev) || 22652196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 22752196Smckusick if (vp->v_type == VDIR) { 22852196Smckusick np->n_direofoffset = 0; 22952196Smckusick cache_purge(vp); 23052196Smckusick } 23152196Smckusick brelse(bp); 23252196Smckusick np->n_flag &= ~NMODIFIED; 23352196Smckusick vinvalbuf(vp, TRUE); 23452196Smckusick np->n_brev = np->n_lrev; 23552196Smckusick continue; 23652196Smckusick } 23752196Smckusick } else if ((np->n_flag & NQNFSNONCACHE) || 23852196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 23952196Smckusick np->n_direofoffset = 0; 24052196Smckusick brelse(bp); 24152196Smckusick np->n_flag &= ~NMODIFIED; 24252196Smckusick vinvalbuf(vp, TRUE); 24352196Smckusick np->n_brev = np->n_lrev; 24452196Smckusick continue; 24552196Smckusick } 24652196Smckusick } 24741897Smckusick if (n > 0) 24841897Smckusick error = uiomove(bp->b_un.b_addr + on, (int)n, uio); 24941897Smckusick switch (vp->v_type) { 25041897Smckusick case VREG: 25143348Smckusick if (n+on == biosize || uio->uio_offset == np->n_size) 25238882Smacklem bp->b_flags |= B_AGE; 25341897Smckusick break; 25441897Smckusick case VLNK: 25541897Smckusick n = 0; 25641897Smckusick break; 25741897Smckusick case VDIR: 25841897Smckusick uio->uio_offset = bp->b_blkno; 25941897Smckusick break; 26041897Smckusick }; 26141897Smckusick brelse(bp); 26238882Smacklem } while (error == 0 && uio->uio_resid > 0 && n != 0); 26338882Smacklem return (error); 26438882Smacklem } 26538882Smacklem 26638882Smacklem /* 26738882Smacklem * Vnode op for write using bio 26838882Smacklem */ 26953550Sheideman nfs_write (ap) 27053550Sheideman struct vop_write_args *ap; 27138882Smacklem { 27253550Sheideman USES_VOP_GETATTR; 27352196Smckusick register int biosize; 27453598Sheideman struct proc *p = ap->a_uio->uio_procp; 27538882Smacklem struct buf *bp; 27653598Sheideman struct nfsnode *np = VTONFS(ap->a_vp); 27741897Smckusick struct vattr vattr; 27852196Smckusick struct nfsmount *nmp; 27938882Smacklem daddr_t lbn, bn; 28040220Smckusick int n, on, error = 0; 28138882Smacklem 28248047Smckusick #ifdef DIAGNOSTIC 28353598Sheideman if (ap->a_uio->uio_rw != UIO_WRITE) 28441897Smckusick panic("nfs_write mode"); 28553598Sheideman if (ap->a_uio->uio_segflg == UIO_USERSPACE && ap->a_uio->uio_procp != curproc) 28648047Smckusick panic("nfs_write proc"); 28748047Smckusick #endif 28853598Sheideman if (ap->a_vp->v_type != VREG) 28941897Smckusick return (EIO); 290*53627Smckusick if (np->n_flag & NWRITEERR) { 291*53627Smckusick np->n_flag &= ~NWRITEERR; 292*53627Smckusick return (np->n_error); 293*53627Smckusick } 29453598Sheideman if (ap->a_ioflag & (IO_APPEND | IO_SYNC)) { 29552986Smckusick if (np->n_flag & NMODIFIED) { 29652986Smckusick np->n_flag &= ~NMODIFIED; 29753598Sheideman vinvalbuf(ap->a_vp, TRUE); 29852986Smckusick } 29953598Sheideman if (ap->a_ioflag & IO_APPEND) { 30052986Smckusick np->n_attrstamp = 0; 30153598Sheideman if (error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, p)) 30252986Smckusick return (error); 30353598Sheideman ap->a_uio->uio_offset = np->n_size; 30452986Smckusick } 30552986Smckusick } 30653598Sheideman nmp = VFSTONFS(ap->a_vp->v_mount); 30753598Sheideman if (ap->a_uio->uio_offset < 0) 30839584Smckusick return (EINVAL); 30953598Sheideman if (ap->a_uio->uio_resid == 0) 31039584Smckusick return (0); 31138882Smacklem /* 31238882Smacklem * Maybe this should be above the vnode op call, but so long as 31338882Smacklem * file servers have no limits, i don't think it matters 31438882Smacklem */ 31553598Sheideman if (p && ap->a_uio->uio_offset + ap->a_uio->uio_resid > 31647572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 31747572Skarels psignal(p, SIGXFSZ); 31839584Smckusick return (EFBIG); 31938882Smacklem } 32043348Smckusick /* 32143348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks 32243348Smckusick * will be the same size within a filesystem. nfs_writerpc will 32343348Smckusick * still use nm_wsize when sizing the rpc's. 32443348Smckusick */ 32552196Smckusick biosize = nmp->nm_rsize; 32641897Smckusick np->n_flag |= NMODIFIED; 32738882Smacklem do { 32852196Smckusick 32952196Smckusick /* 33052196Smckusick * Check for a valid write lease. 33152196Smckusick * If non-cachable, just do the rpc 33252196Smckusick */ 33352196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 33453598Sheideman NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) { 33552196Smckusick do { 33653598Sheideman error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p); 33752196Smckusick } while (error == NQNFS_EXPIRED); 33852196Smckusick if (error) 33952196Smckusick return (error); 34052196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 34152196Smckusick (np->n_flag & NQNFSNONCACHE)) { 34253598Sheideman vinvalbuf(ap->a_vp, TRUE); 34352196Smckusick np->n_brev = np->n_lrev; 34452196Smckusick } 34552196Smckusick } 34652196Smckusick if (np->n_flag & NQNFSNONCACHE) 34753598Sheideman return (nfs_writerpc(ap->a_vp, ap->a_uio, ap->a_cred)); 34839750Smckusick nfsstats.biocache_writes++; 34953598Sheideman lbn = ap->a_uio->uio_offset / biosize; 35053598Sheideman on = ap->a_uio->uio_offset & (biosize-1); 35153598Sheideman n = MIN((unsigned)(biosize - on), ap->a_uio->uio_resid); 35253598Sheideman if (ap->a_uio->uio_offset + n > np->n_size) { 35353598Sheideman np->n_size = ap->a_uio->uio_offset + n; 35453598Sheideman vnode_pager_setsize(ap->a_vp, (u_long)np->n_size); 35545714Smckusick } 35652196Smckusick bn = lbn * (biosize / DEV_BSIZE); 35740037Smckusick again: 35853598Sheideman bp = getblk(ap->a_vp, bn, biosize); 35938882Smacklem if (bp->b_wcred == NOCRED) { 36053598Sheideman crhold(ap->a_cred); 36153598Sheideman bp->b_wcred = ap->a_cred; 36238882Smacklem } 36352196Smckusick 36452196Smckusick /* 36552196Smckusick * If the new write will leave a contiguous dirty 36652196Smckusick * area, just update the b_dirtyoff and b_dirtyend, 36752196Smckusick * otherwise force a write rpc of the old dirty area. 36852196Smckusick */ 36952196Smckusick if (bp->b_dirtyend > 0 && 37052196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 37152196Smckusick bp->b_proc = p; 37252196Smckusick if (error = bwrite(bp)) 37352196Smckusick return (error); 37452196Smckusick goto again; 37552196Smckusick } 37652196Smckusick 37752196Smckusick /* 37852196Smckusick * Check for valid write lease and get one as required. 37952196Smckusick * In case getblk() and/or bwrite() delayed us. 38052196Smckusick */ 38152196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 38253598Sheideman NQNFS_CKINVALID(ap->a_vp, np, NQL_WRITE)) { 38352196Smckusick do { 38453598Sheideman error = nqnfs_getlease(ap->a_vp, NQL_WRITE, ap->a_cred, p); 38552196Smckusick } while (error == NQNFS_EXPIRED); 38652196Smckusick if (error) { 38752196Smckusick brelse(bp); 38852196Smckusick return (error); 38938882Smacklem } 39052196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 39152196Smckusick (np->n_flag & NQNFSNONCACHE)) { 39253598Sheideman vinvalbuf(ap->a_vp, TRUE); 39352196Smckusick np->n_brev = np->n_lrev; 39452196Smckusick } 39538882Smacklem } 39653598Sheideman if (error = uiomove(bp->b_un.b_addr + on, n, ap->a_uio)) { 39740037Smckusick brelse(bp); 39839584Smckusick return (error); 39940037Smckusick } 40052196Smckusick if (bp->b_dirtyend > 0) { 40152196Smckusick bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); 40252196Smckusick bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); 40352196Smckusick } else { 40452196Smckusick bp->b_dirtyoff = on; 40552196Smckusick bp->b_dirtyend = on+n; 40652196Smckusick } 40752196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 40852196Smckusick bp->b_validoff > bp->b_dirtyend) { 40952196Smckusick bp->b_validoff = bp->b_dirtyoff; 41052196Smckusick bp->b_validend = bp->b_dirtyend; 41152196Smckusick } else { 41252196Smckusick bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); 41352196Smckusick bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); 41452196Smckusick } 41552196Smckusick 41652196Smckusick /* 41752196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite(). 41852196Smckusick */ 41953598Sheideman if ((np->n_flag & NQNFSNONCACHE) || (ap->a_ioflag & IO_SYNC)) { 42052196Smckusick bp->b_proc = p; 42152196Smckusick bwrite(bp); 42252196Smckusick } else if ((n+on) == biosize && 42352196Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 42438882Smacklem bp->b_flags |= B_AGE; 42541897Smckusick bp->b_proc = (struct proc *)0; 42638882Smacklem bawrite(bp); 42738882Smacklem } else { 42841897Smckusick bp->b_proc = (struct proc *)0; 42938882Smacklem bdwrite(bp); 43038882Smacklem } 43153598Sheideman } while (error == 0 && ap->a_uio->uio_resid > 0 && n != 0); 43238882Smacklem return (error); 43338882Smacklem } 434