138882Smacklem /* 238882Smacklem * Copyright (c) 1989 The Regents of the University of California. 338882Smacklem * All rights reserved. 438882Smacklem * 538882Smacklem * This code is derived from software contributed to Berkeley by 638882Smacklem * Rick Macklem at The University of Guelph. 738882Smacklem * 844509Sbostic * %sccs.include.redist.c% 938882Smacklem * 10*56535Sbostic * @(#)nfs_bio.c 7.33 (Berkeley) 10/11/92 1138882Smacklem */ 1238882Smacklem 1353322Smckusick #include <sys/param.h> 1455063Spendry #include <sys/systm.h> 1553322Smckusick #include <sys/resourcevar.h> 1653322Smckusick #include <sys/proc.h> 1753322Smckusick #include <sys/buf.h> 1853322Smckusick #include <sys/vnode.h> 1953322Smckusick #include <sys/trace.h> 2053322Smckusick #include <sys/mount.h> 2153322Smckusick #include <sys/kernel.h> 22*56535Sbostic 2353322Smckusick #include <vm/vm.h> 24*56535Sbostic 2553322Smckusick #include <nfs/nfsnode.h> 2653322Smckusick #include <nfs/rpcv2.h> 2753322Smckusick #include <nfs/nfsv2.h> 2853322Smckusick #include <nfs/nfs.h> 2953322Smckusick #include <nfs/nfsmount.h> 3053322Smckusick #include <nfs/nqnfs.h> 3138882Smacklem 3238882Smacklem /* True and false, how exciting */ 3338882Smacklem #define TRUE 1 3438882Smacklem #define FALSE 0 3538882Smacklem 3638882Smacklem /* 3738882Smacklem * Vnode op for read using bio 3838882Smacklem * Any similarity to readip() is purely coincidental 3938882Smacklem */ 4041897Smckusick nfs_bioread(vp, uio, ioflag, cred) 4138882Smacklem register struct vnode *vp; 4243348Smckusick register struct uio *uio; 4338882Smacklem int ioflag; 4438882Smacklem struct ucred *cred; 4538882Smacklem { 4638882Smacklem register struct nfsnode *np = VTONFS(vp); 4743348Smckusick register int biosize; 4838882Smacklem struct buf *bp; 4938882Smacklem struct vattr vattr; 5052196Smckusick struct nfsmount *nmp; 5152196Smckusick daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; 5252196Smckusick int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; 5352196Smckusick int n, on; 5438882Smacklem 5542241Smckusick #ifdef lint 5642241Smckusick ioflag = ioflag; 5742241Smckusick #endif /* lint */ 5848047Smckusick #ifdef DIAGNOSTIC 5938882Smacklem if (uio->uio_rw != UIO_READ) 6038882Smacklem panic("nfs_read mode"); 6148047Smckusick #endif 6238882Smacklem if (uio->uio_resid == 0) 6339584Smckusick return (0); 6441897Smckusick if (uio->uio_offset < 0 && vp->v_type != VDIR) 6539584Smckusick return (EINVAL); 6652196Smckusick nmp = VFSTONFS(vp->v_mount); 6752196Smckusick biosize = nmp->nm_rsize; 6838882Smacklem /* 6952196Smckusick * For nfs, cache consistency can only be maintained approximately. 7052196Smckusick * Although RFC1094 does not specify the criteria, the following is 7152196Smckusick * believed to be compatible with the reference port. 7252196Smckusick * For nqnfs, full cache consistency is maintained within the loop. 7352196Smckusick * For nfs: 7438882Smacklem * If the file's modify time on the server has changed since the 7538882Smacklem * last read rpc or you have written to the file, 7638882Smacklem * you may have lost data cache consistency with the 7738882Smacklem * server, so flush all of the file's data out of the cache. 7841897Smckusick * Then force a getattr rpc to ensure that you have up to date 7941897Smckusick * attributes. 8052196Smckusick * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 8152196Smckusick * the ones changing the modify time. 8238882Smacklem * NB: This implies that cache data can be read when up to 8338882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current 8438882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before 8553550Sheideman * the VOP_GETATTR() call. 8638882Smacklem */ 8752196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 8841897Smckusick if (np->n_flag & NMODIFIED) { 8941897Smckusick np->n_flag &= ~NMODIFIED; 9052196Smckusick if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 9152196Smckusick vp->v_type != VREG) 9254448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 9341897Smckusick np->n_attrstamp = 0; 9441897Smckusick np->n_direofoffset = 0; 9553550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 9639750Smckusick return (error); 9754106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 9841897Smckusick } else { 9953550Sheideman if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 10041897Smckusick return (error); 10154106Smckusick if (np->n_mtime != vattr.va_mtime.ts_sec) { 10241897Smckusick np->n_direofoffset = 0; 10354448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 10454106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 10541897Smckusick } 10639750Smckusick } 10738882Smacklem } 10838882Smacklem do { 10952196Smckusick 11052196Smckusick /* 11152196Smckusick * Get a valid lease. If cached data is stale, flush it. 11252196Smckusick */ 11352196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 11452196Smckusick NQNFS_CKINVALID(vp, np, NQL_READ)) { 11552196Smckusick do { 11652196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 11752196Smckusick } while (error == NQNFS_EXPIRED); 11852196Smckusick if (error) 11952196Smckusick return (error); 12054448Smckusick if (np->n_lrev != np->n_brev || 12152196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 12252196Smckusick if (vp->v_type == VDIR) { 12352196Smckusick np->n_direofoffset = 0; 12452196Smckusick cache_purge(vp); 12552196Smckusick } 12652196Smckusick np->n_flag &= ~NMODIFIED; 12754448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 12852196Smckusick np->n_brev = np->n_lrev; 12952196Smckusick } 13052196Smckusick } 13152196Smckusick if (np->n_flag & NQNFSNONCACHE) { 13252196Smckusick switch (vp->v_type) { 13352196Smckusick case VREG: 13452196Smckusick error = nfs_readrpc(vp, uio, cred); 13552196Smckusick break; 13652196Smckusick case VLNK: 13752196Smckusick error = nfs_readlinkrpc(vp, uio, cred); 13852196Smckusick break; 13952196Smckusick case VDIR: 14052196Smckusick error = nfs_readdirrpc(vp, uio, cred); 14152196Smckusick break; 14252196Smckusick }; 14352196Smckusick return (error); 14452196Smckusick } 14541897Smckusick switch (vp->v_type) { 14641897Smckusick case VREG: 14739750Smckusick nfsstats.biocache_reads++; 14843348Smckusick lbn = uio->uio_offset / biosize; 14943348Smckusick on = uio->uio_offset & (biosize-1); 15055057Spendry n = min((unsigned)(biosize - on), uio->uio_resid); 15138882Smacklem diff = np->n_size - uio->uio_offset; 15238882Smacklem if (diff <= 0) 15339584Smckusick return (error); 15438882Smacklem if (diff < n) 15538882Smacklem n = diff; 15643348Smckusick bn = lbn*(biosize/DEV_BSIZE); 15752196Smckusick for (nra = 0; nra < nmp->nm_readahead && 15852196Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) { 15952196Smckusick rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 16052196Smckusick rasize[nra] = biosize; 16152196Smckusick } 16252196Smckusick again: 16352196Smckusick if (nra > 0 && lbn >= vp->v_lastr) 16452196Smckusick error = breadn(vp, bn, biosize, rablock, rasize, nra, 16538882Smacklem cred, &bp); 16638882Smacklem else 16743348Smckusick error = bread(vp, bn, biosize, cred, &bp); 16852196Smckusick if (bp->b_validend > 0) { 16952196Smckusick if (on < bp->b_validoff || (on+n) > bp->b_validend) { 17052196Smckusick bp->b_flags |= B_INVAL; 17152196Smckusick if (bp->b_dirtyend > 0) { 17252196Smckusick if ((bp->b_flags & B_DELWRI) == 0) 17352196Smckusick panic("nfsbioread"); 17452196Smckusick (void) bwrite(bp); 17552196Smckusick } else 17652196Smckusick brelse(bp); 17752196Smckusick goto again; 17852196Smckusick } 17952196Smckusick } else { 18052196Smckusick bp->b_validoff = 0; 18152196Smckusick bp->b_validend = biosize - bp->b_resid; 18252196Smckusick } 18339901Smckusick vp->v_lastr = lbn; 18438882Smacklem if (bp->b_resid) { 18543348Smckusick diff = (on >= (biosize-bp->b_resid)) ? 0 : 18643348Smckusick (biosize-bp->b_resid-on); 18755057Spendry n = min(n, diff); 18838882Smacklem } 18941897Smckusick break; 19041897Smckusick case VLNK: 19141897Smckusick nfsstats.biocache_readlinks++; 19241897Smckusick on = 0; 19341897Smckusick error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); 19455057Spendry n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 19541897Smckusick break; 19641897Smckusick case VDIR: 19741897Smckusick nfsstats.biocache_readdirs++; 19841897Smckusick on = 0; 19948047Smckusick error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); 20055057Spendry n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 20141897Smckusick break; 20241897Smckusick }; 20341897Smckusick if (error) { 20441897Smckusick brelse(bp); 20541897Smckusick return (error); 20641897Smckusick } 20752196Smckusick 20852196Smckusick /* 20952196Smckusick * For nqnfs: 21052196Smckusick * Must check for valid lease, since it may have expired while in 21152196Smckusick * bread(). If expired, get a lease. 21252196Smckusick * If data is stale, flush and try again. 21352196Smckusick * nb: If a read rpc is done by bread() or breada() and there is 21452196Smckusick * no valid lease, a get_lease request will be piggy backed. 21552196Smckusick */ 21652196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 21752196Smckusick if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 21852196Smckusick do { 21952196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 22052196Smckusick } while (error == NQNFS_EXPIRED); 22152196Smckusick if (error) { 22252196Smckusick brelse(bp); 22352196Smckusick return (error); 22452196Smckusick } 22552196Smckusick if ((np->n_flag & NQNFSNONCACHE) || 22654448Smckusick np->n_lrev != np->n_brev || 22752196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 22852196Smckusick if (vp->v_type == VDIR) { 22952196Smckusick np->n_direofoffset = 0; 23052196Smckusick cache_purge(vp); 23152196Smckusick } 23252196Smckusick brelse(bp); 23352196Smckusick np->n_flag &= ~NMODIFIED; 23454448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 23552196Smckusick np->n_brev = np->n_lrev; 23652196Smckusick continue; 23752196Smckusick } 23852196Smckusick } else if ((np->n_flag & NQNFSNONCACHE) || 23952196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 24052196Smckusick np->n_direofoffset = 0; 24152196Smckusick brelse(bp); 24252196Smckusick np->n_flag &= ~NMODIFIED; 24354448Smckusick vinvalbuf(vp, TRUE, cred, uio->uio_procp); 24452196Smckusick np->n_brev = np->n_lrev; 24552196Smckusick continue; 24652196Smckusick } 24752196Smckusick } 24841897Smckusick if (n > 0) 24941897Smckusick error = uiomove(bp->b_un.b_addr + on, (int)n, uio); 25041897Smckusick switch (vp->v_type) { 25141897Smckusick case VREG: 25243348Smckusick if (n+on == biosize || uio->uio_offset == np->n_size) 25338882Smacklem bp->b_flags |= B_AGE; 25441897Smckusick break; 25541897Smckusick case VLNK: 25641897Smckusick n = 0; 25741897Smckusick break; 25841897Smckusick case VDIR: 25941897Smckusick uio->uio_offset = bp->b_blkno; 26041897Smckusick break; 26141897Smckusick }; 26241897Smckusick brelse(bp); 26338882Smacklem } while (error == 0 && uio->uio_resid > 0 && n != 0); 26438882Smacklem return (error); 26538882Smacklem } 26638882Smacklem 26738882Smacklem /* 26838882Smacklem * Vnode op for write using bio 26938882Smacklem */ 27054669Smckusick nfs_write(ap) 27154448Smckusick struct vop_write_args /* { 27254448Smckusick struct vnode *a_vp; 27354448Smckusick struct uio *a_uio; 27454448Smckusick int a_ioflag; 27554448Smckusick struct ucred *a_cred; 27654448Smckusick } */ *ap; 27738882Smacklem { 27852196Smckusick register int biosize; 27954448Smckusick register struct uio *uio = ap->a_uio; 28054448Smckusick struct proc *p = uio->uio_procp; 28154448Smckusick register struct vnode *vp = ap->a_vp; 28254448Smckusick struct nfsnode *np = VTONFS(vp); 28354448Smckusick register struct ucred *cred = ap->a_cred; 28454448Smckusick int ioflag = ap->a_ioflag; 28538882Smacklem struct buf *bp; 28641897Smckusick struct vattr vattr; 28752196Smckusick struct nfsmount *nmp; 28838882Smacklem daddr_t lbn, bn; 28940220Smckusick int n, on, error = 0; 29038882Smacklem 29148047Smckusick #ifdef DIAGNOSTIC 29254448Smckusick if (uio->uio_rw != UIO_WRITE) 29341897Smckusick panic("nfs_write mode"); 29454448Smckusick if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 29548047Smckusick panic("nfs_write proc"); 29648047Smckusick #endif 29754448Smckusick if (vp->v_type != VREG) 29841897Smckusick return (EIO); 29953627Smckusick if (np->n_flag & NWRITEERR) { 30053627Smckusick np->n_flag &= ~NWRITEERR; 30153627Smckusick return (np->n_error); 30253627Smckusick } 30354448Smckusick if (ioflag & (IO_APPEND | IO_SYNC)) { 30452986Smckusick if (np->n_flag & NMODIFIED) { 30552986Smckusick np->n_flag &= ~NMODIFIED; 30656282Smckusick np->n_attrstamp = 0; 30754448Smckusick vinvalbuf(vp, TRUE, cred, p); 30852986Smckusick } 30954448Smckusick if (ioflag & IO_APPEND) { 31052986Smckusick np->n_attrstamp = 0; 31154448Smckusick if (error = VOP_GETATTR(vp, &vattr, cred, p)) 31252986Smckusick return (error); 31354448Smckusick uio->uio_offset = np->n_size; 31452986Smckusick } 31552986Smckusick } 31654448Smckusick nmp = VFSTONFS(vp->v_mount); 31754448Smckusick if (uio->uio_offset < 0) 31839584Smckusick return (EINVAL); 31954448Smckusick if (uio->uio_resid == 0) 32039584Smckusick return (0); 32138882Smacklem /* 32238882Smacklem * Maybe this should be above the vnode op call, but so long as 32338882Smacklem * file servers have no limits, i don't think it matters 32438882Smacklem */ 32554448Smckusick if (p && uio->uio_offset + uio->uio_resid > 32647572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 32747572Skarels psignal(p, SIGXFSZ); 32839584Smckusick return (EFBIG); 32938882Smacklem } 33043348Smckusick /* 33143348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks 33243348Smckusick * will be the same size within a filesystem. nfs_writerpc will 33343348Smckusick * still use nm_wsize when sizing the rpc's. 33443348Smckusick */ 33552196Smckusick biosize = nmp->nm_rsize; 33641897Smckusick np->n_flag |= NMODIFIED; 33738882Smacklem do { 33852196Smckusick 33952196Smckusick /* 34052196Smckusick * Check for a valid write lease. 34152196Smckusick * If non-cachable, just do the rpc 34252196Smckusick */ 34352196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 34454448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 34552196Smckusick do { 34654448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 34752196Smckusick } while (error == NQNFS_EXPIRED); 34852196Smckusick if (error) 34952196Smckusick return (error); 35054448Smckusick if (np->n_lrev != np->n_brev || 35152196Smckusick (np->n_flag & NQNFSNONCACHE)) { 35254448Smckusick vinvalbuf(vp, TRUE, cred, p); 35352196Smckusick np->n_brev = np->n_lrev; 35452196Smckusick } 35552196Smckusick } 35652196Smckusick if (np->n_flag & NQNFSNONCACHE) 35756282Smckusick return (nfs_writerpc(vp, uio, cred, 0)); 35839750Smckusick nfsstats.biocache_writes++; 35954448Smckusick lbn = uio->uio_offset / biosize; 36054448Smckusick on = uio->uio_offset & (biosize-1); 36155057Spendry n = min((unsigned)(biosize - on), uio->uio_resid); 36254448Smckusick if (uio->uio_offset + n > np->n_size) { 36354448Smckusick np->n_size = uio->uio_offset + n; 36454448Smckusick vnode_pager_setsize(vp, (u_long)np->n_size); 36545714Smckusick } 36652196Smckusick bn = lbn * (biosize / DEV_BSIZE); 36740037Smckusick again: 36854448Smckusick bp = getblk(vp, bn, biosize); 36938882Smacklem if (bp->b_wcred == NOCRED) { 37054448Smckusick crhold(cred); 37154448Smckusick bp->b_wcred = cred; 37238882Smacklem } 37352196Smckusick 37452196Smckusick /* 37552196Smckusick * If the new write will leave a contiguous dirty 37652196Smckusick * area, just update the b_dirtyoff and b_dirtyend, 37752196Smckusick * otherwise force a write rpc of the old dirty area. 37852196Smckusick */ 37952196Smckusick if (bp->b_dirtyend > 0 && 38052196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 38152196Smckusick bp->b_proc = p; 38252196Smckusick if (error = bwrite(bp)) 38352196Smckusick return (error); 38452196Smckusick goto again; 38552196Smckusick } 38652196Smckusick 38752196Smckusick /* 38852196Smckusick * Check for valid write lease and get one as required. 38952196Smckusick * In case getblk() and/or bwrite() delayed us. 39052196Smckusick */ 39152196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 39254448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 39352196Smckusick do { 39454448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 39552196Smckusick } while (error == NQNFS_EXPIRED); 39652196Smckusick if (error) { 39752196Smckusick brelse(bp); 39852196Smckusick return (error); 39938882Smacklem } 40054448Smckusick if (np->n_lrev != np->n_brev || 40152196Smckusick (np->n_flag & NQNFSNONCACHE)) { 40256282Smckusick brelse(bp); 40354448Smckusick vinvalbuf(vp, TRUE, cred, p); 40452196Smckusick np->n_brev = np->n_lrev; 40556282Smckusick goto again; 40652196Smckusick } 40738882Smacklem } 40854448Smckusick if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { 40940037Smckusick brelse(bp); 41039584Smckusick return (error); 41140037Smckusick } 41252196Smckusick if (bp->b_dirtyend > 0) { 41355057Spendry bp->b_dirtyoff = min(on, bp->b_dirtyoff); 41455057Spendry bp->b_dirtyend = max((on+n), bp->b_dirtyend); 41552196Smckusick } else { 41652196Smckusick bp->b_dirtyoff = on; 41752196Smckusick bp->b_dirtyend = on+n; 41852196Smckusick } 41952196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 42052196Smckusick bp->b_validoff > bp->b_dirtyend) { 42152196Smckusick bp->b_validoff = bp->b_dirtyoff; 42252196Smckusick bp->b_validend = bp->b_dirtyend; 42352196Smckusick } else { 42455057Spendry bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 42555057Spendry bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 42652196Smckusick } 42752196Smckusick 42852196Smckusick /* 42952196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite(). 43052196Smckusick */ 43154448Smckusick if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 43252196Smckusick bp->b_proc = p; 43352196Smckusick bwrite(bp); 43452196Smckusick } else if ((n+on) == biosize && 43552196Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 43638882Smacklem bp->b_flags |= B_AGE; 43741897Smckusick bp->b_proc = (struct proc *)0; 43838882Smacklem bawrite(bp); 43938882Smacklem } else { 44041897Smckusick bp->b_proc = (struct proc *)0; 44138882Smacklem bdwrite(bp); 44238882Smacklem } 44354448Smckusick } while (error == 0 && uio->uio_resid > 0 && n != 0); 44438882Smacklem return (error); 44538882Smacklem } 446