138882Smacklem /* 238882Smacklem * Copyright (c) 1989 The Regents of the University of California. 338882Smacklem * All rights reserved. 438882Smacklem * 538882Smacklem * This code is derived from software contributed to Berkeley by 638882Smacklem * Rick Macklem at The University of Guelph. 738882Smacklem * 844509Sbostic * %sccs.include.redist.c% 938882Smacklem * 10*52986Smckusick * @(#)nfs_bio.c 7.22 (Berkeley) 03/17/92 1138882Smacklem */ 1238882Smacklem 1338882Smacklem #include "param.h" 1452196Smckusick #include "resourcevar.h" 1547572Skarels #include "proc.h" 1638882Smacklem #include "buf.h" 1738882Smacklem #include "vnode.h" 1838882Smacklem #include "trace.h" 1938882Smacklem #include "mount.h" 2052196Smckusick #include "kernel.h" 2152196Smckusick #include "machine/endian.h" 2252196Smckusick #include "nfsnode.h" 2352196Smckusick #include "rpcv2.h" 2439750Smckusick #include "nfsv2.h" 2539750Smckusick #include "nfs.h" 2641897Smckusick #include "nfsmount.h" 2752196Smckusick #include "nqnfs.h" 2838882Smacklem 2938882Smacklem /* True and false, how exciting */ 3038882Smacklem #define TRUE 1 3138882Smacklem #define FALSE 0 3238882Smacklem 3338882Smacklem /* 3438882Smacklem * Vnode op for read using bio 3538882Smacklem * Any similarity to readip() is purely coincidental 3638882Smacklem */ 3741897Smckusick nfs_bioread(vp, uio, ioflag, cred) 3838882Smacklem register struct vnode *vp; 3943348Smckusick register struct uio *uio; 4038882Smacklem int ioflag; 4138882Smacklem struct ucred *cred; 4238882Smacklem { 4338882Smacklem register struct nfsnode *np = VTONFS(vp); 4443348Smckusick register int biosize; 4538882Smacklem struct buf *bp; 4638882Smacklem struct vattr vattr; 4752196Smckusick struct nfsmount *nmp; 4852196Smckusick daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; 4952196Smckusick int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; 5052196Smckusick int n, on; 5138882Smacklem 5242241Smckusick #ifdef lint 5342241Smckusick ioflag = ioflag; 5442241Smckusick #endif /* lint */ 5548047Smckusick #ifdef DIAGNOSTIC 5638882Smacklem if (uio->uio_rw != UIO_READ) 5738882Smacklem panic("nfs_read mode"); 5848047Smckusick #endif 5938882Smacklem if (uio->uio_resid == 0) 6039584Smckusick return (0); 6141897Smckusick if (uio->uio_offset < 0 && vp->v_type != VDIR) 6239584Smckusick return (EINVAL); 6352196Smckusick nmp = VFSTONFS(vp->v_mount); 6452196Smckusick biosize = nmp->nm_rsize; 6538882Smacklem /* 6652196Smckusick * For nfs, cache consistency can only be maintained approximately. 6752196Smckusick * Although RFC1094 does not specify the criteria, the following is 6852196Smckusick * believed to be compatible with the reference port. 6952196Smckusick * For nqnfs, full cache consistency is maintained within the loop. 7052196Smckusick * For nfs: 7138882Smacklem * If the file's modify time on the server has changed since the 7238882Smacklem * last read rpc or you have written to the file, 7338882Smacklem * you may have lost data cache consistency with the 7438882Smacklem * server, so flush all of the file's data out of the cache. 7541897Smckusick * Then force a getattr rpc to ensure that you have up to date 7641897Smckusick * attributes. 7752196Smckusick * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 7852196Smckusick * the ones changing the modify time. 7938882Smacklem * NB: This implies that cache data can be read when up to 8038882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current 8138882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before 8252196Smckusick * the nfs_getattr() call. 8338882Smacklem */ 8452196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 8541897Smckusick if (np->n_flag & NMODIFIED) { 8641897Smckusick np->n_flag &= ~NMODIFIED; 8752196Smckusick if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 8852196Smckusick vp->v_type != VREG) 8952196Smckusick vinvalbuf(vp, TRUE); 9041897Smckusick np->n_attrstamp = 0; 9141897Smckusick np->n_direofoffset = 0; 9252196Smckusick if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp)) 9339750Smckusick return (error); 9439750Smckusick np->n_mtime = vattr.va_mtime.tv_sec; 9541897Smckusick } else { 9652196Smckusick if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp)) 9741897Smckusick return (error); 9841897Smckusick if (np->n_mtime != vattr.va_mtime.tv_sec) { 9941897Smckusick np->n_direofoffset = 0; 10041897Smckusick vinvalbuf(vp, TRUE); 10141897Smckusick np->n_mtime = vattr.va_mtime.tv_sec; 10241897Smckusick } 10339750Smckusick } 10438882Smacklem } 10538882Smacklem do { 10652196Smckusick 10752196Smckusick /* 10852196Smckusick * Get a valid lease. If cached data is stale, flush it. 10952196Smckusick */ 11052196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 11152196Smckusick NQNFS_CKINVALID(vp, np, NQL_READ)) { 11252196Smckusick do { 11352196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 11452196Smckusick } while (error == NQNFS_EXPIRED); 11552196Smckusick if (error) 11652196Smckusick return (error); 11752196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 11852196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 11952196Smckusick if (vp->v_type == VDIR) { 12052196Smckusick np->n_direofoffset = 0; 12152196Smckusick cache_purge(vp); 12252196Smckusick } 12352196Smckusick np->n_flag &= ~NMODIFIED; 12452196Smckusick vinvalbuf(vp, TRUE); 12552196Smckusick np->n_brev = np->n_lrev; 12652196Smckusick } 12752196Smckusick } 12852196Smckusick if (np->n_flag & NQNFSNONCACHE) { 12952196Smckusick switch (vp->v_type) { 13052196Smckusick case VREG: 13152196Smckusick error = nfs_readrpc(vp, uio, cred); 13252196Smckusick break; 13352196Smckusick case VLNK: 13452196Smckusick error = nfs_readlinkrpc(vp, uio, cred); 13552196Smckusick break; 13652196Smckusick case VDIR: 13752196Smckusick error = nfs_readdirrpc(vp, uio, cred); 13852196Smckusick break; 13952196Smckusick }; 14052196Smckusick return (error); 14152196Smckusick } 14241897Smckusick switch (vp->v_type) { 14341897Smckusick case VREG: 14439750Smckusick nfsstats.biocache_reads++; 14543348Smckusick lbn = uio->uio_offset / biosize; 14643348Smckusick on = uio->uio_offset & (biosize-1); 14743348Smckusick n = MIN((unsigned)(biosize - on), uio->uio_resid); 14838882Smacklem diff = np->n_size - uio->uio_offset; 14938882Smacklem if (diff <= 0) 15039584Smckusick return (error); 15138882Smacklem if (diff < n) 15238882Smacklem n = diff; 15343348Smckusick bn = lbn*(biosize/DEV_BSIZE); 15452196Smckusick for (nra = 0; nra < nmp->nm_readahead && 15552196Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) { 15652196Smckusick rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 15752196Smckusick rasize[nra] = biosize; 15852196Smckusick } 15952196Smckusick again: 16052196Smckusick if (nra > 0 && lbn >= vp->v_lastr) 16152196Smckusick error = breadn(vp, bn, biosize, rablock, rasize, nra, 16238882Smacklem cred, &bp); 16338882Smacklem else 16443348Smckusick error = bread(vp, bn, biosize, cred, &bp); 16552196Smckusick if (bp->b_validend > 0) { 16652196Smckusick if (on < bp->b_validoff || (on+n) > bp->b_validend) { 16752196Smckusick bp->b_flags |= B_INVAL; 16852196Smckusick if (bp->b_dirtyend > 0) { 16952196Smckusick if ((bp->b_flags & B_DELWRI) == 0) 17052196Smckusick panic("nfsbioread"); 17152196Smckusick (void) bwrite(bp); 17252196Smckusick } else 17352196Smckusick brelse(bp); 17452196Smckusick goto again; 17552196Smckusick } 17652196Smckusick } else { 17752196Smckusick bp->b_validoff = 0; 17852196Smckusick bp->b_validend = biosize - bp->b_resid; 17952196Smckusick } 18039901Smckusick vp->v_lastr = lbn; 18138882Smacklem if (bp->b_resid) { 18243348Smckusick diff = (on >= (biosize-bp->b_resid)) ? 0 : 18343348Smckusick (biosize-bp->b_resid-on); 18441897Smckusick n = MIN(n, diff); 18538882Smacklem } 18641897Smckusick break; 18741897Smckusick case VLNK: 18841897Smckusick nfsstats.biocache_readlinks++; 18941897Smckusick on = 0; 19041897Smckusick error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); 19141897Smckusick n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 19241897Smckusick break; 19341897Smckusick case VDIR: 19441897Smckusick nfsstats.biocache_readdirs++; 19541897Smckusick on = 0; 19648047Smckusick error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp); 19748047Smckusick n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 19841897Smckusick break; 19941897Smckusick }; 20041897Smckusick if (error) { 20141897Smckusick brelse(bp); 20241897Smckusick return (error); 20341897Smckusick } 20452196Smckusick 20552196Smckusick /* 20652196Smckusick * For nqnfs: 20752196Smckusick * Must check for valid lease, since it may have expired while in 20852196Smckusick * bread(). If expired, get a lease. 20952196Smckusick * If data is stale, flush and try again. 21052196Smckusick * nb: If a read rpc is done by bread() or breada() and there is 21152196Smckusick * no valid lease, a get_lease request will be piggy backed. 21252196Smckusick */ 21352196Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 21452196Smckusick if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 21552196Smckusick do { 21652196Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 21752196Smckusick } while (error == NQNFS_EXPIRED); 21852196Smckusick if (error) { 21952196Smckusick brelse(bp); 22052196Smckusick return (error); 22152196Smckusick } 22252196Smckusick if ((np->n_flag & NQNFSNONCACHE) || 22352196Smckusick QUADNE(np->n_lrev, np->n_brev) || 22452196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 22552196Smckusick if (vp->v_type == VDIR) { 22652196Smckusick np->n_direofoffset = 0; 22752196Smckusick cache_purge(vp); 22852196Smckusick } 22952196Smckusick brelse(bp); 23052196Smckusick np->n_flag &= ~NMODIFIED; 23152196Smckusick vinvalbuf(vp, TRUE); 23252196Smckusick np->n_brev = np->n_lrev; 23352196Smckusick continue; 23452196Smckusick } 23552196Smckusick } else if ((np->n_flag & NQNFSNONCACHE) || 23652196Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 23752196Smckusick np->n_direofoffset = 0; 23852196Smckusick brelse(bp); 23952196Smckusick np->n_flag &= ~NMODIFIED; 24052196Smckusick vinvalbuf(vp, TRUE); 24152196Smckusick np->n_brev = np->n_lrev; 24252196Smckusick continue; 24352196Smckusick } 24452196Smckusick } 24541897Smckusick if (n > 0) 24641897Smckusick error = uiomove(bp->b_un.b_addr + on, (int)n, uio); 24741897Smckusick switch (vp->v_type) { 24841897Smckusick case VREG: 24943348Smckusick if (n+on == biosize || uio->uio_offset == np->n_size) 25038882Smacklem bp->b_flags |= B_AGE; 25141897Smckusick break; 25241897Smckusick case VLNK: 25341897Smckusick n = 0; 25441897Smckusick break; 25541897Smckusick case VDIR: 25641897Smckusick uio->uio_offset = bp->b_blkno; 25741897Smckusick break; 25841897Smckusick }; 25941897Smckusick brelse(bp); 26038882Smacklem } while (error == 0 && uio->uio_resid > 0 && n != 0); 26138882Smacklem return (error); 26238882Smacklem } 26338882Smacklem 26438882Smacklem /* 26538882Smacklem * Vnode op for write using bio 26638882Smacklem */ 26739584Smckusick nfs_write(vp, uio, ioflag, cred) 26838882Smacklem register struct vnode *vp; 26938882Smacklem register struct uio *uio; 27038882Smacklem int ioflag; 27138882Smacklem struct ucred *cred; 27238882Smacklem { 27352196Smckusick register int biosize; 27448047Smckusick struct proc *p = uio->uio_procp; 27538882Smacklem struct buf *bp; 27638882Smacklem struct nfsnode *np = VTONFS(vp); 27741897Smckusick struct vattr vattr; 27852196Smckusick struct nfsmount *nmp; 27938882Smacklem daddr_t lbn, bn; 28040220Smckusick int n, on, error = 0; 28138882Smacklem 28248047Smckusick #ifdef DIAGNOSTIC 28341897Smckusick if (uio->uio_rw != UIO_WRITE) 28441897Smckusick panic("nfs_write mode"); 28548047Smckusick if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 28648047Smckusick panic("nfs_write proc"); 28748047Smckusick #endif 28841897Smckusick if (vp->v_type != VREG) 28941897Smckusick return (EIO); 290*52986Smckusick if (ioflag & (IO_APPEND | IO_SYNC)) { 291*52986Smckusick if (np->n_flag & NMODIFIED) { 292*52986Smckusick np->n_flag &= ~NMODIFIED; 293*52986Smckusick vinvalbuf(vp, TRUE); 294*52986Smckusick } 295*52986Smckusick if (ioflag & IO_APPEND) { 296*52986Smckusick np->n_attrstamp = 0; 297*52986Smckusick if (error = nfs_getattr(vp, &vattr, cred, p)) 298*52986Smckusick return (error); 299*52986Smckusick uio->uio_offset = np->n_size; 300*52986Smckusick } 301*52986Smckusick } 30252196Smckusick nmp = VFSTONFS(vp->v_mount); 30339584Smckusick if (uio->uio_offset < 0) 30439584Smckusick return (EINVAL); 30538882Smacklem if (uio->uio_resid == 0) 30639584Smckusick return (0); 30738882Smacklem /* 30838882Smacklem * Maybe this should be above the vnode op call, but so long as 30938882Smacklem * file servers have no limits, i don't think it matters 31038882Smacklem */ 31152196Smckusick if (p && uio->uio_offset + uio->uio_resid > 31247572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 31347572Skarels psignal(p, SIGXFSZ); 31439584Smckusick return (EFBIG); 31538882Smacklem } 31643348Smckusick /* 31743348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks 31843348Smckusick * will be the same size within a filesystem. nfs_writerpc will 31943348Smckusick * still use nm_wsize when sizing the rpc's. 32043348Smckusick */ 32152196Smckusick biosize = nmp->nm_rsize; 32241897Smckusick np->n_flag |= NMODIFIED; 32338882Smacklem do { 32452196Smckusick 32552196Smckusick /* 32652196Smckusick * Check for a valid write lease. 32752196Smckusick * If non-cachable, just do the rpc 32852196Smckusick */ 32952196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 33052196Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 33152196Smckusick do { 33252196Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 33352196Smckusick } while (error == NQNFS_EXPIRED); 33452196Smckusick if (error) 33552196Smckusick return (error); 33652196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 33752196Smckusick (np->n_flag & NQNFSNONCACHE)) { 33852196Smckusick vinvalbuf(vp, TRUE); 33952196Smckusick np->n_brev = np->n_lrev; 34052196Smckusick } 34152196Smckusick } 34252196Smckusick if (np->n_flag & NQNFSNONCACHE) 34352196Smckusick return (nfs_writerpc(vp, uio, cred)); 34439750Smckusick nfsstats.biocache_writes++; 34543348Smckusick lbn = uio->uio_offset / biosize; 34643348Smckusick on = uio->uio_offset & (biosize-1); 34743348Smckusick n = MIN((unsigned)(biosize - on), uio->uio_resid); 34852196Smckusick if (uio->uio_offset + n > np->n_size) { 34952196Smckusick np->n_size = uio->uio_offset + n; 35045714Smckusick vnode_pager_setsize(vp, np->n_size); 35145714Smckusick } 35252196Smckusick bn = lbn * (biosize / DEV_BSIZE); 35340037Smckusick again: 35443348Smckusick bp = getblk(vp, bn, biosize); 35538882Smacklem if (bp->b_wcred == NOCRED) { 35638882Smacklem crhold(cred); 35738882Smacklem bp->b_wcred = cred; 35838882Smacklem } 35952196Smckusick 36052196Smckusick /* 36152196Smckusick * If the new write will leave a contiguous dirty 36252196Smckusick * area, just update the b_dirtyoff and b_dirtyend, 36352196Smckusick * otherwise force a write rpc of the old dirty area. 36452196Smckusick */ 36552196Smckusick if (bp->b_dirtyend > 0 && 36652196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 36752196Smckusick bp->b_proc = p; 36852196Smckusick if (error = bwrite(bp)) 36952196Smckusick return (error); 37052196Smckusick goto again; 37152196Smckusick } 37252196Smckusick 37352196Smckusick /* 37452196Smckusick * Check for valid write lease and get one as required. 37552196Smckusick * In case getblk() and/or bwrite() delayed us. 37652196Smckusick */ 37752196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 37852196Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 37952196Smckusick do { 38052196Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 38152196Smckusick } while (error == NQNFS_EXPIRED); 38252196Smckusick if (error) { 38352196Smckusick brelse(bp); 38452196Smckusick return (error); 38538882Smacklem } 38652196Smckusick if (QUADNE(np->n_lrev, np->n_brev) || 38752196Smckusick (np->n_flag & NQNFSNONCACHE)) { 38852196Smckusick vinvalbuf(vp, TRUE); 38952196Smckusick np->n_brev = np->n_lrev; 39052196Smckusick } 39138882Smacklem } 39240037Smckusick if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { 39340037Smckusick brelse(bp); 39439584Smckusick return (error); 39540037Smckusick } 39652196Smckusick if (bp->b_dirtyend > 0) { 39752196Smckusick bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); 39852196Smckusick bp->b_dirtyend = MAX((on+n), bp->b_dirtyend); 39952196Smckusick } else { 40052196Smckusick bp->b_dirtyoff = on; 40152196Smckusick bp->b_dirtyend = on+n; 40252196Smckusick } 40352196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 40452196Smckusick bp->b_validoff > bp->b_dirtyend) { 40552196Smckusick bp->b_validoff = bp->b_dirtyoff; 40652196Smckusick bp->b_validend = bp->b_dirtyend; 40752196Smckusick } else { 40852196Smckusick bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff); 40952196Smckusick bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend); 41052196Smckusick } 41152196Smckusick 41252196Smckusick /* 41352196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite(). 41452196Smckusick */ 41552196Smckusick if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 41652196Smckusick bp->b_proc = p; 41752196Smckusick bwrite(bp); 41852196Smckusick } else if ((n+on) == biosize && 41952196Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 42038882Smacklem bp->b_flags |= B_AGE; 42141897Smckusick bp->b_proc = (struct proc *)0; 42238882Smacklem bawrite(bp); 42338882Smacklem } else { 42441897Smckusick bp->b_proc = (struct proc *)0; 42538882Smacklem bdwrite(bp); 42638882Smacklem } 42738882Smacklem } while (error == 0 && uio->uio_resid > 0 && n != 0); 42838882Smacklem return (error); 42938882Smacklem } 430