138882Smacklem /* 2*63233Sbostic * Copyright (c) 1989, 1993 3*63233Sbostic * The Regents of the University of California. All rights reserved. 438882Smacklem * 538882Smacklem * This code is derived from software contributed to Berkeley by 638882Smacklem * Rick Macklem at The University of Guelph. 738882Smacklem * 844509Sbostic * %sccs.include.redist.c% 938882Smacklem * 10*63233Sbostic * @(#)nfs_bio.c 8.1 (Berkeley) 06/10/93 1138882Smacklem */ 1238882Smacklem 1353322Smckusick #include <sys/param.h> 1455063Spendry #include <sys/systm.h> 1553322Smckusick #include <sys/resourcevar.h> 1653322Smckusick #include <sys/proc.h> 1753322Smckusick #include <sys/buf.h> 1853322Smckusick #include <sys/vnode.h> 1953322Smckusick #include <sys/trace.h> 2053322Smckusick #include <sys/mount.h> 2153322Smckusick #include <sys/kernel.h> 2256535Sbostic 2353322Smckusick #include <vm/vm.h> 2456535Sbostic 2553322Smckusick #include <nfs/nfsnode.h> 2653322Smckusick #include <nfs/rpcv2.h> 2753322Smckusick #include <nfs/nfsv2.h> 2853322Smckusick #include <nfs/nfs.h> 2953322Smckusick #include <nfs/nfsmount.h> 3053322Smckusick #include <nfs/nqnfs.h> 3138882Smacklem 3257808Smckusick struct buf *incore(), *nfs_getcacheblk(); 3357783Smckusick extern struct queue_entry nfs_bufq; 3457783Smckusick extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 3557783Smckusick extern int nfs_numasync; 3638882Smacklem 3738882Smacklem /* 3838882Smacklem * Vnode op for read using bio 3938882Smacklem * Any similarity to readip() is purely coincidental 4038882Smacklem */ 4141897Smckusick nfs_bioread(vp, uio, ioflag, cred) 4238882Smacklem register struct vnode *vp; 4343348Smckusick register struct uio *uio; 4438882Smacklem int ioflag; 4538882Smacklem struct ucred *cred; 4638882Smacklem { 4738882Smacklem register struct nfsnode *np = VTONFS(vp); 4857783Smckusick register int biosize, diff; 4957783Smckusick struct buf *bp, *rabp; 5038882Smacklem struct vattr vattr; 5157783Smckusick struct proc *p; 5252196Smckusick struct nfsmount *nmp; 5357783Smckusick daddr_t lbn, bn, rabn; 5457783Smckusick caddr_t baddr; 5557783Smckusick int got_buf, len, nra, error = 0, n, on, not_readin; 5638882Smacklem 5742241Smckusick #ifdef lint 5842241Smckusick ioflag = ioflag; 5942241Smckusick #endif /* lint */ 6048047Smckusick #ifdef DIAGNOSTIC 6138882Smacklem if (uio->uio_rw != UIO_READ) 6238882Smacklem panic("nfs_read mode"); 6348047Smckusick #endif 6438882Smacklem if (uio->uio_resid == 0) 6539584Smckusick return (0); 6641897Smckusick if (uio->uio_offset < 0 && vp->v_type != VDIR) 6739584Smckusick return (EINVAL); 6852196Smckusick nmp = VFSTONFS(vp->v_mount); 6952196Smckusick biosize = nmp->nm_rsize; 7057783Smckusick p = uio->uio_procp; 7138882Smacklem /* 7252196Smckusick * For nfs, cache consistency can only be maintained approximately. 7352196Smckusick * Although RFC1094 does not specify the criteria, the following is 7452196Smckusick * believed to be compatible with the reference port. 7552196Smckusick * For nqnfs, full cache consistency is maintained within the loop. 7652196Smckusick * For nfs: 7738882Smacklem * If the file's modify time on the server has changed since the 7838882Smacklem * last read rpc or you have written to the file, 7938882Smacklem * you may have lost data cache consistency with the 8038882Smacklem * server, so flush all of the file's data out of the cache. 8141897Smckusick * Then force a getattr rpc to ensure that you have up to date 8241897Smckusick * attributes. 8352196Smckusick * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 8452196Smckusick * the ones changing the modify time. 8538882Smacklem * NB: This implies that cache data can be read when up to 8638882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current 8738882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before 8853550Sheideman * the VOP_GETATTR() call. 8938882Smacklem */ 9052196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 9141897Smckusick if (np->n_flag & NMODIFIED) { 9252196Smckusick if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 9357783Smckusick vp->v_type != VREG) { 9457783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 9557783Smckusick return (error); 9657783Smckusick } 9741897Smckusick np->n_attrstamp = 0; 9841897Smckusick np->n_direofoffset = 0; 9957783Smckusick if (error = VOP_GETATTR(vp, &vattr, cred, p)) 10039750Smckusick return (error); 10154106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 10241897Smckusick } else { 10357783Smckusick if (error = VOP_GETATTR(vp, &vattr, cred, p)) 10441897Smckusick return (error); 10554106Smckusick if (np->n_mtime != vattr.va_mtime.ts_sec) { 10641897Smckusick np->n_direofoffset = 0; 10757783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 10857783Smckusick return (error); 10954106Smckusick np->n_mtime = vattr.va_mtime.ts_sec; 11041897Smckusick } 11139750Smckusick } 11238882Smacklem } 11338882Smacklem do { 11452196Smckusick 11552196Smckusick /* 11652196Smckusick * Get a valid lease. If cached data is stale, flush it. 11752196Smckusick */ 11857783Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) { 11957783Smckusick if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 12057783Smckusick do { 12157783Smckusick error = nqnfs_getlease(vp, NQL_READ, cred, p); 12257783Smckusick } while (error == NQNFS_EXPIRED); 12357783Smckusick if (error) 12452196Smckusick return (error); 12557783Smckusick if (np->n_lrev != np->n_brev || 12659704Smckusick (np->n_flag & NQNFSNONCACHE) || 12757783Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 12852196Smckusick if (vp->v_type == VDIR) { 12957783Smckusick np->n_direofoffset = 0; 13057783Smckusick cache_purge(vp); 13152196Smckusick } 13257783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 13357783Smckusick return (error); 13452196Smckusick np->n_brev = np->n_lrev; 13557783Smckusick } 13657783Smckusick } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 13757783Smckusick np->n_direofoffset = 0; 13857783Smckusick cache_purge(vp); 13957783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 14057783Smckusick return (error); 14152196Smckusick } 14252196Smckusick } 14352196Smckusick if (np->n_flag & NQNFSNONCACHE) { 14452196Smckusick switch (vp->v_type) { 14552196Smckusick case VREG: 14652196Smckusick error = nfs_readrpc(vp, uio, cred); 14752196Smckusick break; 14852196Smckusick case VLNK: 14952196Smckusick error = nfs_readlinkrpc(vp, uio, cred); 15052196Smckusick break; 15152196Smckusick case VDIR: 15252196Smckusick error = nfs_readdirrpc(vp, uio, cred); 15352196Smckusick break; 15452196Smckusick }; 15552196Smckusick return (error); 15652196Smckusick } 15757783Smckusick baddr = (caddr_t)0; 15841897Smckusick switch (vp->v_type) { 15941897Smckusick case VREG: 16039750Smckusick nfsstats.biocache_reads++; 16143348Smckusick lbn = uio->uio_offset / biosize; 16243348Smckusick on = uio->uio_offset & (biosize-1); 16357783Smckusick bn = lbn * (biosize / DEV_BSIZE); 16457783Smckusick not_readin = 1; 16557783Smckusick 16657783Smckusick /* 16757783Smckusick * Start the read ahead(s), as required. 16857783Smckusick */ 16957783Smckusick if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 17057783Smckusick lbn == vp->v_lastr + 1) { 17157783Smckusick for (nra = 0; nra < nmp->nm_readahead && 17257783Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) { 17357783Smckusick rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 17457808Smckusick if (!incore(vp, rabn)) { 17557783Smckusick rabp = nfs_getcacheblk(vp, rabn, biosize, p); 17657783Smckusick if (!rabp) 17757783Smckusick return (EINTR); 17857783Smckusick if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { 17957783Smckusick rabp->b_flags |= (B_READ | B_ASYNC); 18057783Smckusick if (nfs_asyncio(rabp, cred)) { 18157783Smckusick rabp->b_flags |= B_INVAL; 18257783Smckusick brelse(rabp); 18357783Smckusick } 18457783Smckusick } 18557783Smckusick } 18657783Smckusick } 18757783Smckusick } 18857783Smckusick 18957783Smckusick /* 19057783Smckusick * If the block is in the cache and has the required data 19157783Smckusick * in a valid region, just copy it out. 19257783Smckusick * Otherwise, get the block and write back/read in, 19357783Smckusick * as required. 19457783Smckusick */ 19557808Smckusick if ((bp = incore(vp, bn)) && 19657783Smckusick (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == 19757783Smckusick (B_BUSY | B_WRITEINPROG)) 19857783Smckusick got_buf = 0; 19957783Smckusick else { 20057783Smckusick again: 20157783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p); 20257783Smckusick if (!bp) 20357783Smckusick return (EINTR); 20457783Smckusick got_buf = 1; 20557783Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { 20657783Smckusick bp->b_flags |= B_READ; 20757783Smckusick not_readin = 0; 20857783Smckusick if (error = nfs_doio(bp, cred, p)) { 20957783Smckusick brelse(bp); 21057783Smckusick return (error); 21157783Smckusick } 21257783Smckusick } 21357783Smckusick } 21455057Spendry n = min((unsigned)(biosize - on), uio->uio_resid); 21538882Smacklem diff = np->n_size - uio->uio_offset; 21638882Smacklem if (diff < n) 21738882Smacklem n = diff; 21857783Smckusick if (not_readin && n > 0) { 21957783Smckusick if (on < bp->b_validoff || (on + n) > bp->b_validend) { 22057783Smckusick if (!got_buf) { 22157783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p); 22257783Smckusick if (!bp) 22357783Smckusick return (EINTR); 22457783Smckusick got_buf = 1; 22557783Smckusick } 22652196Smckusick bp->b_flags |= B_INVAL; 22752196Smckusick if (bp->b_dirtyend > 0) { 22857783Smckusick if ((bp->b_flags & B_DELWRI) == 0) 22957783Smckusick panic("nfsbioread"); 23057783Smckusick if (VOP_BWRITE(bp) == EINTR) 23157783Smckusick return (EINTR); 23252196Smckusick } else 23357783Smckusick brelse(bp); 23452196Smckusick goto again; 23552196Smckusick } 23652196Smckusick } 23739901Smckusick vp->v_lastr = lbn; 23857783Smckusick diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); 23957783Smckusick if (diff < n) 24057783Smckusick n = diff; 24141897Smckusick break; 24241897Smckusick case VLNK: 24341897Smckusick nfsstats.biocache_readlinks++; 24457783Smckusick bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 24557783Smckusick if (!bp) 24657783Smckusick return (EINTR); 24757783Smckusick if ((bp->b_flags & B_DONE) == 0) { 24857783Smckusick bp->b_flags |= B_READ; 24957783Smckusick if (error = nfs_doio(bp, cred, p)) { 25057783Smckusick brelse(bp); 25157783Smckusick return (error); 25257783Smckusick } 25357783Smckusick } 25457783Smckusick n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 25557783Smckusick got_buf = 1; 25641897Smckusick on = 0; 25741897Smckusick break; 25841897Smckusick case VDIR: 25941897Smckusick nfsstats.biocache_readdirs++; 26057783Smckusick bn = (daddr_t)uio->uio_offset; 26157783Smckusick bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p); 26257783Smckusick if (!bp) 26357783Smckusick return (EINTR); 26457783Smckusick if ((bp->b_flags & B_DONE) == 0) { 26557783Smckusick bp->b_flags |= B_READ; 26657783Smckusick if (error = nfs_doio(bp, cred, p)) { 26752196Smckusick brelse(bp); 26852196Smckusick return (error); 26952196Smckusick } 27057783Smckusick } 27157783Smckusick 27257783Smckusick /* 27357783Smckusick * If not eof and read aheads are enabled, start one. 27457783Smckusick * (You need the current block first, so that you have the 27557783Smckusick * directory offset cookie of the next block. 27657783Smckusick */ 27757783Smckusick rabn = bp->b_blkno; 27857783Smckusick if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 27957783Smckusick rabn != 0 && rabn != np->n_direofoffset && 28057808Smckusick !incore(vp, rabn)) { 28157783Smckusick rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p); 28257783Smckusick if (rabp) { 28357783Smckusick if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { 28457783Smckusick rabp->b_flags |= (B_READ | B_ASYNC); 28557783Smckusick if (nfs_asyncio(rabp, cred)) { 28657783Smckusick rabp->b_flags |= B_INVAL; 28757783Smckusick brelse(rabp); 28852196Smckusick } 28957783Smckusick } 29052196Smckusick } 29152196Smckusick } 29257783Smckusick on = 0; 29357783Smckusick n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 29457783Smckusick got_buf = 1; 29557783Smckusick break; 29657783Smckusick }; 29757783Smckusick 29857783Smckusick if (n > 0) { 29957783Smckusick if (!baddr) 30057783Smckusick baddr = bp->b_un.b_addr; 30157783Smckusick error = uiomove(baddr + on, (int)n, uio); 30252196Smckusick } 30341897Smckusick switch (vp->v_type) { 30441897Smckusick case VREG: 30557783Smckusick if (n + on == biosize || uio->uio_offset == np->n_size) 30638882Smacklem bp->b_flags |= B_AGE; 30741897Smckusick break; 30841897Smckusick case VLNK: 30941897Smckusick n = 0; 31041897Smckusick break; 31141897Smckusick case VDIR: 31241897Smckusick uio->uio_offset = bp->b_blkno; 31341897Smckusick break; 31441897Smckusick }; 31557783Smckusick if (got_buf) 31657783Smckusick brelse(bp); 31757783Smckusick } while (error == 0 && uio->uio_resid > 0 && n > 0); 31838882Smacklem return (error); 31938882Smacklem } 32038882Smacklem 32138882Smacklem /* 32238882Smacklem * Vnode op for write using bio 32338882Smacklem */ 32454669Smckusick nfs_write(ap) 32554448Smckusick struct vop_write_args /* { 32654448Smckusick struct vnode *a_vp; 32754448Smckusick struct uio *a_uio; 32854448Smckusick int a_ioflag; 32954448Smckusick struct ucred *a_cred; 33054448Smckusick } */ *ap; 33138882Smacklem { 33252196Smckusick register int biosize; 33354448Smckusick register struct uio *uio = ap->a_uio; 33454448Smckusick struct proc *p = uio->uio_procp; 33554448Smckusick register struct vnode *vp = ap->a_vp; 33654448Smckusick struct nfsnode *np = VTONFS(vp); 33754448Smckusick register struct ucred *cred = ap->a_cred; 33854448Smckusick int ioflag = ap->a_ioflag; 33938882Smacklem struct buf *bp; 34041897Smckusick struct vattr vattr; 34152196Smckusick struct nfsmount *nmp; 34238882Smacklem daddr_t lbn, bn; 34340220Smckusick int n, on, error = 0; 34438882Smacklem 34548047Smckusick #ifdef DIAGNOSTIC 34654448Smckusick if (uio->uio_rw != UIO_WRITE) 34741897Smckusick panic("nfs_write mode"); 34854448Smckusick if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 34948047Smckusick panic("nfs_write proc"); 35048047Smckusick #endif 35154448Smckusick if (vp->v_type != VREG) 35241897Smckusick return (EIO); 35353627Smckusick if (np->n_flag & NWRITEERR) { 35453627Smckusick np->n_flag &= ~NWRITEERR; 35553627Smckusick return (np->n_error); 35653627Smckusick } 35754448Smckusick if (ioflag & (IO_APPEND | IO_SYNC)) { 35852986Smckusick if (np->n_flag & NMODIFIED) { 35956282Smckusick np->n_attrstamp = 0; 36057783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 36157783Smckusick return (error); 36252986Smckusick } 36354448Smckusick if (ioflag & IO_APPEND) { 36452986Smckusick np->n_attrstamp = 0; 36554448Smckusick if (error = VOP_GETATTR(vp, &vattr, cred, p)) 36652986Smckusick return (error); 36754448Smckusick uio->uio_offset = np->n_size; 36852986Smckusick } 36952986Smckusick } 37054448Smckusick nmp = VFSTONFS(vp->v_mount); 37154448Smckusick if (uio->uio_offset < 0) 37239584Smckusick return (EINVAL); 37354448Smckusick if (uio->uio_resid == 0) 37439584Smckusick return (0); 37538882Smacklem /* 37638882Smacklem * Maybe this should be above the vnode op call, but so long as 37738882Smacklem * file servers have no limits, i don't think it matters 37838882Smacklem */ 37954448Smckusick if (p && uio->uio_offset + uio->uio_resid > 38047572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 38147572Skarels psignal(p, SIGXFSZ); 38239584Smckusick return (EFBIG); 38338882Smacklem } 38443348Smckusick /* 38543348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks 38643348Smckusick * will be the same size within a filesystem. nfs_writerpc will 38743348Smckusick * still use nm_wsize when sizing the rpc's. 38843348Smckusick */ 38952196Smckusick biosize = nmp->nm_rsize; 39038882Smacklem do { 39152196Smckusick 39252196Smckusick /* 39352196Smckusick * Check for a valid write lease. 39452196Smckusick * If non-cachable, just do the rpc 39552196Smckusick */ 39652196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 39754448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 39852196Smckusick do { 39954448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 40052196Smckusick } while (error == NQNFS_EXPIRED); 40152196Smckusick if (error) 40252196Smckusick return (error); 40354448Smckusick if (np->n_lrev != np->n_brev || 40452196Smckusick (np->n_flag & NQNFSNONCACHE)) { 40557783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 40657783Smckusick return (error); 40752196Smckusick np->n_brev = np->n_lrev; 40852196Smckusick } 40952196Smckusick } 41052196Smckusick if (np->n_flag & NQNFSNONCACHE) 41157783Smckusick return (nfs_writerpc(vp, uio, cred, ioflag)); 41239750Smckusick nfsstats.biocache_writes++; 41354448Smckusick lbn = uio->uio_offset / biosize; 41454448Smckusick on = uio->uio_offset & (biosize-1); 41555057Spendry n = min((unsigned)(biosize - on), uio->uio_resid); 41652196Smckusick bn = lbn * (biosize / DEV_BSIZE); 41740037Smckusick again: 41857783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p); 41957783Smckusick if (!bp) 42057783Smckusick return (EINTR); 42138882Smacklem if (bp->b_wcred == NOCRED) { 42254448Smckusick crhold(cred); 42354448Smckusick bp->b_wcred = cred; 42438882Smacklem } 42557783Smckusick np->n_flag |= NMODIFIED; 42657783Smckusick if (uio->uio_offset + n > np->n_size) { 42757783Smckusick np->n_size = uio->uio_offset + n; 42857783Smckusick vnode_pager_setsize(vp, (u_long)np->n_size); 42957783Smckusick } 43052196Smckusick 43152196Smckusick /* 43252196Smckusick * If the new write will leave a contiguous dirty 43352196Smckusick * area, just update the b_dirtyoff and b_dirtyend, 43452196Smckusick * otherwise force a write rpc of the old dirty area. 43552196Smckusick */ 43652196Smckusick if (bp->b_dirtyend > 0 && 43752196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 43852196Smckusick bp->b_proc = p; 43957783Smckusick if (VOP_BWRITE(bp) == EINTR) 44057783Smckusick return (EINTR); 44152196Smckusick goto again; 44252196Smckusick } 44352196Smckusick 44452196Smckusick /* 44552196Smckusick * Check for valid write lease and get one as required. 44652196Smckusick * In case getblk() and/or bwrite() delayed us. 44752196Smckusick */ 44852196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) && 44954448Smckusick NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 45052196Smckusick do { 45154448Smckusick error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 45252196Smckusick } while (error == NQNFS_EXPIRED); 45352196Smckusick if (error) { 45452196Smckusick brelse(bp); 45552196Smckusick return (error); 45638882Smacklem } 45754448Smckusick if (np->n_lrev != np->n_brev || 45852196Smckusick (np->n_flag & NQNFSNONCACHE)) { 45956282Smckusick brelse(bp); 46057783Smckusick if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) 46157783Smckusick return (error); 46252196Smckusick np->n_brev = np->n_lrev; 46356282Smckusick goto again; 46452196Smckusick } 46538882Smacklem } 46654448Smckusick if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { 46757783Smckusick bp->b_flags |= B_ERROR; 46840037Smckusick brelse(bp); 46939584Smckusick return (error); 47040037Smckusick } 47152196Smckusick if (bp->b_dirtyend > 0) { 47255057Spendry bp->b_dirtyoff = min(on, bp->b_dirtyoff); 47357783Smckusick bp->b_dirtyend = max((on + n), bp->b_dirtyend); 47452196Smckusick } else { 47552196Smckusick bp->b_dirtyoff = on; 47657783Smckusick bp->b_dirtyend = on + n; 47752196Smckusick } 47857783Smckusick #ifndef notdef 47952196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 48052196Smckusick bp->b_validoff > bp->b_dirtyend) { 48152196Smckusick bp->b_validoff = bp->b_dirtyoff; 48252196Smckusick bp->b_validend = bp->b_dirtyend; 48352196Smckusick } else { 48455057Spendry bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 48555057Spendry bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 48652196Smckusick } 48757783Smckusick #else 48857783Smckusick bp->b_validoff = bp->b_dirtyoff; 48957783Smckusick bp->b_validend = bp->b_dirtyend; 49057783Smckusick #endif 49157783Smckusick if (ioflag & IO_APPEND) 49257783Smckusick bp->b_flags |= B_APPENDWRITE; 49352196Smckusick 49452196Smckusick /* 49552196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite(). 49652196Smckusick */ 49754448Smckusick if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 49852196Smckusick bp->b_proc = p; 49957783Smckusick if (error = VOP_BWRITE(bp)) 50057783Smckusick return (error); 50157783Smckusick } else if ((n + on) == biosize && 50257783Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 50341897Smckusick bp->b_proc = (struct proc *)0; 50438882Smacklem bawrite(bp); 50557783Smckusick } else 50638882Smacklem bdwrite(bp); 50757783Smckusick } while (uio->uio_resid > 0 && n > 0); 50857783Smckusick return (0); 50957783Smckusick } 51057783Smckusick 51157783Smckusick /* 51257783Smckusick * Get an nfs cache block. 51357783Smckusick * Allocate a new one if the block isn't currently in the cache 51457783Smckusick * and return the block marked busy. If the calling process is 51557783Smckusick * interrupted by a signal for an interruptible mount point, return 51657783Smckusick * NULL. 51757783Smckusick */ 51857783Smckusick struct buf * 51957783Smckusick nfs_getcacheblk(vp, bn, size, p) 52057783Smckusick struct vnode *vp; 52157783Smckusick daddr_t bn; 52257783Smckusick int size; 52357783Smckusick struct proc *p; 52457783Smckusick { 52557783Smckusick register struct buf *bp; 52657783Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount); 52757783Smckusick 52857783Smckusick if (nmp->nm_flag & NFSMNT_INT) { 52957808Smckusick bp = getblk(vp, bn, size, PCATCH, 0); 53057783Smckusick while (bp == (struct buf *)0) { 53157783Smckusick if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 53257783Smckusick return ((struct buf *)0); 53357808Smckusick bp = getblk(vp, bn, size, 0, 2 * hz); 53438882Smacklem } 53557783Smckusick } else 53657808Smckusick bp = getblk(vp, bn, size, 0, 0); 53757783Smckusick return (bp); 53857783Smckusick } 53957783Smckusick 54057783Smckusick /* 54157783Smckusick * Flush and invalidate all dirty buffers. If another process is already 54257783Smckusick * doing the flush, just wait for completion. 54357783Smckusick */ 54457783Smckusick nfs_vinvalbuf(vp, flags, cred, p, intrflg) 54557783Smckusick struct vnode *vp; 54657783Smckusick int flags; 54757783Smckusick struct ucred *cred; 54857783Smckusick struct proc *p; 54957783Smckusick int intrflg; 55057783Smckusick { 55157783Smckusick register struct nfsnode *np = VTONFS(vp); 55257783Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount); 55357783Smckusick int error = 0, slpflag, slptimeo; 55457783Smckusick 55557783Smckusick if ((nmp->nm_flag & NFSMNT_INT) == 0) 55657783Smckusick intrflg = 0; 55757783Smckusick if (intrflg) { 55857783Smckusick slpflag = PCATCH; 55957783Smckusick slptimeo = 2 * hz; 56057783Smckusick } else { 56157783Smckusick slpflag = 0; 56257783Smckusick slptimeo = 0; 56357783Smckusick } 56457783Smckusick /* 56557783Smckusick * First wait for any other process doing a flush to complete. 56657783Smckusick */ 56757783Smckusick while (np->n_flag & NFLUSHINPROG) { 56857783Smckusick np->n_flag |= NFLUSHWANT; 56957783Smckusick error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 57057783Smckusick slptimeo); 57157783Smckusick if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 57257783Smckusick return (EINTR); 57357783Smckusick } 57457783Smckusick 57557783Smckusick /* 57657783Smckusick * Now, flush as required. 57757783Smckusick */ 57857783Smckusick np->n_flag |= NFLUSHINPROG; 57957808Smckusick error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 58057783Smckusick while (error) { 58157783Smckusick if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 58257783Smckusick np->n_flag &= ~NFLUSHINPROG; 58357783Smckusick if (np->n_flag & NFLUSHWANT) { 58457783Smckusick np->n_flag &= ~NFLUSHWANT; 58557783Smckusick wakeup((caddr_t)&np->n_flag); 58657783Smckusick } 58757783Smckusick return (EINTR); 58857783Smckusick } 58957808Smckusick error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 59057783Smckusick } 59157783Smckusick np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 59257783Smckusick if (np->n_flag & NFLUSHWANT) { 59357783Smckusick np->n_flag &= ~NFLUSHWANT; 59457783Smckusick wakeup((caddr_t)&np->n_flag); 59557783Smckusick } 59657783Smckusick return (0); 59757783Smckusick } 59857783Smckusick 59957783Smckusick /* 60057783Smckusick * Initiate asynchronous I/O. Return an error if no nfsiods are available. 60157783Smckusick * This is mainly to avoid queueing async I/O requests when the nfsiods 60257783Smckusick * are all hung on a dead server. 60357783Smckusick */ 60457783Smckusick nfs_asyncio(bp, cred) 60557783Smckusick register struct buf *bp; 60657783Smckusick struct ucred *cred; 60757783Smckusick { 60857783Smckusick register int i; 60957783Smckusick 61057783Smckusick if (nfs_numasync == 0) 61157783Smckusick return (EIO); 61257783Smckusick for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 61357783Smckusick if (nfs_iodwant[i]) { 61457783Smckusick if (bp->b_flags & B_READ) { 61557783Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 61657783Smckusick crhold(cred); 61757783Smckusick bp->b_rcred = cred; 61857783Smckusick } 61957783Smckusick } else { 62057783Smckusick if (bp->b_wcred == NOCRED && cred != NOCRED) { 62157783Smckusick crhold(cred); 62257783Smckusick bp->b_wcred = cred; 62357783Smckusick } 62457783Smckusick } 62557783Smckusick 62657783Smckusick queue_enter_tail(&nfs_bufq, bp, struct buf *, b_freelist); 62757783Smckusick nfs_iodwant[i] = (struct proc *)0; 62857783Smckusick wakeup((caddr_t)&nfs_iodwant[i]); 62957783Smckusick return (0); 63057783Smckusick } 63157783Smckusick return (EIO); 63257783Smckusick } 63357783Smckusick 63457783Smckusick /* 63557783Smckusick * Do an I/O operation to/from a cache block. This may be called 63657783Smckusick * synchronously or from an nfsiod. 63757783Smckusick */ 63857783Smckusick int 63957783Smckusick nfs_doio(bp, cr, p) 64057783Smckusick register struct buf *bp; 64157783Smckusick struct cred *cr; 64257783Smckusick struct proc *p; 64357783Smckusick { 64457783Smckusick register struct uio *uiop; 64557783Smckusick register struct vnode *vp; 64657783Smckusick struct nfsnode *np; 64757783Smckusick struct nfsmount *nmp; 64857783Smckusick int error, diff, len; 64957783Smckusick struct uio uio; 65057783Smckusick struct iovec io; 65157783Smckusick 65257783Smckusick vp = bp->b_vp; 65357783Smckusick np = VTONFS(vp); 65457783Smckusick nmp = VFSTONFS(vp->v_mount); 65557783Smckusick uiop = &uio; 65657783Smckusick uiop->uio_iov = &io; 65757783Smckusick uiop->uio_iovcnt = 1; 65857783Smckusick uiop->uio_segflg = UIO_SYSSPACE; 65957783Smckusick uiop->uio_procp = p; 66057783Smckusick 66157783Smckusick /* 66257783Smckusick * Historically, paging was done with physio, but no more. 66357783Smckusick */ 66457783Smckusick if (bp->b_flags & B_PHYS) 66557783Smckusick panic("doio phys"); 66657783Smckusick if (bp->b_flags & B_READ) { 66757783Smckusick io.iov_len = uiop->uio_resid = bp->b_bcount; 66857783Smckusick io.iov_base = bp->b_un.b_addr; 66957783Smckusick uiop->uio_rw = UIO_READ; 67057783Smckusick switch (vp->v_type) { 67157783Smckusick case VREG: 67257783Smckusick uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 67357783Smckusick nfsstats.read_bios++; 67457783Smckusick error = nfs_readrpc(vp, uiop, cr); 67557783Smckusick if (!error) { 67657783Smckusick bp->b_validoff = 0; 67757783Smckusick if (uiop->uio_resid) { 67857783Smckusick /* 67957783Smckusick * If len > 0, there is a hole in the file and 68057783Smckusick * no writes after the hole have been pushed to 68157783Smckusick * the server yet. 68257783Smckusick * Just zero fill the rest of the valid area. 68357783Smckusick */ 68457783Smckusick diff = bp->b_bcount - uiop->uio_resid; 68557783Smckusick len = np->n_size - (bp->b_blkno * DEV_BSIZE 68657783Smckusick + diff); 68757783Smckusick if (len > 0) { 68857783Smckusick len = min(len, uiop->uio_resid); 68957783Smckusick bzero(bp->b_un.b_addr + diff, len); 69057783Smckusick bp->b_validend = diff + len; 69157783Smckusick } else 69257783Smckusick bp->b_validend = diff; 69357783Smckusick } else 69457783Smckusick bp->b_validend = bp->b_bcount; 69557783Smckusick } 69657783Smckusick if (p && (vp->v_flag & VTEXT) && 69757783Smckusick (((nmp->nm_flag & NFSMNT_NQNFS) && 69857783Smckusick np->n_lrev != np->n_brev) || 69957783Smckusick (!(nmp->nm_flag & NFSMNT_NQNFS) && 70057783Smckusick np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 70157783Smckusick uprintf("Process killed due to text file modification\n"); 70257783Smckusick psignal(p, SIGKILL); 70357783Smckusick p->p_flag |= SKEEP; 70457783Smckusick } 70557783Smckusick break; 70657783Smckusick case VLNK: 70757783Smckusick uiop->uio_offset = 0; 70857783Smckusick nfsstats.readlink_bios++; 70957783Smckusick error = nfs_readlinkrpc(vp, uiop, cr); 71057783Smckusick break; 71157783Smckusick case VDIR: 71257783Smckusick uiop->uio_offset = bp->b_lblkno; 71357783Smckusick nfsstats.readdir_bios++; 71457783Smckusick if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 71557783Smckusick error = nfs_readdirlookrpc(vp, uiop, cr); 71657783Smckusick else 71757783Smckusick error = nfs_readdirrpc(vp, uiop, cr); 71857783Smckusick /* 71957783Smckusick * Save offset cookie in b_blkno. 72057783Smckusick */ 72157783Smckusick bp->b_blkno = uiop->uio_offset; 72257783Smckusick break; 72357783Smckusick }; 72457783Smckusick if (error) { 72557783Smckusick bp->b_flags |= B_ERROR; 72657783Smckusick bp->b_error = error; 72757783Smckusick } 72857783Smckusick } else { 72957783Smckusick io.iov_len = uiop->uio_resid = bp->b_dirtyend 73057783Smckusick - bp->b_dirtyoff; 73157783Smckusick uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 73257783Smckusick + bp->b_dirtyoff; 73357783Smckusick io.iov_base = bp->b_un.b_addr + bp->b_dirtyoff; 73457783Smckusick uiop->uio_rw = UIO_WRITE; 73557783Smckusick nfsstats.write_bios++; 73657783Smckusick if (bp->b_flags & B_APPENDWRITE) 73757783Smckusick error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 73857783Smckusick else 73957783Smckusick error = nfs_writerpc(vp, uiop, cr, 0); 74057783Smckusick bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 74157783Smckusick 74257783Smckusick /* 74357783Smckusick * For an interrupted write, the buffer is still valid and the 74457783Smckusick * write hasn't been pushed to the server yet, so we can't set 74557783Smckusick * B_ERROR and report the interruption by setting B_EINTR. For 74657783Smckusick * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 74757783Smckusick * is essentially a noop. 74857783Smckusick */ 74957783Smckusick if (error == EINTR) { 75057783Smckusick bp->b_flags &= ~B_INVAL; 75157783Smckusick bp->b_flags |= B_DELWRI; 75257783Smckusick 75357783Smckusick /* 75457783Smckusick * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 75557783Smckusick * buffer to the clean list, we have to reassign it back to the 75657783Smckusick * dirty one. Ugh. 75757783Smckusick */ 75857783Smckusick if (bp->b_flags & B_ASYNC) 75957783Smckusick reassignbuf(bp, vp); 76057783Smckusick else 76157783Smckusick bp->b_flags |= B_EINTR; 76257783Smckusick } else { 76357783Smckusick if (error) { 76457783Smckusick bp->b_flags |= B_ERROR; 76557783Smckusick bp->b_error = np->n_error = error; 76657783Smckusick np->n_flag |= NWRITEERR; 76757783Smckusick } 76857783Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0; 76957783Smckusick } 77057783Smckusick } 77157783Smckusick bp->b_resid = uiop->uio_resid; 77257783Smckusick biodone(bp); 77338882Smacklem return (error); 77438882Smacklem } 775