/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	7.36 (Berkeley) 02/02/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

struct buf *nfsincore(), *nfs_getcacheblk(), *nfsgetblk();
extern struct queue_entry nfs_bufq;
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 *
 * Reads from vp into uio through the buffer cache, enforcing the
 * nfs/nqnfs cache-consistency rules below and issuing read-aheads
 * when async daemons (nfsiods) are available.  Returns 0 or errno.
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf, len, nra, error = 0, n, on, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	/* Directory "offsets" are opaque cookies, so only reject
	 * negative offsets for non-directories. */
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			     vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
		    do {
			error = nqnfs_getlease(vp, NQL_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
			    np->n_direofoffset = 0;
			    cache_purge(vp);
			}
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    np->n_direofoffset = 0;
		    cache_purge(vp);
		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			return (error);
		}
	    }
	    /* Non-cachable lease: bypass the buffer cache entirely. */
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		};
		return (error);
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		bn = lbn * (biosize / DEV_BSIZE);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    lbn == vp->v_lastr + 1) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!nfsincore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				/* No nfsiod free: drop the read-ahead. */
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
		if ((bp = nfsincore(vp, bn)) &&
		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
		    (B_BUSY | B_WRITEINPROG))
			got_buf = 0;
		else {
again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				if (error = nfs_doio(bp, cred, p)) {
				    brelse(bp);
				    return (error);
				}
			}
		}
		n = min((unsigned)(biosize - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				if (!got_buf) {
				    bp = nfs_getcacheblk(vp, bn, biosize, p);
				    if (!bp)
					return (EINTR);
				    got_buf = 1;
				}
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		bn = (daddr_t)uio->uio_offset;
		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 *  directory offset cookie of the next block.
		 */
		rabn = bp->b_blkno;
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    rabn != 0 && rabn != np->n_direofoffset &&
		    !nfsincore(vp, rabn)) {
			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		}
		on = 0;
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		got_buf = 1;
		break;
	    };

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_un.b_addr;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		if (n + on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		/* Remember the next directory cookie saved by nfs_doio(). */
		uio->uio_offset = bp->b_blkno;
		break;
	    };
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 *
 * Writes ap->a_uio to ap->a_vp through the buffer cache, merging the
 * new data into any existing dirty region of the block and pushing
 * (sync, async or delayed) according to the lease and ioflag.
 * Returns 0 or errno.
 */
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	/* Report a deferred async write error from a previous push. */
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			/* Force fresh attributes so n_size is current. */
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if (error = VOP_BWRITE(bp))
				return (error);
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		/*
		 * Interruptible mount: poll with a 2 second timeout so a
		 * pending signal can abort the wait for a buffer.
		 */
		bp = nfsgetblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = nfsgetblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = nfsgetblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	/* Interruptible waits only apply to interruptible mounts. */
	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = nfsvinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		/*
		 * On a signal for an interruptible mount, give up and wake
		 * any waiters before returning.
		 */
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		/* Retry uninterruptibly (but with a timeout) until clean. */
		error = nfsvinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
602*57783Smckusick */ 603*57783Smckusick nfs_asyncio(bp, cred) 604*57783Smckusick register struct buf *bp; 605*57783Smckusick struct ucred *cred; 606*57783Smckusick { 607*57783Smckusick register int i; 608*57783Smckusick 609*57783Smckusick if (nfs_numasync == 0) 610*57783Smckusick return (EIO); 611*57783Smckusick for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 612*57783Smckusick if (nfs_iodwant[i]) { 613*57783Smckusick if (bp->b_flags & B_READ) { 614*57783Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 615*57783Smckusick crhold(cred); 616*57783Smckusick bp->b_rcred = cred; 617*57783Smckusick } 618*57783Smckusick } else { 619*57783Smckusick if (bp->b_wcred == NOCRED && cred != NOCRED) { 620*57783Smckusick crhold(cred); 621*57783Smckusick bp->b_wcred = cred; 622*57783Smckusick } 623*57783Smckusick } 624*57783Smckusick 625*57783Smckusick queue_enter_tail(&nfs_bufq, bp, struct buf *, b_freelist); 626*57783Smckusick nfs_iodwant[i] = (struct proc *)0; 627*57783Smckusick wakeup((caddr_t)&nfs_iodwant[i]); 628*57783Smckusick return (0); 629*57783Smckusick } 630*57783Smckusick return (EIO); 631*57783Smckusick } 632*57783Smckusick 633*57783Smckusick /* 634*57783Smckusick * Do an I/O operation to/from a cache block. This may be called 635*57783Smckusick * synchronously or from an nfsiod. 
636*57783Smckusick */ 637*57783Smckusick int 638*57783Smckusick nfs_doio(bp, cr, p) 639*57783Smckusick register struct buf *bp; 640*57783Smckusick struct cred *cr; 641*57783Smckusick struct proc *p; 642*57783Smckusick { 643*57783Smckusick register struct uio *uiop; 644*57783Smckusick register struct vnode *vp; 645*57783Smckusick struct nfsnode *np; 646*57783Smckusick struct nfsmount *nmp; 647*57783Smckusick int error, diff, len; 648*57783Smckusick struct uio uio; 649*57783Smckusick struct iovec io; 650*57783Smckusick 651*57783Smckusick vp = bp->b_vp; 652*57783Smckusick np = VTONFS(vp); 653*57783Smckusick nmp = VFSTONFS(vp->v_mount); 654*57783Smckusick uiop = &uio; 655*57783Smckusick uiop->uio_iov = &io; 656*57783Smckusick uiop->uio_iovcnt = 1; 657*57783Smckusick uiop->uio_segflg = UIO_SYSSPACE; 658*57783Smckusick uiop->uio_procp = p; 659*57783Smckusick 660*57783Smckusick /* 661*57783Smckusick * Historically, paging was done with physio, but no more. 662*57783Smckusick */ 663*57783Smckusick if (bp->b_flags & B_PHYS) 664*57783Smckusick panic("doio phys"); 665*57783Smckusick if (bp->b_flags & B_READ) { 666*57783Smckusick io.iov_len = uiop->uio_resid = bp->b_bcount; 667*57783Smckusick io.iov_base = bp->b_un.b_addr; 668*57783Smckusick uiop->uio_rw = UIO_READ; 669*57783Smckusick switch (vp->v_type) { 670*57783Smckusick case VREG: 671*57783Smckusick uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 672*57783Smckusick nfsstats.read_bios++; 673*57783Smckusick error = nfs_readrpc(vp, uiop, cr); 674*57783Smckusick if (!error) { 675*57783Smckusick bp->b_validoff = 0; 676*57783Smckusick if (uiop->uio_resid) { 677*57783Smckusick /* 678*57783Smckusick * If len > 0, there is a hole in the file and 679*57783Smckusick * no writes after the hole have been pushed to 680*57783Smckusick * the server yet. 681*57783Smckusick * Just zero fill the rest of the valid area. 
682*57783Smckusick */ 683*57783Smckusick diff = bp->b_bcount - uiop->uio_resid; 684*57783Smckusick len = np->n_size - (bp->b_blkno * DEV_BSIZE 685*57783Smckusick + diff); 686*57783Smckusick if (len > 0) { 687*57783Smckusick len = min(len, uiop->uio_resid); 688*57783Smckusick bzero(bp->b_un.b_addr + diff, len); 689*57783Smckusick bp->b_validend = diff + len; 690*57783Smckusick } else 691*57783Smckusick bp->b_validend = diff; 692*57783Smckusick } else 693*57783Smckusick bp->b_validend = bp->b_bcount; 694*57783Smckusick } 695*57783Smckusick if (p && (vp->v_flag & VTEXT) && 696*57783Smckusick (((nmp->nm_flag & NFSMNT_NQNFS) && 697*57783Smckusick np->n_lrev != np->n_brev) || 698*57783Smckusick (!(nmp->nm_flag & NFSMNT_NQNFS) && 699*57783Smckusick np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 700*57783Smckusick uprintf("Process killed due to text file modification\n"); 701*57783Smckusick psignal(p, SIGKILL); 702*57783Smckusick p->p_flag |= SKEEP; 703*57783Smckusick } 704*57783Smckusick break; 705*57783Smckusick case VLNK: 706*57783Smckusick uiop->uio_offset = 0; 707*57783Smckusick nfsstats.readlink_bios++; 708*57783Smckusick error = nfs_readlinkrpc(vp, uiop, cr); 709*57783Smckusick break; 710*57783Smckusick case VDIR: 711*57783Smckusick uiop->uio_offset = bp->b_lblkno; 712*57783Smckusick nfsstats.readdir_bios++; 713*57783Smckusick if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 714*57783Smckusick error = nfs_readdirlookrpc(vp, uiop, cr); 715*57783Smckusick else 716*57783Smckusick error = nfs_readdirrpc(vp, uiop, cr); 717*57783Smckusick /* 718*57783Smckusick * Save offset cookie in b_blkno. 
719*57783Smckusick */ 720*57783Smckusick bp->b_blkno = uiop->uio_offset; 721*57783Smckusick break; 722*57783Smckusick }; 723*57783Smckusick if (error) { 724*57783Smckusick bp->b_flags |= B_ERROR; 725*57783Smckusick bp->b_error = error; 726*57783Smckusick } 727*57783Smckusick } else { 728*57783Smckusick io.iov_len = uiop->uio_resid = bp->b_dirtyend 729*57783Smckusick - bp->b_dirtyoff; 730*57783Smckusick uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 731*57783Smckusick + bp->b_dirtyoff; 732*57783Smckusick io.iov_base = bp->b_un.b_addr + bp->b_dirtyoff; 733*57783Smckusick uiop->uio_rw = UIO_WRITE; 734*57783Smckusick nfsstats.write_bios++; 735*57783Smckusick if (bp->b_flags & B_APPENDWRITE) 736*57783Smckusick error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 737*57783Smckusick else 738*57783Smckusick error = nfs_writerpc(vp, uiop, cr, 0); 739*57783Smckusick bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 740*57783Smckusick 741*57783Smckusick /* 742*57783Smckusick * For an interrupted write, the buffer is still valid and the 743*57783Smckusick * write hasn't been pushed to the server yet, so we can't set 744*57783Smckusick * B_ERROR and report the interruption by setting B_EINTR. For 745*57783Smckusick * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 746*57783Smckusick * is essentially a noop. 747*57783Smckusick */ 748*57783Smckusick if (error == EINTR) { 749*57783Smckusick bp->b_flags &= ~B_INVAL; 750*57783Smckusick bp->b_flags |= B_DELWRI; 751*57783Smckusick 752*57783Smckusick /* 753*57783Smckusick * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 754*57783Smckusick * buffer to the clean list, we have to reassign it back to the 755*57783Smckusick * dirty one. Ugh. 
756*57783Smckusick */ 757*57783Smckusick if (bp->b_flags & B_ASYNC) 758*57783Smckusick reassignbuf(bp, vp); 759*57783Smckusick else 760*57783Smckusick bp->b_flags |= B_EINTR; 761*57783Smckusick } else { 762*57783Smckusick if (error) { 763*57783Smckusick bp->b_flags |= B_ERROR; 764*57783Smckusick bp->b_error = np->n_error = error; 765*57783Smckusick np->n_flag |= NWRITEERR; 766*57783Smckusick } 767*57783Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0; 768*57783Smckusick } 769*57783Smckusick } 770*57783Smckusick bp->b_resid = uiop->uio_resid; 771*57783Smckusick biodone(bp); 77238882Smacklem return (error); 77338882Smacklem } 774