/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 03/30/95
 */
1238882Smacklem
13*68653Smckusick
1453322Smckusick #include <sys/param.h>
1555063Spendry #include <sys/systm.h>
1653322Smckusick #include <sys/resourcevar.h>
17*68653Smckusick #include <sys/signalvar.h>
1853322Smckusick #include <sys/proc.h>
1953322Smckusick #include <sys/buf.h>
2053322Smckusick #include <sys/vnode.h>
2153322Smckusick #include <sys/trace.h>
2253322Smckusick #include <sys/mount.h>
2353322Smckusick #include <sys/kernel.h>
2456535Sbostic
2553322Smckusick #include <vm/vm.h>
2656535Sbostic
2753322Smckusick #include <nfs/rpcv2.h>
28*68653Smckusick #include <nfs/nfsproto.h>
2953322Smckusick #include <nfs/nfs.h>
3053322Smckusick #include <nfs/nfsmount.h>
3153322Smckusick #include <nfs/nqnfs.h>
32*68653Smckusick #include <nfs/nfsnode.h>
3338882Smacklem
34*68653Smckusick struct buf *nfs_getcacheblk();
3557783Smckusick extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
3657783Smckusick extern int nfs_numasync;
37*68653Smckusick extern struct nfsstats nfsstats;
3838882Smacklem
3938882Smacklem /*
4038882Smacklem * Vnode op for read using bio
4138882Smacklem * Any similarity to readip() is purely coincidental
4238882Smacklem */
43*68653Smckusick int
nfs_bioread(vp,uio,ioflag,cred)4441897Smckusick nfs_bioread(vp, uio, ioflag, cred)
4538882Smacklem register struct vnode *vp;
4643348Smckusick register struct uio *uio;
4738882Smacklem int ioflag;
4838882Smacklem struct ucred *cred;
4938882Smacklem {
5038882Smacklem register struct nfsnode *np = VTONFS(vp);
51*68653Smckusick register int biosize, diff, i;
52*68653Smckusick struct buf *bp = 0, *rabp;
5338882Smacklem struct vattr vattr;
5457783Smckusick struct proc *p;
55*68653Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount);
56*68653Smckusick daddr_t lbn, bn, bn2, rabn;
5757783Smckusick caddr_t baddr;
58*68653Smckusick int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
59*68653Smckusick nfsquad_t tquad;
6038882Smacklem
6148047Smckusick #ifdef DIAGNOSTIC
6238882Smacklem if (uio->uio_rw != UIO_READ)
6338882Smacklem panic("nfs_read mode");
6448047Smckusick #endif
6538882Smacklem if (uio->uio_resid == 0)
6639584Smckusick return (0);
67*68653Smckusick if (uio->uio_offset < 0)
6839584Smckusick return (EINVAL);
69*68653Smckusick p = uio->uio_procp;
70*68653Smckusick if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
71*68653Smckusick (void)nfs_fsinfo(nmp, vp, cred, p);
7252196Smckusick biosize = nmp->nm_rsize;
7338882Smacklem /*
7452196Smckusick * For nfs, cache consistency can only be maintained approximately.
7552196Smckusick * Although RFC1094 does not specify the criteria, the following is
7652196Smckusick * believed to be compatible with the reference port.
7752196Smckusick * For nqnfs, full cache consistency is maintained within the loop.
7852196Smckusick * For nfs:
7938882Smacklem * If the file's modify time on the server has changed since the
8038882Smacklem * last read rpc or you have written to the file,
8138882Smacklem * you may have lost data cache consistency with the
8238882Smacklem * server, so flush all of the file's data out of the cache.
8341897Smckusick * Then force a getattr rpc to ensure that you have up to date
8441897Smckusick * attributes.
8538882Smacklem * NB: This implies that cache data can be read when up to
8638882Smacklem * NFS_ATTRTIMEO seconds out of date. If you find that you need current
8738882Smacklem * attributes this could be forced by setting n_attrstamp to 0 before
8853550Sheideman * the VOP_GETATTR() call.
8938882Smacklem */
9052196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
9141897Smckusick if (np->n_flag & NMODIFIED) {
92*68653Smckusick if (vp->v_type != VREG) {
93*68653Smckusick if (vp->v_type != VDIR)
94*68653Smckusick panic("nfs: bioread, not dir");
95*68653Smckusick nfs_invaldir(vp);
96*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
97*68653Smckusick if (error)
9857783Smckusick return (error);
9957783Smckusick }
10041897Smckusick np->n_attrstamp = 0;
101*68653Smckusick error = VOP_GETATTR(vp, &vattr, cred, p);
102*68653Smckusick if (error)
10339750Smckusick return (error);
10454106Smckusick np->n_mtime = vattr.va_mtime.ts_sec;
10541897Smckusick } else {
106*68653Smckusick error = VOP_GETATTR(vp, &vattr, cred, p);
107*68653Smckusick if (error)
10841897Smckusick return (error);
10954106Smckusick if (np->n_mtime != vattr.va_mtime.ts_sec) {
110*68653Smckusick if (vp->v_type == VDIR)
111*68653Smckusick nfs_invaldir(vp);
112*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
113*68653Smckusick if (error)
11457783Smckusick return (error);
11554106Smckusick np->n_mtime = vattr.va_mtime.ts_sec;
11641897Smckusick }
11739750Smckusick }
11838882Smacklem }
11938882Smacklem do {
12052196Smckusick
12152196Smckusick /*
12252196Smckusick * Get a valid lease. If cached data is stale, flush it.
12352196Smckusick */
12457783Smckusick if (nmp->nm_flag & NFSMNT_NQNFS) {
125*68653Smckusick if (NQNFS_CKINVALID(vp, np, ND_READ)) {
12657783Smckusick do {
127*68653Smckusick error = nqnfs_getlease(vp, ND_READ, cred, p);
12857783Smckusick } while (error == NQNFS_EXPIRED);
12957783Smckusick if (error)
13052196Smckusick return (error);
13157783Smckusick if (np->n_lrev != np->n_brev ||
13259704Smckusick (np->n_flag & NQNFSNONCACHE) ||
13357783Smckusick ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
134*68653Smckusick if (vp->v_type == VDIR)
135*68653Smckusick nfs_invaldir(vp);
136*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
137*68653Smckusick if (error)
13857783Smckusick return (error);
13952196Smckusick np->n_brev = np->n_lrev;
14057783Smckusick }
14157783Smckusick } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
142*68653Smckusick nfs_invaldir(vp);
143*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
144*68653Smckusick if (error)
14557783Smckusick return (error);
14652196Smckusick }
14752196Smckusick }
14852196Smckusick if (np->n_flag & NQNFSNONCACHE) {
14952196Smckusick switch (vp->v_type) {
15052196Smckusick case VREG:
151*68653Smckusick return (nfs_readrpc(vp, uio, cred));
15252196Smckusick case VLNK:
153*68653Smckusick return (nfs_readlinkrpc(vp, uio, cred));
15452196Smckusick case VDIR:
15552196Smckusick break;
156*68653Smckusick default:
157*68653Smckusick printf(" NQNFSNONCACHE: type %x unexpected\n",
158*68653Smckusick vp->v_type);
15952196Smckusick };
16052196Smckusick }
16157783Smckusick baddr = (caddr_t)0;
16241897Smckusick switch (vp->v_type) {
16341897Smckusick case VREG:
16439750Smckusick nfsstats.biocache_reads++;
16543348Smckusick lbn = uio->uio_offset / biosize;
166*68653Smckusick on = uio->uio_offset & (biosize - 1);
16757783Smckusick bn = lbn * (biosize / DEV_BSIZE);
16857783Smckusick not_readin = 1;
16957783Smckusick
17057783Smckusick /*
17157783Smckusick * Start the read ahead(s), as required.
17257783Smckusick */
173*68653Smckusick if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
17457783Smckusick for (nra = 0; nra < nmp->nm_readahead &&
17557783Smckusick (lbn + 1 + nra) * biosize < np->n_size; nra++) {
17657783Smckusick rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
17757808Smckusick if (!incore(vp, rabn)) {
17857783Smckusick rabp = nfs_getcacheblk(vp, rabn, biosize, p);
17957783Smckusick if (!rabp)
18057783Smckusick return (EINTR);
18157783Smckusick if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
18257783Smckusick rabp->b_flags |= (B_READ | B_ASYNC);
18357783Smckusick if (nfs_asyncio(rabp, cred)) {
18457783Smckusick rabp->b_flags |= B_INVAL;
18557783Smckusick brelse(rabp);
18657783Smckusick }
18768157Smckusick } else
18868157Smckusick brelse(rabp);
18957783Smckusick }
19057783Smckusick }
19157783Smckusick }
19257783Smckusick
19357783Smckusick /*
19457783Smckusick * If the block is in the cache and has the required data
19557783Smckusick * in a valid region, just copy it out.
19657783Smckusick * Otherwise, get the block and write back/read in,
19757783Smckusick * as required.
19857783Smckusick */
19957808Smckusick if ((bp = incore(vp, bn)) &&
20057783Smckusick (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
20157783Smckusick (B_BUSY | B_WRITEINPROG))
20257783Smckusick got_buf = 0;
20357783Smckusick else {
20457783Smckusick again:
20557783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p);
20657783Smckusick if (!bp)
20757783Smckusick return (EINTR);
20857783Smckusick got_buf = 1;
20957783Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
21057783Smckusick bp->b_flags |= B_READ;
21157783Smckusick not_readin = 0;
212*68653Smckusick error = nfs_doio(bp, cred, p);
213*68653Smckusick if (error) {
21457783Smckusick brelse(bp);
21557783Smckusick return (error);
21657783Smckusick }
21757783Smckusick }
21857783Smckusick }
21955057Spendry n = min((unsigned)(biosize - on), uio->uio_resid);
22038882Smacklem diff = np->n_size - uio->uio_offset;
22138882Smacklem if (diff < n)
22238882Smacklem n = diff;
22357783Smckusick if (not_readin && n > 0) {
22457783Smckusick if (on < bp->b_validoff || (on + n) > bp->b_validend) {
22557783Smckusick if (!got_buf) {
22657783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p);
22757783Smckusick if (!bp)
22857783Smckusick return (EINTR);
22957783Smckusick got_buf = 1;
23057783Smckusick }
231*68653Smckusick bp->b_flags |= B_INVAFTERWRITE;
23252196Smckusick if (bp->b_dirtyend > 0) {
23357783Smckusick if ((bp->b_flags & B_DELWRI) == 0)
23457783Smckusick panic("nfsbioread");
23557783Smckusick if (VOP_BWRITE(bp) == EINTR)
23657783Smckusick return (EINTR);
23752196Smckusick } else
23857783Smckusick brelse(bp);
23952196Smckusick goto again;
24052196Smckusick }
24152196Smckusick }
24239901Smckusick vp->v_lastr = lbn;
24357783Smckusick diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
24457783Smckusick if (diff < n)
24557783Smckusick n = diff;
24641897Smckusick break;
24741897Smckusick case VLNK:
24841897Smckusick nfsstats.biocache_readlinks++;
24957783Smckusick bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
25057783Smckusick if (!bp)
25157783Smckusick return (EINTR);
25257783Smckusick if ((bp->b_flags & B_DONE) == 0) {
25357783Smckusick bp->b_flags |= B_READ;
254*68653Smckusick error = nfs_doio(bp, cred, p);
255*68653Smckusick if (error) {
25657783Smckusick brelse(bp);
25757783Smckusick return (error);
25857783Smckusick }
25957783Smckusick }
26057783Smckusick n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
26157783Smckusick got_buf = 1;
26241897Smckusick on = 0;
26341897Smckusick break;
26441897Smckusick case VDIR:
26541897Smckusick nfsstats.biocache_readdirs++;
266*68653Smckusick lbn = uio->uio_offset / NFS_DIRBLKSIZ;
267*68653Smckusick on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
268*68653Smckusick bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
26957783Smckusick if (!bp)
270*68653Smckusick return (EINTR);
27157783Smckusick if ((bp->b_flags & B_DONE) == 0) {
272*68653Smckusick bp->b_flags |= B_READ;
273*68653Smckusick error = nfs_doio(bp, cred, p);
274*68653Smckusick if (error) {
275*68653Smckusick brelse(bp);
276*68653Smckusick while (error == NFSERR_BAD_COOKIE) {
277*68653Smckusick nfs_invaldir(vp);
278*68653Smckusick error = nfs_vinvalbuf(vp, 0, cred, p, 1);
279*68653Smckusick /*
280*68653Smckusick * Yuck! The directory has been modified on the
281*68653Smckusick * server. The only way to get the block is by
282*68653Smckusick * reading from the beginning to get all the
283*68653Smckusick * offset cookies.
284*68653Smckusick */
285*68653Smckusick for (i = 0; i <= lbn && !error; i++) {
286*68653Smckusick bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
287*68653Smckusick if (!bp)
288*68653Smckusick return (EINTR);
289*68653Smckusick if ((bp->b_flags & B_DONE) == 0) {
290*68653Smckusick bp->b_flags |= B_READ;
291*68653Smckusick error = nfs_doio(bp, cred, p);
292*68653Smckusick if (error)
293*68653Smckusick brelse(bp);
294*68653Smckusick }
295*68653Smckusick }
29652196Smckusick }
297*68653Smckusick if (error)
298*68653Smckusick return (error);
299*68653Smckusick }
30057783Smckusick }
30157783Smckusick
30257783Smckusick /*
30357783Smckusick * If not eof and read aheads are enabled, start one.
30457783Smckusick * (You need the current block first, so that you have the
305*68653Smckusick * directory offset cookie of the next block.)
30657783Smckusick */
30757783Smckusick if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
308*68653Smckusick (np->n_direofoffset == 0 ||
309*68653Smckusick (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
310*68653Smckusick !(np->n_flag & NQNFSNONCACHE) &&
311*68653Smckusick !incore(vp, lbn + 1)) {
312*68653Smckusick rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
31357783Smckusick if (rabp) {
31457783Smckusick if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
31557783Smckusick rabp->b_flags |= (B_READ | B_ASYNC);
31657783Smckusick if (nfs_asyncio(rabp, cred)) {
31757783Smckusick rabp->b_flags |= B_INVAL;
31857783Smckusick brelse(rabp);
31952196Smckusick }
32068157Smckusick } else
32168157Smckusick brelse(rabp);
32252196Smckusick }
32352196Smckusick }
324*68653Smckusick n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
32557783Smckusick got_buf = 1;
32657783Smckusick break;
327*68653Smckusick default:
328*68653Smckusick printf(" nfsbioread: type %x unexpected\n",vp->v_type);
329*68653Smckusick break;
33057783Smckusick };
33157783Smckusick
33257783Smckusick if (n > 0) {
33357783Smckusick if (!baddr)
33464544Sbostic baddr = bp->b_data;
33557783Smckusick error = uiomove(baddr + on, (int)n, uio);
33652196Smckusick }
33741897Smckusick switch (vp->v_type) {
33841897Smckusick case VREG:
33941897Smckusick break;
34041897Smckusick case VLNK:
34141897Smckusick n = 0;
34241897Smckusick break;
34341897Smckusick case VDIR:
344*68653Smckusick if (np->n_flag & NQNFSNONCACHE)
345*68653Smckusick bp->b_flags |= B_INVAL;
34641897Smckusick break;
347*68653Smckusick default:
348*68653Smckusick printf(" nfsbioread: type %x unexpected\n",vp->v_type);
349*68653Smckusick }
35057783Smckusick if (got_buf)
35157783Smckusick brelse(bp);
35257783Smckusick } while (error == 0 && uio->uio_resid > 0 && n > 0);
35338882Smacklem return (error);
35438882Smacklem }
35538882Smacklem
35638882Smacklem /*
35738882Smacklem * Vnode op for write using bio
35838882Smacklem */
359*68653Smckusick int
nfs_write(ap)36054669Smckusick nfs_write(ap)
36154448Smckusick struct vop_write_args /* {
36254448Smckusick struct vnode *a_vp;
36354448Smckusick struct uio *a_uio;
36454448Smckusick int a_ioflag;
36554448Smckusick struct ucred *a_cred;
36654448Smckusick } */ *ap;
36738882Smacklem {
36852196Smckusick register int biosize;
36954448Smckusick register struct uio *uio = ap->a_uio;
37054448Smckusick struct proc *p = uio->uio_procp;
37154448Smckusick register struct vnode *vp = ap->a_vp;
37254448Smckusick struct nfsnode *np = VTONFS(vp);
37354448Smckusick register struct ucred *cred = ap->a_cred;
37454448Smckusick int ioflag = ap->a_ioflag;
37538882Smacklem struct buf *bp;
37641897Smckusick struct vattr vattr;
377*68653Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount);
37838882Smacklem daddr_t lbn, bn;
379*68653Smckusick int n, on, error = 0, iomode, must_commit;
38038882Smacklem
38148047Smckusick #ifdef DIAGNOSTIC
38254448Smckusick if (uio->uio_rw != UIO_WRITE)
38341897Smckusick panic("nfs_write mode");
38454448Smckusick if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
38548047Smckusick panic("nfs_write proc");
38648047Smckusick #endif
38754448Smckusick if (vp->v_type != VREG)
38841897Smckusick return (EIO);
38953627Smckusick if (np->n_flag & NWRITEERR) {
39053627Smckusick np->n_flag &= ~NWRITEERR;
39153627Smckusick return (np->n_error);
39253627Smckusick }
393*68653Smckusick if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
394*68653Smckusick (void)nfs_fsinfo(nmp, vp, cred, p);
39554448Smckusick if (ioflag & (IO_APPEND | IO_SYNC)) {
39652986Smckusick if (np->n_flag & NMODIFIED) {
39756282Smckusick np->n_attrstamp = 0;
398*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
399*68653Smckusick if (error)
40057783Smckusick return (error);
40152986Smckusick }
40254448Smckusick if (ioflag & IO_APPEND) {
40352986Smckusick np->n_attrstamp = 0;
404*68653Smckusick error = VOP_GETATTR(vp, &vattr, cred, p);
405*68653Smckusick if (error)
40652986Smckusick return (error);
40754448Smckusick uio->uio_offset = np->n_size;
40852986Smckusick }
40952986Smckusick }
41054448Smckusick if (uio->uio_offset < 0)
41139584Smckusick return (EINVAL);
41254448Smckusick if (uio->uio_resid == 0)
41339584Smckusick return (0);
41438882Smacklem /*
41538882Smacklem * Maybe this should be above the vnode op call, but so long as
41638882Smacklem * file servers have no limits, i don't think it matters
41738882Smacklem */
41854448Smckusick if (p && uio->uio_offset + uio->uio_resid >
41947572Skarels p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
42047572Skarels psignal(p, SIGXFSZ);
42139584Smckusick return (EFBIG);
42238882Smacklem }
42343348Smckusick /*
42443348Smckusick * I use nm_rsize, not nm_wsize so that all buffer cache blocks
42543348Smckusick * will be the same size within a filesystem. nfs_writerpc will
42643348Smckusick * still use nm_wsize when sizing the rpc's.
42743348Smckusick */
42852196Smckusick biosize = nmp->nm_rsize;
42938882Smacklem do {
43052196Smckusick
43152196Smckusick /*
43267373Shibler * XXX make sure we aren't cached in the VM page cache
43367373Shibler */
43467373Shibler (void)vnode_pager_uncache(vp);
43567373Shibler
43667373Shibler /*
43752196Smckusick * Check for a valid write lease.
43852196Smckusick */
43952196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) &&
440*68653Smckusick NQNFS_CKINVALID(vp, np, ND_WRITE)) {
44152196Smckusick do {
442*68653Smckusick error = nqnfs_getlease(vp, ND_WRITE, cred, p);
44352196Smckusick } while (error == NQNFS_EXPIRED);
44452196Smckusick if (error)
44552196Smckusick return (error);
44654448Smckusick if (np->n_lrev != np->n_brev ||
44752196Smckusick (np->n_flag & NQNFSNONCACHE)) {
448*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
449*68653Smckusick if (error)
45057783Smckusick return (error);
45152196Smckusick np->n_brev = np->n_lrev;
45252196Smckusick }
45352196Smckusick }
454*68653Smckusick if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
455*68653Smckusick iomode = NFSV3WRITE_FILESYNC;
456*68653Smckusick error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
457*68653Smckusick if (must_commit)
458*68653Smckusick nfs_clearcommit(vp->v_mount);
459*68653Smckusick return (error);
460*68653Smckusick }
46139750Smckusick nfsstats.biocache_writes++;
46254448Smckusick lbn = uio->uio_offset / biosize;
46354448Smckusick on = uio->uio_offset & (biosize-1);
46455057Spendry n = min((unsigned)(biosize - on), uio->uio_resid);
46552196Smckusick bn = lbn * (biosize / DEV_BSIZE);
46640037Smckusick again:
46757783Smckusick bp = nfs_getcacheblk(vp, bn, biosize, p);
46857783Smckusick if (!bp)
46957783Smckusick return (EINTR);
47038882Smacklem if (bp->b_wcred == NOCRED) {
47154448Smckusick crhold(cred);
47254448Smckusick bp->b_wcred = cred;
47338882Smacklem }
47457783Smckusick np->n_flag |= NMODIFIED;
47557783Smckusick if (uio->uio_offset + n > np->n_size) {
47657783Smckusick np->n_size = uio->uio_offset + n;
47757783Smckusick vnode_pager_setsize(vp, (u_long)np->n_size);
47857783Smckusick }
47952196Smckusick
48052196Smckusick /*
48152196Smckusick * If the new write will leave a contiguous dirty
48252196Smckusick * area, just update the b_dirtyoff and b_dirtyend,
48352196Smckusick * otherwise force a write rpc of the old dirty area.
48452196Smckusick */
48552196Smckusick if (bp->b_dirtyend > 0 &&
48652196Smckusick (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
48752196Smckusick bp->b_proc = p;
48857783Smckusick if (VOP_BWRITE(bp) == EINTR)
48957783Smckusick return (EINTR);
49052196Smckusick goto again;
49152196Smckusick }
49252196Smckusick
49352196Smckusick /*
49452196Smckusick * Check for valid write lease and get one as required.
49552196Smckusick * In case getblk() and/or bwrite() delayed us.
49652196Smckusick */
49752196Smckusick if ((nmp->nm_flag & NFSMNT_NQNFS) &&
498*68653Smckusick NQNFS_CKINVALID(vp, np, ND_WRITE)) {
49952196Smckusick do {
500*68653Smckusick error = nqnfs_getlease(vp, ND_WRITE, cred, p);
50152196Smckusick } while (error == NQNFS_EXPIRED);
50252196Smckusick if (error) {
50352196Smckusick brelse(bp);
50452196Smckusick return (error);
50538882Smacklem }
50654448Smckusick if (np->n_lrev != np->n_brev ||
50752196Smckusick (np->n_flag & NQNFSNONCACHE)) {
50856282Smckusick brelse(bp);
509*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
510*68653Smckusick if (error)
51157783Smckusick return (error);
51252196Smckusick np->n_brev = np->n_lrev;
51356282Smckusick goto again;
51452196Smckusick }
51538882Smacklem }
516*68653Smckusick error = uiomove((char *)bp->b_data + on, n, uio);
517*68653Smckusick if (error) {
51857783Smckusick bp->b_flags |= B_ERROR;
51940037Smckusick brelse(bp);
52039584Smckusick return (error);
52140037Smckusick }
52252196Smckusick if (bp->b_dirtyend > 0) {
52355057Spendry bp->b_dirtyoff = min(on, bp->b_dirtyoff);
52457783Smckusick bp->b_dirtyend = max((on + n), bp->b_dirtyend);
52552196Smckusick } else {
52652196Smckusick bp->b_dirtyoff = on;
52757783Smckusick bp->b_dirtyend = on + n;
52852196Smckusick }
52952196Smckusick if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
53052196Smckusick bp->b_validoff > bp->b_dirtyend) {
53152196Smckusick bp->b_validoff = bp->b_dirtyoff;
53252196Smckusick bp->b_validend = bp->b_dirtyend;
53352196Smckusick } else {
53455057Spendry bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
53555057Spendry bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
53652196Smckusick }
53752196Smckusick /*
53852196Smckusick * If the lease is non-cachable or IO_SYNC do bwrite().
53952196Smckusick */
54054448Smckusick if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
54152196Smckusick bp->b_proc = p;
542*68653Smckusick error = VOP_BWRITE(bp);
543*68653Smckusick if (error)
54457783Smckusick return (error);
545*68653Smckusick if (np->n_flag & NQNFSNONCACHE) {
546*68653Smckusick error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
547*68653Smckusick if (error)
548*68653Smckusick return (error);
549*68653Smckusick }
55057783Smckusick } else if ((n + on) == biosize &&
55157783Smckusick (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
55241897Smckusick bp->b_proc = (struct proc *)0;
553*68653Smckusick bp->b_flags |= B_ASYNC;
554*68653Smckusick (void)nfs_writebp(bp, 0);
55557783Smckusick } else
55638882Smacklem bdwrite(bp);
55757783Smckusick } while (uio->uio_resid > 0 && n > 0);
55857783Smckusick return (0);
55957783Smckusick }
56057783Smckusick
56157783Smckusick /*
56257783Smckusick * Get an nfs cache block.
56357783Smckusick * Allocate a new one if the block isn't currently in the cache
56457783Smckusick * and return the block marked busy. If the calling process is
56557783Smckusick * interrupted by a signal for an interruptible mount point, return
56657783Smckusick * NULL.
56757783Smckusick */
56857783Smckusick struct buf *
nfs_getcacheblk(vp,bn,size,p)56957783Smckusick nfs_getcacheblk(vp, bn, size, p)
57057783Smckusick struct vnode *vp;
57157783Smckusick daddr_t bn;
57257783Smckusick int size;
57357783Smckusick struct proc *p;
57457783Smckusick {
57557783Smckusick register struct buf *bp;
57657783Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount);
57757783Smckusick
57857783Smckusick if (nmp->nm_flag & NFSMNT_INT) {
57957808Smckusick bp = getblk(vp, bn, size, PCATCH, 0);
58057783Smckusick while (bp == (struct buf *)0) {
58157783Smckusick if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
58257783Smckusick return ((struct buf *)0);
58357808Smckusick bp = getblk(vp, bn, size, 0, 2 * hz);
58438882Smacklem }
58557783Smckusick } else
58657808Smckusick bp = getblk(vp, bn, size, 0, 0);
58757783Smckusick return (bp);
58857783Smckusick }
58957783Smckusick
59057783Smckusick /*
59157783Smckusick * Flush and invalidate all dirty buffers. If another process is already
59257783Smckusick * doing the flush, just wait for completion.
59357783Smckusick */
594*68653Smckusick int
nfs_vinvalbuf(vp,flags,cred,p,intrflg)59557783Smckusick nfs_vinvalbuf(vp, flags, cred, p, intrflg)
59657783Smckusick struct vnode *vp;
59757783Smckusick int flags;
59857783Smckusick struct ucred *cred;
59957783Smckusick struct proc *p;
60057783Smckusick int intrflg;
60157783Smckusick {
60257783Smckusick register struct nfsnode *np = VTONFS(vp);
60357783Smckusick struct nfsmount *nmp = VFSTONFS(vp->v_mount);
60457783Smckusick int error = 0, slpflag, slptimeo;
60557783Smckusick
60657783Smckusick if ((nmp->nm_flag & NFSMNT_INT) == 0)
60757783Smckusick intrflg = 0;
60857783Smckusick if (intrflg) {
60957783Smckusick slpflag = PCATCH;
61057783Smckusick slptimeo = 2 * hz;
61157783Smckusick } else {
61257783Smckusick slpflag = 0;
61357783Smckusick slptimeo = 0;
61457783Smckusick }
61557783Smckusick /*
61657783Smckusick * First wait for any other process doing a flush to complete.
61757783Smckusick */
61857783Smckusick while (np->n_flag & NFLUSHINPROG) {
61957783Smckusick np->n_flag |= NFLUSHWANT;
62057783Smckusick error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
62157783Smckusick slptimeo);
62257783Smckusick if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
62357783Smckusick return (EINTR);
62457783Smckusick }
62557783Smckusick
62657783Smckusick /*
62757783Smckusick * Now, flush as required.
62857783Smckusick */
62957783Smckusick np->n_flag |= NFLUSHINPROG;
63057808Smckusick error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
63157783Smckusick while (error) {
63257783Smckusick if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
63357783Smckusick np->n_flag &= ~NFLUSHINPROG;
63457783Smckusick if (np->n_flag & NFLUSHWANT) {
63557783Smckusick np->n_flag &= ~NFLUSHWANT;
63657783Smckusick wakeup((caddr_t)&np->n_flag);
63757783Smckusick }
63857783Smckusick return (EINTR);
63957783Smckusick }
64057808Smckusick error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
64157783Smckusick }
64257783Smckusick np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
64357783Smckusick if (np->n_flag & NFLUSHWANT) {
64457783Smckusick np->n_flag &= ~NFLUSHWANT;
64557783Smckusick wakeup((caddr_t)&np->n_flag);
64657783Smckusick }
64757783Smckusick return (0);
64857783Smckusick }
64957783Smckusick
65057783Smckusick /*
65157783Smckusick * Initiate asynchronous I/O. Return an error if no nfsiods are available.
65257783Smckusick * This is mainly to avoid queueing async I/O requests when the nfsiods
65357783Smckusick * are all hung on a dead server.
65457783Smckusick */
655*68653Smckusick int
nfs_asyncio(bp,cred)65657783Smckusick nfs_asyncio(bp, cred)
65757783Smckusick register struct buf *bp;
65857783Smckusick struct ucred *cred;
65957783Smckusick {
66057783Smckusick register int i;
66157783Smckusick
66257783Smckusick if (nfs_numasync == 0)
66357783Smckusick return (EIO);
66457783Smckusick for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
66557783Smckusick if (nfs_iodwant[i]) {
66657783Smckusick if (bp->b_flags & B_READ) {
66757783Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) {
66857783Smckusick crhold(cred);
66957783Smckusick bp->b_rcred = cred;
67057783Smckusick }
67157783Smckusick } else {
672*68653Smckusick bp->b_flags |= B_WRITEINPROG;
67357783Smckusick if (bp->b_wcred == NOCRED && cred != NOCRED) {
67457783Smckusick crhold(cred);
67557783Smckusick bp->b_wcred = cred;
67657783Smckusick }
67757783Smckusick }
67857783Smckusick
67965253Smckusick TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
68057783Smckusick nfs_iodwant[i] = (struct proc *)0;
68157783Smckusick wakeup((caddr_t)&nfs_iodwant[i]);
68257783Smckusick return (0);
68357783Smckusick }
684*68653Smckusick
685*68653Smckusick /*
686*68653Smckusick * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
687*68653Smckusick * return EIO so the process will call nfs_doio() and do it
688*68653Smckusick * synchronously.
689*68653Smckusick */
690*68653Smckusick if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
691*68653Smckusick return (EIO);
692*68653Smckusick
693*68653Smckusick /*
694*68653Smckusick * Just turn the async write into a delayed write, instead of
695*68653Smckusick * doing in synchronously. Hopefully, at least one of the nfsiods
696*68653Smckusick * is currently doing a write for this file and will pick up the
697*68653Smckusick * delayed writes before going back to sleep.
698*68653Smckusick */
699*68653Smckusick bp->b_flags |= B_DELWRI;
700*68653Smckusick reassignbuf(bp, bp->b_vp);
701*68653Smckusick biodone(bp);
702*68653Smckusick return (0);
70357783Smckusick }
70457783Smckusick
70557783Smckusick /*
70657783Smckusick * Do an I/O operation to/from a cache block. This may be called
70757783Smckusick * synchronously or from an nfsiod.
70857783Smckusick */
70957783Smckusick int
nfs_doio(bp,cr,p)71057783Smckusick nfs_doio(bp, cr, p)
71157783Smckusick register struct buf *bp;
712*68653Smckusick struct ucred *cr;
71357783Smckusick struct proc *p;
71457783Smckusick {
71557783Smckusick register struct uio *uiop;
71657783Smckusick register struct vnode *vp;
71757783Smckusick struct nfsnode *np;
71857783Smckusick struct nfsmount *nmp;
719*68653Smckusick int error = 0, diff, len, iomode, must_commit = 0;
72057783Smckusick struct uio uio;
72157783Smckusick struct iovec io;
722*68653Smckusick nfsquad_t tquad;
72357783Smckusick
72457783Smckusick vp = bp->b_vp;
72557783Smckusick np = VTONFS(vp);
72657783Smckusick nmp = VFSTONFS(vp->v_mount);
72757783Smckusick uiop = &uio;
72857783Smckusick uiop->uio_iov = &io;
72957783Smckusick uiop->uio_iovcnt = 1;
73057783Smckusick uiop->uio_segflg = UIO_SYSSPACE;
73157783Smckusick uiop->uio_procp = p;
73257783Smckusick
73357783Smckusick /*
73457783Smckusick * Historically, paging was done with physio, but no more.
73557783Smckusick */
736*68653Smckusick if (bp->b_flags & B_PHYS) {
737*68653Smckusick /*
738*68653Smckusick * ...though reading /dev/drum still gets us here.
739*68653Smckusick */
74057783Smckusick io.iov_len = uiop->uio_resid = bp->b_bcount;
741*68653Smckusick /* mapping was done by vmapbuf() */
74264544Sbostic io.iov_base = bp->b_data;
743*68653Smckusick uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
744*68653Smckusick if (bp->b_flags & B_READ) {
745*68653Smckusick uiop->uio_rw = UIO_READ;
746*68653Smckusick nfsstats.read_physios++;
747*68653Smckusick error = nfs_readrpc(vp, uiop, cr);
748*68653Smckusick } else
749*68653Smckusick panic("physio write");
750*68653Smckusick if (error) {
751*68653Smckusick bp->b_flags |= B_ERROR;
752*68653Smckusick bp->b_error = error;
753*68653Smckusick }
754*68653Smckusick } else if (bp->b_flags & B_READ) {
755*68653Smckusick io.iov_len = uiop->uio_resid = bp->b_bcount;
756*68653Smckusick io.iov_base = bp->b_data;
75757783Smckusick uiop->uio_rw = UIO_READ;
75857783Smckusick switch (vp->v_type) {
75957783Smckusick case VREG:
760*68653Smckusick uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
76157783Smckusick nfsstats.read_bios++;
76257783Smckusick error = nfs_readrpc(vp, uiop, cr);
76357783Smckusick if (!error) {
76457783Smckusick bp->b_validoff = 0;
76557783Smckusick if (uiop->uio_resid) {
76657783Smckusick /*
76757783Smckusick * If len > 0, there is a hole in the file and
76857783Smckusick * no writes after the hole have been pushed to
76957783Smckusick * the server yet.
77057783Smckusick * Just zero fill the rest of the valid area.
77157783Smckusick */
77257783Smckusick diff = bp->b_bcount - uiop->uio_resid;
773*68653Smckusick len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
77457783Smckusick + diff);
77557783Smckusick if (len > 0) {
77657783Smckusick len = min(len, uiop->uio_resid);
77764544Sbostic bzero((char *)bp->b_data + diff, len);
77857783Smckusick bp->b_validend = diff + len;
77957783Smckusick } else
78057783Smckusick bp->b_validend = diff;
78157783Smckusick } else
78257783Smckusick bp->b_validend = bp->b_bcount;
78357783Smckusick }
78457783Smckusick if (p && (vp->v_flag & VTEXT) &&
78557783Smckusick (((nmp->nm_flag & NFSMNT_NQNFS) &&
786*68653Smckusick NQNFS_CKINVALID(vp, np, ND_READ) &&
78757783Smckusick np->n_lrev != np->n_brev) ||
78857783Smckusick (!(nmp->nm_flag & NFSMNT_NQNFS) &&
78957783Smckusick np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
79057783Smckusick uprintf("Process killed due to text file modification\n");
79157783Smckusick psignal(p, SIGKILL);
79264595Sbostic p->p_flag |= P_NOSWAP;
79357783Smckusick }
79457783Smckusick break;
79557783Smckusick case VLNK:
796*68653Smckusick uiop->uio_offset = (off_t)0;
79757783Smckusick nfsstats.readlink_bios++;
79857783Smckusick error = nfs_readlinkrpc(vp, uiop, cr);
79957783Smckusick break;
80057783Smckusick case VDIR:
80157783Smckusick nfsstats.readdir_bios++;
802*68653Smckusick uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
803*68653Smckusick if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
804*68653Smckusick error = nfs_readdirplusrpc(vp, uiop, cr);
805*68653Smckusick if (error == NFSERR_NOTSUPP)
806*68653Smckusick nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
807*68653Smckusick }
808*68653Smckusick if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
809*68653Smckusick error = nfs_readdirrpc(vp, uiop, cr);
81057783Smckusick break;
811*68653Smckusick default:
812*68653Smckusick printf("nfs_doio: type %x unexpected\n",vp->v_type);
813*68653Smckusick break;
81457783Smckusick };
81557783Smckusick if (error) {
81657783Smckusick bp->b_flags |= B_ERROR;
81757783Smckusick bp->b_error = error;
81857783Smckusick }
81957783Smckusick } else {
82057783Smckusick io.iov_len = uiop->uio_resid = bp->b_dirtyend
82157783Smckusick - bp->b_dirtyoff;
822*68653Smckusick uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
82357783Smckusick + bp->b_dirtyoff;
82464544Sbostic io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
82557783Smckusick uiop->uio_rw = UIO_WRITE;
82657783Smckusick nfsstats.write_bios++;
827*68653Smckusick if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
828*68653Smckusick iomode = NFSV3WRITE_UNSTABLE;
82957783Smckusick else
830*68653Smckusick iomode = NFSV3WRITE_FILESYNC;
831*68653Smckusick bp->b_flags |= B_WRITEINPROG;
832*68653Smckusick error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
833*68653Smckusick if (!error && iomode == NFSV3WRITE_UNSTABLE)
834*68653Smckusick bp->b_flags |= B_NEEDCOMMIT;
835*68653Smckusick else
836*68653Smckusick bp->b_flags &= ~B_NEEDCOMMIT;
837*68653Smckusick bp->b_flags &= ~B_WRITEINPROG;
83857783Smckusick
83957783Smckusick /*
84057783Smckusick * For an interrupted write, the buffer is still valid and the
84157783Smckusick * write hasn't been pushed to the server yet, so we can't set
84257783Smckusick * B_ERROR and report the interruption by setting B_EINTR. For
84357783Smckusick * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
84457783Smckusick * is essentially a noop.
845*68653Smckusick * For the case of a V3 write rpc not being committed to stable
846*68653Smckusick * storage, the block is still dirty and requires either a commit
847*68653Smckusick * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
848*68653Smckusick * before the block is reused. This is indicated by setting the
849*68653Smckusick * B_DELWRI and B_NEEDCOMMIT flags.
85057783Smckusick */
851*68653Smckusick if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
85257783Smckusick bp->b_flags |= B_DELWRI;
85357783Smckusick
85457783Smckusick /*
85557783Smckusick * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
85657783Smckusick * buffer to the clean list, we have to reassign it back to the
85757783Smckusick * dirty one. Ugh.
85857783Smckusick */
85957783Smckusick if (bp->b_flags & B_ASYNC)
86057783Smckusick reassignbuf(bp, vp);
86157783Smckusick else
86257783Smckusick bp->b_flags |= B_EINTR;
86357783Smckusick } else {
86457783Smckusick if (error) {
86557783Smckusick bp->b_flags |= B_ERROR;
86657783Smckusick bp->b_error = np->n_error = error;
86757783Smckusick np->n_flag |= NWRITEERR;
86857783Smckusick }
86957783Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0;
87057783Smckusick }
87157783Smckusick }
87257783Smckusick bp->b_resid = uiop->uio_resid;
873*68653Smckusick if (must_commit)
874*68653Smckusick nfs_clearcommit(vp->v_mount);
87557783Smckusick biodone(bp);
87638882Smacklem return (error);
87738882Smacklem }
878