xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 56535)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_bio.c	7.33 (Berkeley) 10/11/92
11  */
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/resourcevar.h>
16 #include <sys/proc.h>
17 #include <sys/buf.h>
18 #include <sys/vnode.h>
19 #include <sys/trace.h>
20 #include <sys/mount.h>
21 #include <sys/kernel.h>
22 
23 #include <vm/vm.h>
24 
25 #include <nfs/nfsnode.h>
26 #include <nfs/rpcv2.h>
27 #include <nfs/nfsv2.h>
28 #include <nfs/nfs.h>
29 #include <nfs/nfsmount.h>
30 #include <nfs/nqnfs.h>
31 
/*
 * Boolean constants used for the "flush" argument of vinvalbuf() below.
 * Guarded so that a header which already supplies them does not cause a
 * macro redefinition.
 */
#ifndef TRUE
#define	TRUE	1
#endif
#ifndef FALSE
#define	FALSE	0
#endif
35 
36 /*
37  * Vnode op for read using bio
38  * Any similarity to readip() is purely coincidental
39  */
40 nfs_bioread(vp, uio, ioflag, cred)
41 	register struct vnode *vp;
42 	register struct uio *uio;
43 	int ioflag;
44 	struct ucred *cred;
45 {
46 	register struct nfsnode *np = VTONFS(vp);
47 	register int biosize;
48 	struct buf *bp;
49 	struct vattr vattr;
50 	struct nfsmount *nmp;
51 	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
52 	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
53 	int n, on;
54 
55 #ifdef lint
56 	ioflag = ioflag;
57 #endif /* lint */
58 #ifdef DIAGNOSTIC
59 	if (uio->uio_rw != UIO_READ)
60 		panic("nfs_read mode");
61 #endif
62 	if (uio->uio_resid == 0)
63 		return (0);
64 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
65 		return (EINVAL);
66 	nmp = VFSTONFS(vp->v_mount);
67 	biosize = nmp->nm_rsize;
68 	/*
69 	 * For nfs, cache consistency can only be maintained approximately.
70 	 * Although RFC1094 does not specify the criteria, the following is
71 	 * believed to be compatible with the reference port.
72 	 * For nqnfs, full cache consistency is maintained within the loop.
73 	 * For nfs:
74 	 * If the file's modify time on the server has changed since the
75 	 * last read rpc or you have written to the file,
76 	 * you may have lost data cache consistency with the
77 	 * server, so flush all of the file's data out of the cache.
78 	 * Then force a getattr rpc to ensure that you have up to date
79 	 * attributes.
80 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
81 	 * the ones changing the modify time.
82 	 * NB: This implies that cache data can be read when up to
83 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
84 	 * attributes this could be forced by setting n_attrstamp to 0 before
85 	 * the VOP_GETATTR() call.
86 	 */
87 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
88 		if (np->n_flag & NMODIFIED) {
89 			np->n_flag &= ~NMODIFIED;
90 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
91 			     vp->v_type != VREG)
92 				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
93 			np->n_attrstamp = 0;
94 			np->n_direofoffset = 0;
95 			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
96 				return (error);
97 			np->n_mtime = vattr.va_mtime.ts_sec;
98 		} else {
99 			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
100 				return (error);
101 			if (np->n_mtime != vattr.va_mtime.ts_sec) {
102 				np->n_direofoffset = 0;
103 				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
104 				np->n_mtime = vattr.va_mtime.ts_sec;
105 			}
106 		}
107 	}
108 	do {
109 
110 	    /*
111 	     * Get a valid lease. If cached data is stale, flush it.
112 	     */
113 	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
114 		NQNFS_CKINVALID(vp, np, NQL_READ)) {
115 		do {
116 			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
117 		} while (error == NQNFS_EXPIRED);
118 		if (error)
119 			return (error);
120 		if (np->n_lrev != np->n_brev ||
121 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
122 			if (vp->v_type == VDIR) {
123 				np->n_direofoffset = 0;
124 				cache_purge(vp);
125 			}
126 			np->n_flag &= ~NMODIFIED;
127 			vinvalbuf(vp, TRUE, cred, uio->uio_procp);
128 			np->n_brev = np->n_lrev;
129 		}
130 	    }
131 	    if (np->n_flag & NQNFSNONCACHE) {
132 		switch (vp->v_type) {
133 		case VREG:
134 			error = nfs_readrpc(vp, uio, cred);
135 			break;
136 		case VLNK:
137 			error = nfs_readlinkrpc(vp, uio, cred);
138 			break;
139 		case VDIR:
140 			error = nfs_readdirrpc(vp, uio, cred);
141 			break;
142 		};
143 		return (error);
144 	    }
145 	    switch (vp->v_type) {
146 	    case VREG:
147 		nfsstats.biocache_reads++;
148 		lbn = uio->uio_offset / biosize;
149 		on = uio->uio_offset & (biosize-1);
150 		n = min((unsigned)(biosize - on), uio->uio_resid);
151 		diff = np->n_size - uio->uio_offset;
152 		if (diff <= 0)
153 			return (error);
154 		if (diff < n)
155 			n = diff;
156 		bn = lbn*(biosize/DEV_BSIZE);
157 		for (nra = 0; nra < nmp->nm_readahead &&
158 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
159 			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
160 			rasize[nra] = biosize;
161 		}
162 again:
163 		if (nra > 0 && lbn >= vp->v_lastr)
164 			error = breadn(vp, bn, biosize, rablock, rasize, nra,
165 				cred, &bp);
166 		else
167 			error = bread(vp, bn, biosize, cred, &bp);
168 		if (bp->b_validend > 0) {
169 			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
170 				bp->b_flags |= B_INVAL;
171 				if (bp->b_dirtyend > 0) {
172 					if ((bp->b_flags & B_DELWRI) == 0)
173 						panic("nfsbioread");
174 					(void) bwrite(bp);
175 				} else
176 					brelse(bp);
177 				goto again;
178 			}
179 		} else {
180 			bp->b_validoff = 0;
181 			bp->b_validend = biosize - bp->b_resid;
182 		}
183 		vp->v_lastr = lbn;
184 		if (bp->b_resid) {
185 		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
186 			(biosize-bp->b_resid-on);
187 		   n = min(n, diff);
188 		}
189 		break;
190 	    case VLNK:
191 		nfsstats.biocache_readlinks++;
192 		on = 0;
193 		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
194 		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
195 		break;
196 	    case VDIR:
197 		nfsstats.biocache_readdirs++;
198 		on = 0;
199 		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
200 		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
201 		break;
202 	    };
203 	    if (error) {
204 		brelse(bp);
205 		return (error);
206 	    }
207 
208 	    /*
209 	     * For nqnfs:
210 	     * Must check for valid lease, since it may have expired while in
211 	     * bread(). If expired, get a lease.
212 	     * If data is stale, flush and try again.
213 	     * nb: If a read rpc is done by bread() or breada() and there is
214 	     *     no valid lease, a get_lease request will be piggy backed.
215 	     */
216 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
217 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
218 			do {
219 				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
220 			} while (error == NQNFS_EXPIRED);
221 			if (error) {
222 				brelse(bp);
223 				return (error);
224 			}
225 			if ((np->n_flag & NQNFSNONCACHE) ||
226 			    np->n_lrev != np->n_brev ||
227 			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
228 				if (vp->v_type == VDIR) {
229 					np->n_direofoffset = 0;
230 					cache_purge(vp);
231 				}
232 				brelse(bp);
233 				np->n_flag &= ~NMODIFIED;
234 				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
235 				np->n_brev = np->n_lrev;
236 				continue;
237 			}
238 		} else if ((np->n_flag & NQNFSNONCACHE) ||
239 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
240 			np->n_direofoffset = 0;
241 			brelse(bp);
242 			np->n_flag &= ~NMODIFIED;
243 			vinvalbuf(vp, TRUE, cred, uio->uio_procp);
244 			np->n_brev = np->n_lrev;
245 			continue;
246 		}
247 	    }
248 	    if (n > 0)
249 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
250 	    switch (vp->v_type) {
251 	    case VREG:
252 		if (n+on == biosize || uio->uio_offset == np->n_size)
253 			bp->b_flags |= B_AGE;
254 		break;
255 	    case VLNK:
256 		n = 0;
257 		break;
258 	    case VDIR:
259 		uio->uio_offset = bp->b_blkno;
260 		break;
261 	    };
262 	    brelse(bp);
263 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
264 	return (error);
265 }
266 
267 /*
268  * Vnode op for write using bio
269  */
270 nfs_write(ap)
271 	struct vop_write_args /* {
272 		struct vnode *a_vp;
273 		struct uio *a_uio;
274 		int  a_ioflag;
275 		struct ucred *a_cred;
276 	} */ *ap;
277 {
278 	register int biosize;
279 	register struct uio *uio = ap->a_uio;
280 	struct proc *p = uio->uio_procp;
281 	register struct vnode *vp = ap->a_vp;
282 	struct nfsnode *np = VTONFS(vp);
283 	register struct ucred *cred = ap->a_cred;
284 	int ioflag = ap->a_ioflag;
285 	struct buf *bp;
286 	struct vattr vattr;
287 	struct nfsmount *nmp;
288 	daddr_t lbn, bn;
289 	int n, on, error = 0;
290 
291 #ifdef DIAGNOSTIC
292 	if (uio->uio_rw != UIO_WRITE)
293 		panic("nfs_write mode");
294 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
295 		panic("nfs_write proc");
296 #endif
297 	if (vp->v_type != VREG)
298 		return (EIO);
299 	if (np->n_flag & NWRITEERR) {
300 		np->n_flag &= ~NWRITEERR;
301 		return (np->n_error);
302 	}
303 	if (ioflag & (IO_APPEND | IO_SYNC)) {
304 		if (np->n_flag & NMODIFIED) {
305 			np->n_flag &= ~NMODIFIED;
306 			np->n_attrstamp = 0;
307 			vinvalbuf(vp, TRUE, cred, p);
308 		}
309 		if (ioflag & IO_APPEND) {
310 			np->n_attrstamp = 0;
311 			if (error = VOP_GETATTR(vp, &vattr, cred, p))
312 				return (error);
313 			uio->uio_offset = np->n_size;
314 		}
315 	}
316 	nmp = VFSTONFS(vp->v_mount);
317 	if (uio->uio_offset < 0)
318 		return (EINVAL);
319 	if (uio->uio_resid == 0)
320 		return (0);
321 	/*
322 	 * Maybe this should be above the vnode op call, but so long as
323 	 * file servers have no limits, i don't think it matters
324 	 */
325 	if (p && uio->uio_offset + uio->uio_resid >
326 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
327 		psignal(p, SIGXFSZ);
328 		return (EFBIG);
329 	}
330 	/*
331 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
332 	 * will be the same size within a filesystem. nfs_writerpc will
333 	 * still use nm_wsize when sizing the rpc's.
334 	 */
335 	biosize = nmp->nm_rsize;
336 	np->n_flag |= NMODIFIED;
337 	do {
338 
339 		/*
340 		 * Check for a valid write lease.
341 		 * If non-cachable, just do the rpc
342 		 */
343 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
344 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
345 			do {
346 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
347 			} while (error == NQNFS_EXPIRED);
348 			if (error)
349 				return (error);
350 			if (np->n_lrev != np->n_brev ||
351 			    (np->n_flag & NQNFSNONCACHE)) {
352 				vinvalbuf(vp, TRUE, cred, p);
353 				np->n_brev = np->n_lrev;
354 			}
355 		}
356 		if (np->n_flag & NQNFSNONCACHE)
357 			return (nfs_writerpc(vp, uio, cred, 0));
358 		nfsstats.biocache_writes++;
359 		lbn = uio->uio_offset / biosize;
360 		on = uio->uio_offset & (biosize-1);
361 		n = min((unsigned)(biosize - on), uio->uio_resid);
362 		if (uio->uio_offset + n > np->n_size) {
363 			np->n_size = uio->uio_offset + n;
364 			vnode_pager_setsize(vp, (u_long)np->n_size);
365 		}
366 		bn = lbn * (biosize / DEV_BSIZE);
367 again:
368 		bp = getblk(vp, bn, biosize);
369 		if (bp->b_wcred == NOCRED) {
370 			crhold(cred);
371 			bp->b_wcred = cred;
372 		}
373 
374 		/*
375 		 * If the new write will leave a contiguous dirty
376 		 * area, just update the b_dirtyoff and b_dirtyend,
377 		 * otherwise force a write rpc of the old dirty area.
378 		 */
379 		if (bp->b_dirtyend > 0 &&
380 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
381 			bp->b_proc = p;
382 			if (error = bwrite(bp))
383 				return (error);
384 			goto again;
385 		}
386 
387 		/*
388 		 * Check for valid write lease and get one as required.
389 		 * In case getblk() and/or bwrite() delayed us.
390 		 */
391 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
392 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
393 			do {
394 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
395 			} while (error == NQNFS_EXPIRED);
396 			if (error) {
397 				brelse(bp);
398 				return (error);
399 			}
400 			if (np->n_lrev != np->n_brev ||
401 			    (np->n_flag & NQNFSNONCACHE)) {
402 				brelse(bp);
403 				vinvalbuf(vp, TRUE, cred, p);
404 				np->n_brev = np->n_lrev;
405 				goto again;
406 			}
407 		}
408 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
409 			brelse(bp);
410 			return (error);
411 		}
412 		if (bp->b_dirtyend > 0) {
413 			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
414 			bp->b_dirtyend = max((on+n), bp->b_dirtyend);
415 		} else {
416 			bp->b_dirtyoff = on;
417 			bp->b_dirtyend = on+n;
418 		}
419 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
420 		    bp->b_validoff > bp->b_dirtyend) {
421 			bp->b_validoff = bp->b_dirtyoff;
422 			bp->b_validend = bp->b_dirtyend;
423 		} else {
424 			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
425 			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
426 		}
427 
428 		/*
429 		 * If the lease is non-cachable or IO_SYNC do bwrite().
430 		 */
431 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
432 			bp->b_proc = p;
433 			bwrite(bp);
434 		} else if ((n+on) == biosize &&
435 			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
436 			bp->b_flags |= B_AGE;
437 			bp->b_proc = (struct proc *)0;
438 			bawrite(bp);
439 		} else {
440 			bp->b_proc = (struct proc *)0;
441 			bdwrite(bp);
442 		}
443 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
444 	return (error);
445 }
446