xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 52196)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_bio.c	7.21 (Berkeley) 01/14/92
11  */
12 
13 #include "param.h"
14 #include "resourcevar.h"
15 #include "proc.h"
16 #include "buf.h"
17 #include "vnode.h"
18 #include "trace.h"
19 #include "mount.h"
20 #include "kernel.h"
21 #include "machine/endian.h"
22 #include "nfsnode.h"
23 #include "rpcv2.h"
24 #include "nfsv2.h"
25 #include "nfs.h"
26 #include "nfsmount.h"
27 #include "nqnfs.h"
28 
/*
 * Boolean constants.  TRUE is what this file hands to vinvalbuf()
 * as its second argument.
 */
#define	FALSE	0
#define	TRUE	1
32 
33 /*
34  * Vnode op for read using bio
35  * Any similarity to readip() is purely coincidental
36  */
37 nfs_bioread(vp, uio, ioflag, cred)
38 	register struct vnode *vp;
39 	register struct uio *uio;
40 	int ioflag;
41 	struct ucred *cred;
42 {
43 	register struct nfsnode *np = VTONFS(vp);
44 	register int biosize;
45 	struct buf *bp;
46 	struct vattr vattr;
47 	struct nfsmount *nmp;
48 	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
49 	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
50 	int n, on;
51 
52 #ifdef lint
53 	ioflag = ioflag;
54 #endif /* lint */
55 #ifdef DIAGNOSTIC
56 	if (uio->uio_rw != UIO_READ)
57 		panic("nfs_read mode");
58 #endif
59 	if (uio->uio_resid == 0)
60 		return (0);
61 	if (uio->uio_offset < 0 && vp->v_type != VDIR)
62 		return (EINVAL);
63 	nmp = VFSTONFS(vp->v_mount);
64 	biosize = nmp->nm_rsize;
65 	/*
66 	 * For nfs, cache consistency can only be maintained approximately.
67 	 * Although RFC1094 does not specify the criteria, the following is
68 	 * believed to be compatible with the reference port.
69 	 * For nqnfs, full cache consistency is maintained within the loop.
70 	 * For nfs:
71 	 * If the file's modify time on the server has changed since the
72 	 * last read rpc or you have written to the file,
73 	 * you may have lost data cache consistency with the
74 	 * server, so flush all of the file's data out of the cache.
75 	 * Then force a getattr rpc to ensure that you have up to date
76 	 * attributes.
77 	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
78 	 * the ones changing the modify time.
79 	 * NB: This implies that cache data can be read when up to
80 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
81 	 * attributes this could be forced by setting n_attrstamp to 0 before
82 	 * the nfs_getattr() call.
83 	 */
84 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
85 		if (np->n_flag & NMODIFIED) {
86 			np->n_flag &= ~NMODIFIED;
87 			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
88 			     vp->v_type != VREG)
89 				vinvalbuf(vp, TRUE);
90 			np->n_attrstamp = 0;
91 			np->n_direofoffset = 0;
92 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
93 				return (error);
94 			np->n_mtime = vattr.va_mtime.tv_sec;
95 		} else {
96 			if (error = nfs_getattr(vp, &vattr, cred, uio->uio_procp))
97 				return (error);
98 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
99 				np->n_direofoffset = 0;
100 				vinvalbuf(vp, TRUE);
101 				np->n_mtime = vattr.va_mtime.tv_sec;
102 			}
103 		}
104 	}
105 	do {
106 
107 	    /*
108 	     * Get a valid lease. If cached data is stale, flush it.
109 	     */
110 	    if ((nmp->nm_flag & NFSMNT_NQNFS) &&
111 		NQNFS_CKINVALID(vp, np, NQL_READ)) {
112 		do {
113 			error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
114 		} while (error == NQNFS_EXPIRED);
115 		if (error)
116 			return (error);
117 		if (QUADNE(np->n_lrev, np->n_brev) ||
118 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
119 			if (vp->v_type == VDIR) {
120 				np->n_direofoffset = 0;
121 				cache_purge(vp);
122 			}
123 			np->n_flag &= ~NMODIFIED;
124 			vinvalbuf(vp, TRUE);
125 			np->n_brev = np->n_lrev;
126 		}
127 	    }
128 	    if (np->n_flag & NQNFSNONCACHE) {
129 		switch (vp->v_type) {
130 		case VREG:
131 			error = nfs_readrpc(vp, uio, cred);
132 			break;
133 		case VLNK:
134 			error = nfs_readlinkrpc(vp, uio, cred);
135 			break;
136 		case VDIR:
137 			error = nfs_readdirrpc(vp, uio, cred);
138 			break;
139 		};
140 		return (error);
141 	    }
142 	    switch (vp->v_type) {
143 	    case VREG:
144 		nfsstats.biocache_reads++;
145 		lbn = uio->uio_offset / biosize;
146 		on = uio->uio_offset & (biosize-1);
147 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
148 		diff = np->n_size - uio->uio_offset;
149 		if (diff <= 0)
150 			return (error);
151 		if (diff < n)
152 			n = diff;
153 		bn = lbn*(biosize/DEV_BSIZE);
154 		for (nra = 0; nra < nmp->nm_readahead &&
155 			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
156 			rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
157 			rasize[nra] = biosize;
158 		}
159 again:
160 		if (nra > 0 && lbn >= vp->v_lastr)
161 			error = breadn(vp, bn, biosize, rablock, rasize, nra,
162 				cred, &bp);
163 		else
164 			error = bread(vp, bn, biosize, cred, &bp);
165 		if (bp->b_validend > 0) {
166 			if (on < bp->b_validoff || (on+n) > bp->b_validend) {
167 				bp->b_flags |= B_INVAL;
168 				if (bp->b_dirtyend > 0) {
169 					if ((bp->b_flags & B_DELWRI) == 0)
170 						panic("nfsbioread");
171 					(void) bwrite(bp);
172 				} else
173 					brelse(bp);
174 				goto again;
175 			}
176 		} else {
177 			bp->b_validoff = 0;
178 			bp->b_validend = biosize - bp->b_resid;
179 		}
180 		vp->v_lastr = lbn;
181 		if (bp->b_resid) {
182 		   diff = (on >= (biosize-bp->b_resid)) ? 0 :
183 			(biosize-bp->b_resid-on);
184 		   n = MIN(n, diff);
185 		}
186 		break;
187 	    case VLNK:
188 		nfsstats.biocache_readlinks++;
189 		on = 0;
190 		error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
191 		n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
192 		break;
193 	    case VDIR:
194 		nfsstats.biocache_readdirs++;
195 		on = 0;
196 		error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
197 		n = MIN(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
198 		break;
199 	    };
200 	    if (error) {
201 		brelse(bp);
202 		return (error);
203 	    }
204 
205 	    /*
206 	     * For nqnfs:
207 	     * Must check for valid lease, since it may have expired while in
208 	     * bread(). If expired, get a lease.
209 	     * If data is stale, flush and try again.
210 	     * nb: If a read rpc is done by bread() or breada() and there is
211 	     *     no valid lease, a get_lease request will be piggy backed.
212 	     */
213 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
214 		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
215 			do {
216 				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
217 			} while (error == NQNFS_EXPIRED);
218 			if (error) {
219 				brelse(bp);
220 				return (error);
221 			}
222 			if ((np->n_flag & NQNFSNONCACHE) ||
223 			    QUADNE(np->n_lrev, np->n_brev) ||
224 			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
225 				if (vp->v_type == VDIR) {
226 					np->n_direofoffset = 0;
227 					cache_purge(vp);
228 				}
229 				brelse(bp);
230 				np->n_flag &= ~NMODIFIED;
231 				vinvalbuf(vp, TRUE);
232 				np->n_brev = np->n_lrev;
233 				continue;
234 			}
235 		} else if ((np->n_flag & NQNFSNONCACHE) ||
236 		    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
237 			np->n_direofoffset = 0;
238 			brelse(bp);
239 			np->n_flag &= ~NMODIFIED;
240 			vinvalbuf(vp, TRUE);
241 			np->n_brev = np->n_lrev;
242 			continue;
243 		}
244 	    }
245 	    if (n > 0)
246 		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
247 	    switch (vp->v_type) {
248 	    case VREG:
249 		if (n+on == biosize || uio->uio_offset == np->n_size)
250 			bp->b_flags |= B_AGE;
251 		break;
252 	    case VLNK:
253 		n = 0;
254 		break;
255 	    case VDIR:
256 		uio->uio_offset = bp->b_blkno;
257 		break;
258 	    };
259 	    brelse(bp);
260 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
261 	return (error);
262 }
263 
264 /*
265  * Vnode op for write using bio
266  */
267 nfs_write(vp, uio, ioflag, cred)
268 	register struct vnode *vp;
269 	register struct uio *uio;
270 	int ioflag;
271 	struct ucred *cred;
272 {
273 	register int biosize;
274 	struct proc *p = uio->uio_procp;
275 	struct buf *bp;
276 	struct nfsnode *np = VTONFS(vp);
277 	struct vattr vattr;
278 	struct nfsmount *nmp;
279 	daddr_t lbn, bn;
280 	int n, on, error = 0;
281 
282 #ifdef DIAGNOSTIC
283 	if (uio->uio_rw != UIO_WRITE)
284 		panic("nfs_write mode");
285 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
286 		panic("nfs_write proc");
287 #endif
288 	if (vp->v_type != VREG)
289 		return (EIO);
290 	nmp = VFSTONFS(vp->v_mount);
291 	if (uio->uio_offset < 0)
292 		return (EINVAL);
293 	if (uio->uio_resid == 0)
294 		return (0);
295 	/*
296 	 * Maybe this should be above the vnode op call, but so long as
297 	 * file servers have no limits, i don't think it matters
298 	 */
299 	if (p && uio->uio_offset + uio->uio_resid >
300 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
301 		psignal(p, SIGXFSZ);
302 		return (EFBIG);
303 	}
304 	/*
305 	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
306 	 * will be the same size within a filesystem. nfs_writerpc will
307 	 * still use nm_wsize when sizing the rpc's.
308 	 */
309 	biosize = nmp->nm_rsize;
310 	np->n_flag |= NMODIFIED;
311 	do {
312 
313 		/*
314 		 * Check for a valid write lease.
315 		 * If non-cachable, just do the rpc
316 		 */
317 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
318 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
319 			do {
320 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
321 			} while (error == NQNFS_EXPIRED);
322 			if (error)
323 				return (error);
324 			if (QUADNE(np->n_lrev, np->n_brev) ||
325 			    (np->n_flag & NQNFSNONCACHE)) {
326 				vinvalbuf(vp, TRUE);
327 				np->n_brev = np->n_lrev;
328 			}
329 		}
330 		if (np->n_flag & NQNFSNONCACHE)
331 			return (nfs_writerpc(vp, uio, cred));
332 		nfsstats.biocache_writes++;
333 		lbn = uio->uio_offset / biosize;
334 		on = uio->uio_offset & (biosize-1);
335 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
336 		if (uio->uio_offset + n > np->n_size) {
337 			np->n_size = uio->uio_offset + n;
338 			vnode_pager_setsize(vp, np->n_size);
339 		}
340 		bn = lbn * (biosize / DEV_BSIZE);
341 again:
342 		bp = getblk(vp, bn, biosize);
343 		if (bp->b_wcred == NOCRED) {
344 			crhold(cred);
345 			bp->b_wcred = cred;
346 		}
347 
348 		/*
349 		 * If the new write will leave a contiguous dirty
350 		 * area, just update the b_dirtyoff and b_dirtyend,
351 		 * otherwise force a write rpc of the old dirty area.
352 		 */
353 		if (bp->b_dirtyend > 0 &&
354 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
355 			bp->b_proc = p;
356 			if (error = bwrite(bp))
357 				return (error);
358 			goto again;
359 		}
360 
361 		/*
362 		 * Check for valid write lease and get one as required.
363 		 * In case getblk() and/or bwrite() delayed us.
364 		 */
365 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
366 		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
367 			do {
368 				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
369 			} while (error == NQNFS_EXPIRED);
370 			if (error) {
371 				brelse(bp);
372 				return (error);
373 			}
374 			if (QUADNE(np->n_lrev, np->n_brev) ||
375 			    (np->n_flag & NQNFSNONCACHE)) {
376 				vinvalbuf(vp, TRUE);
377 				np->n_brev = np->n_lrev;
378 			}
379 		}
380 		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
381 			brelse(bp);
382 			return (error);
383 		}
384 		if (bp->b_dirtyend > 0) {
385 			bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
386 			bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
387 		} else {
388 			bp->b_dirtyoff = on;
389 			bp->b_dirtyend = on+n;
390 		}
391 		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
392 		    bp->b_validoff > bp->b_dirtyend) {
393 			bp->b_validoff = bp->b_dirtyoff;
394 			bp->b_validend = bp->b_dirtyend;
395 		} else {
396 			bp->b_validoff = MIN(bp->b_validoff, bp->b_dirtyoff);
397 			bp->b_validend = MAX(bp->b_validend, bp->b_dirtyend);
398 		}
399 
400 		/*
401 		 * If the lease is non-cachable or IO_SYNC do bwrite().
402 		 */
403 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
404 			bp->b_proc = p;
405 			bwrite(bp);
406 		} else if ((n+on) == biosize &&
407 			 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
408 			bp->b_flags |= B_AGE;
409 			bp->b_proc = (struct proc *)0;
410 			bawrite(bp);
411 		} else {
412 			bp->b_proc = (struct proc *)0;
413 			bdwrite(bp);
414 		}
415 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
416 	return (error);
417 }
418