xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 39358)
/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)nfs_bio.c	7.3 (Berkeley) 10/21/89
 */

#include "param.h"
#include "user.h"
#include "buf.h"
#include "vnode.h"
#include "trace.h"
#include "mount.h"
#include "nfsnode.h"
#include "nfsiom.h"

/* True and false, how exciting */
#define	TRUE	1
#define	FALSE	0

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_read(vp, uio, offp, ioflag, cred)
	register struct vnode *vp;
	struct uio *uio;
	off_t *offp;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	struct vattr vattr;
	daddr_t lbn, bn, rablock;
	int error = 0;
	int diff;
	long n, on;
	int count;

	if (!(ioflag & IO_NODELOCKED))
		nfs_lock(vp);
	/*
	 * Avoid caching directories. Once everything is using getdirentries()
	 * this will never happen anyhow.
	 */
	if (vp->v_type == VDIR) {
		error = nfs_readrpc(vp, uio, offp, cred);
		if (!(ioflag & IO_NODELOCKED))
			nfs_unlock(vp);
		return (error);
	}
	uio->uio_offset = *offp;
	count = uio->uio_resid;
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
	if (vp->v_type != VREG)
		panic("nfs_read type");
	if (uio->uio_resid == 0)
		goto out;
	if (uio->uio_offset < 0) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the file's modify time on the server has changed since the
	 * last read rpc, or the file has been written to, the data cache
	 * may no longer be consistent with the server, so flush all of
	 * the file's data out of the cache.
	 * This implicitly brings the modify time up to date, since
	 * up-to-date attributes are returned in the reply to any write rpc.
	 * NB: This implies that cached data can be read when it is up to
	 * NFS_ATTRTIMEO seconds out of date. If current attributes are
	 * required, this can be forced by setting n_attrstamp to 0 before
	 * the nfs_getattr() call.
	 */
	if (np->n_flag & NMODIFIED) {
		np->n_flag &= ~NMODIFIED;
		if (error = nfs_blkflush(vp, (daddr_t)0, np->n_size, TRUE))
			goto out;
		if (error = nfs_getattr(vp, &vattr, cred))
			goto out;
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		if (error = nfs_getattr(vp, &vattr, cred))
			goto out;
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			if (error = nfs_blkflush(vp, (daddr_t)0,
				np->n_size, TRUE))
				goto out;
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}
	np->n_flag |= NBUFFERED;
	do {
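		/*
		 * Each pass through this loop handles at most one NFS_BIOSIZE
		 * buffer: lbn is the logical buffer number, on the byte offset
		 * within that buffer and n the number of bytes to copy out,
		 * clamped below so it never runs past the end of the file.
		 */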
		lbn = uio->uio_offset >> NFS_BIOSHIFT;
		on = uio->uio_offset & (NFS_BIOSIZE-1);
		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff <= 0)
			goto out;
		if (diff < n)
			n = diff;
		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
		rablock = (lbn+1)*(NFS_BIOSIZE/DEV_BSIZE);
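		/*
		 * If the last buffer read was the one just before this (n_lastr),
		 * the access looks sequential, so start read-ahead on the next
		 * buffer as well, provided it lies within the file.
		 */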
		if (np->n_lastr+1 == lbn && np->n_size > (rablock*DEV_BSIZE))
			error = breada(vp, bn, NFS_BIOSIZE, rablock, NFS_BIOSIZE,
				cred, &bp);
		else
			error = bread(vp, bn, NFS_BIOSIZE, cred, &bp);
		np->n_lastr = lbn;
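		/*
		 * A short read from the server leaves b_resid set; trim n so
		 * that no bytes beyond the valid part of the buffer are copied.
		 */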
		if (bp->b_resid) {
			diff = (on >= (NFS_BIOSIZE-bp->b_resid)) ? 0 :
				(NFS_BIOSIZE-bp->b_resid-on);
			n = MIN(n, diff);
		}
		if (error) {
			brelse(bp);
			goto out;
		}
		if (n > 0)
			error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
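		/*
		 * Once the buffer has been fully consumed, or end of file
		 * reached, mark it B_AGE so the cache will recycle it early.
		 */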
		if (n+on == NFS_BIOSIZE || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
out:
	*offp = uio->uio_offset;
	if (!(ioflag & IO_NODELOCKED))
		nfs_unlock(vp);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(vp, uio, offp, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	off_t *offp;
	int ioflag;
	struct ucred *cred;
{
	struct buf *bp;
	struct nfsnode *np = VTONFS(vp);
	daddr_t lbn, bn;
	int i, n, on;
	int flags, count, size;
	int error = 0;
	int cnt;
	u_long osize;

	if ((ioflag & IO_NODELOCKED) == 0)
		nfs_lock(vp);
	/* Should we try and do this ?? */
	if (vp->v_type == VREG && (ioflag & IO_APPEND))
		*offp = np->n_size;
	uio->uio_offset = *offp;
	cnt = uio->uio_resid;
#ifdef notdef
	osize = np->n_size;
#endif
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (vp->v_type != VREG)
		panic("nfs_write type");
	if (uio->uio_offset < 0) {
		error = EINVAL;
		goto out;
	}
	if (uio->uio_resid == 0)
		goto out;
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters
	 */
	if (vp->v_type == VREG &&
	    uio->uio_offset + uio->uio_resid >
	      u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(u.u_procp, SIGXFSZ);
		error = EFBIG;
		goto out;
	}
	np->n_flag |= (NMODIFIED|NBUFFERED);
	do {
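		/*
		 * As in nfs_read(), each pass covers at most one NFS_BIOSIZE
		 * buffer; a write past the current end of file extends the
		 * cached file size right away.
		 */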
		lbn = uio->uio_offset >> NFS_BIOSHIFT;
		on = uio->uio_offset & (NFS_BIOSIZE-1);
		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
		if (uio->uio_offset+n > np->n_size)
			np->n_size = uio->uio_offset+n;
		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
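		/*
		 * Toss any pages the VM system still has hashed for these
		 * device blocks, so a stale copy cannot be reclaimed later.
		 */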
		count = howmany(NFS_BIOSIZE, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(vp, bn + i * CLBYTES / DEV_BSIZE);
		bp = getblk(vp, bn, NFS_BIOSIZE);
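		/*
		 * Save the credentials of the first writer in the buffer so
		 * that delayed writes pushed later by the buffer cache are
		 * performed with them.
		 */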
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		if (bp->b_dirtyend > 0) {
			/*
			 * If the new write leaves a contiguous dirty area,
			 * just update b_dirtyoff and b_dirtyend; otherwise
			 * force a write rpc of the old dirty area.
			 */
			if (on <= bp->b_dirtyend && (on+n) >= bp->b_dirtyoff) {
				bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
				bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
			} else {
				/*
				 * Like bwrite() but without the brelse
				 */
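				/*
				 * The old, non-contiguous dirty region is
				 * pushed to the server synchronously here;
				 * the buffer is kept so the new dirty region
				 * can be recorded in it below.
				 */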
				bp->b_flags &= ~(B_READ | B_DONE |
				    B_ERROR | B_DELWRI | B_ASYNC);
				u.u_ru.ru_oublock++;
				VOP_STRATEGY(bp);
				error = biowait(bp);
				if (bp->b_flags & B_ERROR) {
					brelse(bp);
					if (bp->b_error)
						error = bp->b_error;
					else
						error = EIO;
					goto out;
				}
				bp->b_dirtyoff = on;
				bp->b_dirtyend = on+n;
			}
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on+n;
		}
		if (error = uiomove(bp->b_un.b_addr + on, n, uio))
			goto out;
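		/*
		 * A completely filled buffer is started on its way to the
		 * server right away (asynchronously); a partial one is left
		 * as a delayed write in the hope that the rest of the block
		 * will be filled in by a later write.
		 */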
		if ((n+on) == NFS_BIOSIZE) {
			bp->b_flags |= B_AGE;
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
#ifdef notdef
	/* Should we try and do this for nfs ?? */
	if (error && (ioflag & IO_UNIT))
		np->n_size = osize;
	else
#endif
		*offp += cnt - uio->uio_resid;
out:
	if ((ioflag & IO_NODELOCKED) == 0)
		nfs_unlock(vp);
	return (error);
}

/*
 * Flush and invalidate all of the buffers associated with the blocks of vp
 */
nfs_blkflush(vp, blkno, size, invalidate)
	struct vnode *vp;
	daddr_t blkno;
	long size;
	int invalidate;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t curblk, nextblk, ecurblk, lastblk;
	int s, error, allerrors = 0;

	/*
	 * Iterate through each possible hash chain.
	 */
	lastblk = blkno + btodb(size+DEV_BSIZE-1) - 1;
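	/*
	 * Consecutive runs of RND device blocks hash to the same buffer
	 * chain, so advance one chain's worth of blocks per pass; the #if
	 * below just picks divide or mask arithmetic depending on whether
	 * RND is a power of two.
	 */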
	for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
#if RND & (RND-1)
	        nextblk = ((curblk / RND) + 1) * RND;
#else
	        nextblk = ((curblk & ~(RND-1)) + RND);
#endif
	        ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
	        dp = BUFHASH(vp, curblk);
loop:
	        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
	                if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
	                        continue;
	                /* look for overlap */
	                if (ep->b_bcount == 0 || ep->b_blkno > ecurblk ||
	                    ep->b_blkno + btodb(ep->b_bcount) <= curblk)
	                        continue;
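	                /*
	                 * A busy buffer is waited for and the chain rescanned;
	                 * a delayed-write buffer is flushed to the server
	                 * before it can be invalidated.
	                 */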
	                s = splbio();
	                if (ep->b_flags&B_BUSY) {
	                        ep->b_flags |= B_WANTED;
	                        sleep((caddr_t)ep, PRIBIO+1);
	                        splx(s);
	                        goto loop;
	                }
	                if (ep->b_flags & B_DELWRI) {
	                        splx(s);
	                        notavail(ep);
	                        if (error = bwrite(ep))
	                                allerrors = error;
	                        goto loop;
	                }
	                splx(s);
			if (invalidate) {
				notavail(ep);
				ep->b_flags |= B_INVAL;
				brelvp(ep);
				brelse(ep);
			}
	        }
	}
	return (allerrors);
}
331