/* xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 39487) */
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  *
20  *	@(#)nfs_bio.c	7.4 (Berkeley) 11/03/89
21  */
22 
23 #include "param.h"
24 #include "user.h"
25 #include "buf.h"
26 #include "vnode.h"
27 #include "trace.h"
28 #include "mount.h"
29 #include "nfsnode.h"
30 #include "nfsiom.h"
31 
32 /* True and false, how exciting */
33 #define	TRUE	1
34 #define	FALSE	0
35 
/*
 * Vnode op for read using bio (the block I/O buffer cache).
 * Any similarity to readip() is purely coincidental.
 *
 * Reads through the buffer cache in NFS_BIOSIZE blocks, with one-block
 * read-ahead on sequential access.  Directory reads bypass the cache and
 * go straight to a read rpc.  Returns 0 or an errno value; *offp is
 * updated to the new file offset.  The vnode is locked for the duration
 * unless the caller passed IO_NODELOCKED.
 */
nfs_read(vp, uio, offp, ioflag, cred)
	register struct vnode *vp;
	struct uio *uio;
	off_t *offp;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	struct vattr vattr;
	daddr_t lbn, bn, rablock;
	int diff, error = 0;
	long n, on;

	if (!(ioflag & IO_NODELOCKED))
		nfs_lock(vp);
	/*
	 * Avoid caching directories. Once everything is using getdirentries()
	 * this will never happen anyhow.
	 */
	if (vp->v_type == VDIR) {
		error = nfs_readrpc(vp, uio, offp, cred);
		if (!(ioflag & IO_NODELOCKED))
			nfs_unlock(vp);
		return (error);
	}
	uio->uio_offset = *offp;
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
	if (vp->v_type != VREG)
		panic("nfs_read type");
	if (uio->uio_resid == 0)
		goto out;
	if (uio->uio_offset < 0) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * This will implicitly bring the modify time up to date, since
	 * up to date attributes are returned in the reply to any write rpc's
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the nfs_getattr() call.
	 */
	if (np->n_flag & NMODIFIED) {
		/* We wrote the file: push/invalidate, then resync n_mtime. */
		np->n_flag &= ~NMODIFIED;
		if (error = nfs_blkflush(vp, (daddr_t)0, np->n_size, TRUE))
			goto out;
		if (error = nfs_getattr(vp, &vattr, cred))
			goto out;
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		/* Someone else may have written: flush only if mtime moved. */
		if (error = nfs_getattr(vp, &vattr, cred))
			goto out;
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			if (error = nfs_blkflush(vp, (daddr_t)0,
				np->n_size, TRUE))
				goto out;
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}
	np->n_flag |= NBUFFERED;
	do {
		/* Logical cache block, offset within it, bytes to move. */
		lbn = uio->uio_offset >> NFS_BIOSHIFT;
		on = uio->uio_offset & (NFS_BIOSIZE-1);
		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
		/* Clip the transfer at end of file; diff <= 0 means EOF. */
		diff = np->n_size - uio->uio_offset;
		if (diff <= 0)
			goto out;
		if (diff < n)
			n = diff;
		/* Block addresses in DEV_BSIZE units for the buffer cache. */
		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
		rablock = (lbn+1)*(NFS_BIOSIZE/DEV_BSIZE);
		/* Sequential access: also start a read-ahead of the next block. */
		if (np->n_lastr+1 == lbn && np->n_size > (rablock*DEV_BSIZE))
			error = breada(vp, bn, NFS_BIOSIZE, rablock, NFS_BIOSIZE,
				cred, &bp);
		else
			error = bread(vp, bn, NFS_BIOSIZE, cred, &bp);
		np->n_lastr = lbn;
		/*
		 * A short read (b_resid > 0) means only the first part of the
		 * block is valid; clip n so we never copy invalid bytes.
		 * NOTE(review): bp is dereferenced before the error check
		 * below -- this relies on bread/breada always handing back a
		 * buffer even on failure; confirm against this kernel's bio
		 * code.
		 */
		if (bp->b_resid) {
			diff = (on >= (NFS_BIOSIZE-bp->b_resid)) ? 0 :
				(NFS_BIOSIZE-bp->b_resid-on);
			n = MIN(n, diff);
		}
		if (error) {
			brelse(bp);
			goto out;
		}
		if (n > 0)
			error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
		/* Block fully consumed (or at EOF): age it for quick reuse. */
		if (n+on == NFS_BIOSIZE || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
out:
	*offp = uio->uio_offset;
	if (!(ioflag & IO_NODELOCKED))
		nfs_unlock(vp);
	return (error);
}
145 
146 /*
147  * Vnode op for write using bio
148  */
149 nfs_write(vp, uio, offp, ioflag, cred)
150 	register struct vnode *vp;
151 	register struct uio *uio;
152 	off_t *offp;
153 	int ioflag;
154 	struct ucred *cred;
155 {
156 	struct buf *bp;
157 	struct nfsnode *np = VTONFS(vp);
158 	daddr_t lbn, bn;
159 	int i, n, on, cnt, count, error = 0;
160 
161 	if ((ioflag & IO_NODELOCKED) == 0)
162 		nfs_lock(vp);
163 	/* Should we try and do this ?? */
164 	if (vp->v_type == VREG && (ioflag & IO_APPEND))
165 		*offp = np->n_size;
166 	uio->uio_offset = *offp;
167 	cnt = uio->uio_resid;
168 #ifdef notdef
169 	osize = np->n_size;
170 #endif
171 	if (uio->uio_rw != UIO_WRITE)
172 		panic("nfs_write mode");
173 	if (vp->v_type != VREG)
174 		panic("nfs_write type");
175 	if (uio->uio_offset < 0) {
176 		error = EINVAL;
177 		goto out;
178 	}
179 	if (uio->uio_resid == 0)
180 		goto out;
181 	/*
182 	 * Maybe this should be above the vnode op call, but so long as
183 	 * file servers have no limits, i don't think it matters
184 	 */
185 	if (vp->v_type == VREG &&
186 	    uio->uio_offset + uio->uio_resid >
187 	      u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
188 		psignal(u.u_procp, SIGXFSZ);
189 		error = EFBIG;
190 		goto out;
191 	}
192 	np->n_flag |= (NMODIFIED|NBUFFERED);
193 	do {
194 		lbn = uio->uio_offset >> NFS_BIOSHIFT;
195 		on = uio->uio_offset & (NFS_BIOSIZE-1);
196 		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
197 		if (uio->uio_offset+n > np->n_size)
198 			np->n_size = uio->uio_offset+n;
199 		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
200 		count = howmany(NFS_BIOSIZE, CLBYTES);
201 		for (i = 0; i < count; i++)
202 			munhash(vp, bn + i * CLBYTES / DEV_BSIZE);
203 		bp = getblk(vp, bn, NFS_BIOSIZE);
204 		if (bp->b_wcred == NOCRED) {
205 			crhold(cred);
206 			bp->b_wcred = cred;
207 		}
208 		if (bp->b_dirtyend > 0) {
209 			/*
210 			 * Iff the new write will leave a contiguous
211 			 * dirty area, just update the b_dirtyoff and
212 			 * b_dirtyend
213 			 * otherwise force a write rpc of the old dirty
214 			 * area
215 			 */
216 			if (on <= bp->b_dirtyend && (on+n) >= bp->b_dirtyoff) {
217 				bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
218 				bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
219 			} else {
220 				/*
221 				 * Like bwrite() but without the brelse
222 				 */
223 				bp->b_flags &= ~(B_READ | B_DONE |
224 				    B_ERROR | B_DELWRI | B_ASYNC);
225 				u.u_ru.ru_oublock++;
226 				VOP_STRATEGY(bp);
227 				error = biowait(bp);
228 				if (bp->b_flags & B_ERROR) {
229 					brelse(bp);
230 					if (bp->b_error)
231 						error = bp->b_error;
232 					else
233 						error = EIO;
234 					goto out;
235 				}
236 				bp->b_dirtyoff = on;
237 				bp->b_dirtyend = on+n;
238 			}
239 		} else {
240 			bp->b_dirtyoff = on;
241 			bp->b_dirtyend = on+n;
242 		}
243 		if (error = uiomove(bp->b_un.b_addr + on, n, uio))
244 			goto out;
245 		if ((n+on) == NFS_BIOSIZE) {
246 			bp->b_flags |= B_AGE;
247 			bawrite(bp);
248 		} else {
249 			bdwrite(bp);
250 		}
251 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
252 #ifdef notdef
253 	/* Should we try and do this for nfs ?? */
254 	if (error && (ioflag & IO_UNIT))
255 		np->n_size = osize;
256 	else
257 #endif
258 		*offp += cnt - uio->uio_resid;
259 out:
260 	if ((ioflag & IO_NODELOCKED) == 0)
261 		nfs_unlock(vp);
262 	return (error);
263 }
264 
/*
 * Flush and invalidate all of the buffers associated with the blocks of vp
 * covering the byte range [blkno, blkno + size).  Delayed-write buffers
 * are written out synchronously; when "invalidate" is true each buffer
 * found is then marked B_INVAL and dissociated from the vnode.
 * Returns the last bwrite() error encountered (0 if none).
 */
nfs_blkflush(vp, blkno, size, invalidate)
	struct vnode *vp;
	daddr_t blkno;
	long size;
	int invalidate;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t curblk, nextblk, ecurblk, lastblk;
	int s, error, allerrors = 0;

	/*
	 * Iterate through each possible hash chain.
	 */
	lastblk = blkno + btodb(size+DEV_BSIZE-1) - 1;
	for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
		/*
		 * Advance curblk to the next RND boundary; blocks within one
		 * RND stride land on the same hash chain.  The mask form is
		 * a cheaper equivalent when RND is a power of two.
		 */
#if RND & (RND-1)
	        nextblk = ((curblk / RND) + 1) * RND;
#else
	        nextblk = ((curblk & ~(RND-1)) + RND);
#endif
	        ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
	        dp = BUFHASH(vp, curblk);
loop:
	        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
	                if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
	                        continue;
	                /* look for overlap with [curblk, ecurblk] */
	                if (ep->b_bcount == 0 || ep->b_blkno > ecurblk ||
	                    ep->b_blkno + btodb(ep->b_bcount) <= curblk)
	                        continue;
	                s = splbio();
	                /*
	                 * Busy buffer: wait for it, then rescan the whole
	                 * chain -- it may have changed while we slept.
	                 */
	                if (ep->b_flags&B_BUSY) {
	                        ep->b_flags |= B_WANTED;
	                        sleep((caddr_t)ep, PRIBIO+1);
	                        splx(s);
	                        goto loop;
	                }
	                /*
	                 * Delayed write: push it synchronously, remembering
	                 * the last error, then rescan (bwrite releases ep and
	                 * the chain may have changed underneath us).
	                 * NOTE(review): splx() is dropped before notavail();
	                 * presumably notavail raises its own spl -- confirm
	                 * against this kernel's bio code.
	                 */
	                if (ep->b_flags & B_DELWRI) {
	                        splx(s);
	                        notavail(ep);
	                        if (error = bwrite(ep))
	                                allerrors = error;
	                        goto loop;
	                }
	                splx(s);
			/* Clean buffer: optionally invalidate and release it. */
			if (invalidate) {
				notavail(ep);
				ep->b_flags |= B_INVAL;
				brelvp(ep);
				brelse(ep);
			}
	        }
	}
	return (allerrors);
}
324