xref: /csrg-svn/sys/nfs/nfs_bio.c (revision 39670)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  *
20  *	@(#)nfs_bio.c	7.7 (Berkeley) 11/30/89
21  */
22 
23 #include "param.h"
24 #include "user.h"
25 #include "buf.h"
26 #include "vnode.h"
27 #include "trace.h"
28 #include "mount.h"
29 #include "nfsnode.h"
30 #include "nfsiom.h"
31 
32 /* True and false, how exciting */
33 #define	TRUE	1
34 #define	FALSE	0
35 
36 /*
37  * Vnode op for read using bio
38  * Any similarity to readip() is purely coincidental
39  */
40 nfs_read(vp, uio, ioflag, cred)
41 	register struct vnode *vp;
42 	struct uio *uio;
43 	int ioflag;
44 	struct ucred *cred;
45 {
46 	register struct nfsnode *np = VTONFS(vp);
47 	struct buf *bp;
48 	struct vattr vattr;
49 	daddr_t lbn, bn, rablock;
50 	int diff, error = 0;
51 	long n, on;
52 
53 	/*
54 	 * Avoid caching directories. Once everything is using getdirentries()
55 	 * this will never happen anyhow.
56 	 */
57 	if (vp->v_type == VDIR)
58 		return (nfs_readrpc(vp, uio, cred));
59 	if (uio->uio_rw != UIO_READ)
60 		panic("nfs_read mode");
61 	if (vp->v_type != VREG)
62 		panic("nfs_read type");
63 	if (uio->uio_resid == 0)
64 		return (0);
65 	if (uio->uio_offset < 0)
66 		return (EINVAL);
67 	/*
68 	 * If the file's modify time on the server has changed since the
69 	 * last read rpc or you have written to the file,
70 	 * you may have lost data cache consistency with the
71 	 * server, so flush all of the file's data out of the cache.
72 	 * This will implicitly bring the modify time up to date, since
73 	 * up to date attributes are returned in the reply to any write rpc's
74 	 * NB: This implies that cache data can be read when up to
75 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
76 	 * attributes this could be forced by setting n_attrstamp to 0 before
77 	 * the nfs_getattr() call.
78 	 */
79 	if (np->n_flag & NMODIFIED) {
80 		np->n_flag &= ~NMODIFIED;
81 		if (error = nfs_blkflush(vp, (daddr_t)0, np->n_size, TRUE))
82 			return (error);
83 		if (error = nfs_getattr(vp, &vattr, cred))
84 			return (error);
85 		np->n_mtime = vattr.va_mtime.tv_sec;
86 	} else {
87 		if (error = nfs_getattr(vp, &vattr, cred))
88 			return (error);
89 		if (np->n_mtime != vattr.va_mtime.tv_sec) {
90 			if (error = nfs_blkflush(vp, (daddr_t)0,
91 				np->n_size, TRUE))
92 				return (error);
93 			np->n_mtime = vattr.va_mtime.tv_sec;
94 		}
95 	}
96 	np->n_flag |= NBUFFERED;
97 	do {
98 		lbn = uio->uio_offset >> NFS_BIOSHIFT;
99 		on = uio->uio_offset & (NFS_BIOSIZE-1);
100 		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
101 		diff = np->n_size - uio->uio_offset;
102 		if (diff <= 0)
103 			return (error);
104 		if (diff < n)
105 			n = diff;
106 		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
107 		rablock = (lbn+1)*(NFS_BIOSIZE/DEV_BSIZE);
108 		if (np->n_lastr+1 == lbn && np->n_size > (rablock*DEV_BSIZE))
109 			error = breada(vp, bn, NFS_BIOSIZE, rablock, NFS_BIOSIZE,
110 				cred, &bp);
111 		else
112 			error = bread(vp, bn, NFS_BIOSIZE, cred, &bp);
113 		np->n_lastr = lbn;
114 		if (bp->b_resid) {
115 			diff = (on >= (NFS_BIOSIZE-bp->b_resid)) ? 0 :
116 				(NFS_BIOSIZE-bp->b_resid-on);
117 			n = MIN(n, diff);
118 		}
119 		if (error) {
120 			brelse(bp);
121 			return (error);
122 		}
123 		if (n > 0)
124 			error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
125 		if (n+on == NFS_BIOSIZE || uio->uio_offset == np->n_size)
126 			bp->b_flags |= B_AGE;
127 		brelse(bp);
128 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
129 	return (error);
130 }
131 
132 /*
133  * Vnode op for write using bio
134  */
135 nfs_write(vp, uio, ioflag, cred)
136 	register struct vnode *vp;
137 	register struct uio *uio;
138 	int ioflag;
139 	struct ucred *cred;
140 {
141 	struct buf *bp;
142 	struct nfsnode *np = VTONFS(vp);
143 	daddr_t lbn, bn;
144 	int i, n, on, count, error = 0;
145 
146 	/* Should we try and do this ?? */
147 	if (vp->v_type == VREG && (ioflag & IO_APPEND))
148 		uio->uio_offset = np->n_size;
149 #ifdef notdef
150 	cnt = uio->uio_resid;
151 	osize = np->n_size;
152 #endif
153 	if (uio->uio_rw != UIO_WRITE)
154 		panic("nfs_write mode");
155 	if (vp->v_type != VREG)
156 		panic("nfs_write type");
157 	if (uio->uio_offset < 0)
158 		return (EINVAL);
159 	if (uio->uio_resid == 0)
160 		return (0);
161 	/*
162 	 * Maybe this should be above the vnode op call, but so long as
163 	 * file servers have no limits, i don't think it matters
164 	 */
165 	if (vp->v_type == VREG &&
166 	    uio->uio_offset + uio->uio_resid >
167 	      u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
168 		psignal(u.u_procp, SIGXFSZ);
169 		return (EFBIG);
170 	}
171 	np->n_flag |= (NMODIFIED|NBUFFERED);
172 	do {
173 		lbn = uio->uio_offset >> NFS_BIOSHIFT;
174 		on = uio->uio_offset & (NFS_BIOSIZE-1);
175 		n = MIN((unsigned)(NFS_BIOSIZE - on), uio->uio_resid);
176 		if (uio->uio_offset+n > np->n_size)
177 			np->n_size = uio->uio_offset+n;
178 		bn = lbn*(NFS_BIOSIZE/DEV_BSIZE);
179 		count = howmany(NFS_BIOSIZE, CLBYTES);
180 		for (i = 0; i < count; i++)
181 			munhash(vp, bn + i * CLBYTES / DEV_BSIZE);
182 		bp = getblk(vp, bn, NFS_BIOSIZE);
183 		if (bp->b_wcred == NOCRED) {
184 			crhold(cred);
185 			bp->b_wcred = cred;
186 		}
187 		if (bp->b_dirtyend > 0) {
188 			/*
189 			 * If the new write will leave a contiguous
190 			 * dirty area, just update the b_dirtyoff and
191 			 * b_dirtyend
192 			 * otherwise force a write rpc of the old dirty
193 			 * area
194 			 */
195 			if (on <= bp->b_dirtyend && (on+n) >= bp->b_dirtyoff) {
196 				bp->b_dirtyoff = MIN(on, bp->b_dirtyoff);
197 				bp->b_dirtyend = MAX((on+n), bp->b_dirtyend);
198 			} else {
199 				/*
200 				 * Like bwrite() but without the brelse
201 				 */
202 				bp->b_flags &= ~(B_READ | B_DONE |
203 				    B_ERROR | B_DELWRI | B_ASYNC);
204 				u.u_ru.ru_oublock++;
205 				VOP_STRATEGY(bp);
206 				error = biowait(bp);
207 				if (bp->b_flags & B_ERROR) {
208 					brelse(bp);
209 					if (bp->b_error)
210 						error = bp->b_error;
211 					else
212 						error = EIO;
213 					return (error);
214 				}
215 				bp->b_dirtyoff = on;
216 				bp->b_dirtyend = on+n;
217 			}
218 		} else {
219 			bp->b_dirtyoff = on;
220 			bp->b_dirtyend = on+n;
221 		}
222 		if (error = uiomove(bp->b_un.b_addr + on, n, uio))
223 			return (error);
224 		if ((n+on) == NFS_BIOSIZE) {
225 			bp->b_flags |= B_AGE;
226 			bawrite(bp);
227 		} else {
228 			bdwrite(bp);
229 		}
230 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
231 #ifdef notdef
232 	/* Should we try and do this for nfs ?? */
233 	if (error && (ioflag & IO_UNIT)) {
234 		np->n_size = osize;
235 		uio->uio_offset -= cnt - uio->uio_resid;
236 		uio->uio_resid = cnt;
237 	}
238 #endif
239 	return (error);
240 }
241 
242 /*
243  * Flush and invalidate all of the buffers associated with the blocks of vp
244  */
245 nfs_blkflush(vp, blkno, size, invalidate)
246 	struct vnode *vp;
247 	daddr_t blkno;
248 	long size;
249 	int invalidate;
250 {
251 	register struct buf *ep;
252 	struct buf *dp;
253 	daddr_t curblk, nextblk, ecurblk, lastblk;
254 	int s, error, allerrors = 0;
255 
256 	/*
257 	 * Iterate through each possible hash chain.
258 	 */
259 	lastblk = blkno + btodb(size+DEV_BSIZE-1) - 1;
260 	for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
261 #if RND & (RND-1)
262 	        nextblk = ((curblk / RND) + 1) * RND;
263 #else
264 	        nextblk = ((curblk & ~(RND-1)) + RND);
265 #endif
266 	        ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
267 	        dp = BUFHASH(vp, curblk);
268 loop:
269 	        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
270 	                if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
271 	                        continue;
272 	                /* look for overlap */
273 	                if (ep->b_bcount == 0 || ep->b_lblkno > ecurblk ||
274 	                    ep->b_lblkno + btodb(ep->b_bcount) <= curblk)
275 	                        continue;
276 	                s = splbio();
277 	                if (ep->b_flags&B_BUSY) {
278 	                        ep->b_flags |= B_WANTED;
279 	                        sleep((caddr_t)ep, PRIBIO+1);
280 	                        splx(s);
281 	                        goto loop;
282 	                }
283 	                if (ep->b_flags & B_DELWRI) {
284 	                        splx(s);
285 	                        notavail(ep);
286 	                        if (error = bwrite(ep))
287 	                                allerrors = error;
288 	                        goto loop;
289 	                }
290 	                splx(s);
291 			if (invalidate) {
292 				notavail(ep);
293 				ep->b_flags |= B_INVAL;
294 				brelse(ep);
295 			}
296 	        }
297 	}
298 	return (allerrors);
299 }
300