xref: /dflybsd-src/sys/vfs/nfs/nfs_vnops.c (revision 1f2de5d41c9be614e9a1cba7cf16de309a2ea210)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
37  * $FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150.2.5 2001/12/20 19:56:28 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_vnops.c,v 1.9 2003/08/07 21:17:42 dillon Exp $
39  */
40 
41 
42 /*
43  * vnode op calls for Sun NFS version 2 and 3
44  */
45 
46 #include "opt_inet.h"
47 
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/systm.h>
51 #include <sys/resourcevar.h>
52 #include <sys/proc.h>
53 #include <sys/mount.h>
54 #include <sys/buf.h>
55 #include <sys/malloc.h>
56 #include <sys/mbuf.h>
57 #include <sys/namei.h>
58 #include <sys/socket.h>
59 #include <sys/vnode.h>
60 #include <sys/dirent.h>
61 #include <sys/fcntl.h>
62 #include <sys/lockf.h>
63 #include <sys/stat.h>
64 #include <sys/sysctl.h>
65 #include <sys/conf.h>
66 
67 #include <vm/vm.h>
68 #include <vm/vm_extern.h>
69 #include <vm/vm_zone.h>
70 
71 #include <sys/buf2.h>
72 
73 #include <vfs/fifofs/fifo.h>
74 
75 #include "rpcv2.h"
76 #include "nfsproto.h"
77 #include "nfs.h"
78 #include "nfsnode.h"
79 #include "nfsmount.h"
80 #include "xdr_subs.h"
81 #include "nfsm_subs.h"
82 #include "nqnfs.h"
83 
84 #include <net/if.h>
85 #include <netinet/in.h>
86 #include <netinet/in_var.h>
87 
88 /* Defs */
/* Boolean constants; TRUE is passed to nfsm_v3attrbuild() in nfs_setattrrpc(). */
89 #define	TRUE	1
90 #define	FALSE	0
91 
92 /*
93  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
94  * calls are not in getblk() and brelse() so that they would not be necessary
95  * here.
96  */
/* When B_VMIO is not defined, vfs_busy_pages() expands to nothing. */
97 #ifndef B_VMIO
98 #define vfs_busy_pages(bp, f)
99 #endif
100 
/*
 * Forward declarations for the file-local vnode op implementations and
 * helpers that are referenced (e.g. from the vnode op tables) before they
 * are defined.
 *
 * NOTE(review): nfs_getpages, nfs_putpages, nfs_inactive, nfs_reclaim and
 * nfs_write are used by the tables but not declared here; presumably they
 * are declared in one of the included nfs headers -- confirm.
 */
101 static int	nfsspec_read __P((struct vop_read_args *));
102 static int	nfsspec_write __P((struct vop_write_args *));
103 static int	nfsfifo_read __P((struct vop_read_args *));
104 static int	nfsfifo_write __P((struct vop_write_args *));
105 static int	nfsspec_close __P((struct vop_close_args *));
106 static int	nfsfifo_close __P((struct vop_close_args *));
/* The nfs_poll name used in the nfsv2 vnode op table resolves to vop_nopoll. */
107 #define nfs_poll vop_nopoll
108 static int	nfs_flush __P((struct vnode *,int,struct thread *,int));
109 static int	nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct thread *));
110 static	int	nfs_lookup __P((struct vop_lookup_args *));
111 static	int	nfs_create __P((struct vop_create_args *));
112 static	int	nfs_mknod __P((struct vop_mknod_args *));
113 static	int	nfs_open __P((struct vop_open_args *));
114 static	int	nfs_close __P((struct vop_close_args *));
115 static	int	nfs_access __P((struct vop_access_args *));
116 static	int	nfs_getattr __P((struct vop_getattr_args *));
117 static	int	nfs_setattr __P((struct vop_setattr_args *));
118 static	int	nfs_read __P((struct vop_read_args *));
119 static	int	nfs_mmap __P((struct vop_mmap_args *));
120 static	int	nfs_fsync __P((struct vop_fsync_args *));
121 static	int	nfs_remove __P((struct vop_remove_args *));
122 static	int	nfs_link __P((struct vop_link_args *));
123 static	int	nfs_rename __P((struct vop_rename_args *));
124 static	int	nfs_mkdir __P((struct vop_mkdir_args *));
125 static	int	nfs_rmdir __P((struct vop_rmdir_args *));
126 static	int	nfs_symlink __P((struct vop_symlink_args *));
127 static	int	nfs_readdir __P((struct vop_readdir_args *));
128 static	int	nfs_bmap __P((struct vop_bmap_args *));
129 static	int	nfs_strategy __P((struct vop_strategy_args *));
130 static	int	nfs_lookitup __P((struct vnode *, const char *, int,
131 			struct ucred *, struct thread *, struct nfsnode **));
132 static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
133 static int	nfsspec_access __P((struct vop_access_args *));
134 static int	nfs_readlink __P((struct vop_readlink_args *));
135 static int	nfs_print __P((struct vop_print_args *));
136 static int	nfs_advlock __P((struct vop_advlock_args *));
137 static int	nfs_bwrite __P((struct vop_bwrite_args *));
138 /*
139  * Global vfs data structures for nfs
140  */
/*
 * Vnode op vector for ordinary nfs vnodes.  Each table entry pairs an op
 * descriptor with the function implementing it; the list ends with a
 * { NULL, NULL } entry.  nfsv2_vnodeop_opv_desc ties the table to the
 * nfsv2_vnodeop_p vector pointer and is handed to VNODEOP_SET().
 */
141 vop_t **nfsv2_vnodeop_p;
142 static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
143 	{ &vop_default_desc,		(vop_t *) vop_defaultop },
144 	{ &vop_access_desc,		(vop_t *) nfs_access },
145 	{ &vop_advlock_desc,		(vop_t *) nfs_advlock },
146 	{ &vop_bmap_desc,		(vop_t *) nfs_bmap },
147 	{ &vop_bwrite_desc,		(vop_t *) nfs_bwrite },
148 	{ &vop_close_desc,		(vop_t *) nfs_close },
149 	{ &vop_create_desc,		(vop_t *) nfs_create },
150 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
151 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
152 	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
153 	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
154 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
155 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
156 	{ &vop_lease_desc,		(vop_t *) vop_null },
157 	{ &vop_link_desc,		(vop_t *) nfs_link },
158 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
159 	{ &vop_lookup_desc,		(vop_t *) nfs_lookup },
160 	{ &vop_mkdir_desc,		(vop_t *) nfs_mkdir },
161 	{ &vop_mknod_desc,		(vop_t *) nfs_mknod },
162 	{ &vop_mmap_desc,		(vop_t *) nfs_mmap },
163 	{ &vop_open_desc,		(vop_t *) nfs_open },
164 	{ &vop_poll_desc,		(vop_t *) nfs_poll },
165 	{ &vop_print_desc,		(vop_t *) nfs_print },
166 	{ &vop_read_desc,		(vop_t *) nfs_read },
167 	{ &vop_readdir_desc,		(vop_t *) nfs_readdir },
168 	{ &vop_readlink_desc,		(vop_t *) nfs_readlink },
169 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
170 	{ &vop_remove_desc,		(vop_t *) nfs_remove },
171 	{ &vop_rename_desc,		(vop_t *) nfs_rename },
172 	{ &vop_rmdir_desc,		(vop_t *) nfs_rmdir },
173 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
174 	{ &vop_strategy_desc,		(vop_t *) nfs_strategy },
175 	{ &vop_symlink_desc,		(vop_t *) nfs_symlink },
176 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
177 	{ &vop_write_desc,		(vop_t *) nfs_write },
178 	{ NULL, NULL }
179 };
180 static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
181 	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
182 VNODEOP_SET(nfsv2_vnodeop_opv_desc);
183 
184 /*
185  * Special device vnode ops
186  */
/*
 * Vnode op vector for device special files that live on an nfs mount.
 * The default entry is spec_vnoperate; the nfs-specific entries cover
 * access, close, read and write (nfsspec_*) plus the attribute, locking
 * and reclaim ops shared with ordinary nfs vnodes.
 */
187 vop_t **spec_nfsv2nodeop_p;
188 static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
189 	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
190 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
191 	{ &vop_close_desc,		(vop_t *) nfsspec_close },
192 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
193 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
194 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
195 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
196 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
197 	{ &vop_print_desc,		(vop_t *) nfs_print },
198 	{ &vop_read_desc,		(vop_t *) nfsspec_read },
199 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
200 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
201 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
202 	{ &vop_write_desc,		(vop_t *) nfsspec_write },
203 	{ NULL, NULL }
204 };
205 static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
206 	{ &spec_nfsv2nodeop_p, nfsv2_specop_entries };
207 VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
208 
/*
 * Vnode op vector for fifos that live on an nfs mount.  Same layout as the
 * special device table, except that the default entry is fifo_vnoperate and
 * close/read/write use the nfsfifo_* routines.
 */
209 vop_t **fifo_nfsv2nodeop_p;
210 static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
211 	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
212 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
213 	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
214 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
215 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
216 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
217 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
218 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
219 	{ &vop_print_desc,		(vop_t *) nfs_print },
220 	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
221 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
222 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
223 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
224 	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
225 	{ NULL, NULL }
226 };
227 static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
228 	{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
229 VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
230 
/*
 * Forward declarations for the file-local rpc helpers (mknod, remove,
 * rename) and for nfs_renameit(), which takes a struct sillyrename.
 */
231 static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
232 				  struct componentname *cnp,
233 				  struct vattr *vap));
234 static int	nfs_removerpc __P((struct vnode *dvp, const char *name,
235 				   int namelen,
236 				   struct ucred *cred, struct thread *td));
237 static int	nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
238 				   int fnamelen, struct vnode *tdvp,
239 				   const char *tnameptr, int tnamelen,
240 				   struct ucred *cred, struct thread *td));
241 static int	nfs_renameit __P((struct vnode *sdvp,
242 				  struct componentname *scnp,
243 				  struct sillyrename *sp));
244 
245 /*
246  * Global variables
247  */
/* Values and statistics defined elsewhere in the nfs code. */
248 extern u_int32_t nfs_true, nfs_false;
249 extern u_int32_t nfs_xdrneg1;
250 extern struct nfsstats nfsstats;
251 extern nfstype nfsv3_type[9];
/* Per-slot arrays sized by NFS_MAXASYNCDAEMON, plus the async daemon count. */
252 struct thread *nfs_iodwant[NFS_MAXASYNCDAEMON];
253 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
254 int nfs_numasync = 0;
/* sizeof(struct dirent) minus a (MAXNAMLEN + 1) byte name buffer. */
255 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
256 
257 SYSCTL_DECL(_vfs_nfs);
258 
/*
 * Lifetime of a cached ACCESS result; nfs_access() compares
 * n_modestamp + this value against time_second.  When it is not positive,
 * nfs_access() only asks the server for the bits actually requested and
 * nfs_getattr() skips its ACCESS rpc.
 */
259 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
260 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
261 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
262 
/* Non-zero makes nfs_close() request a commit from nfs_flush() on NFSv3. */
263 static int	nfsv3_commit_on_close = 0;
264 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
265 	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
/* Read-only exports of the access cache counters; currently compiled out. */
266 #if 0
267 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
268 	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
269 
270 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
271 	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
272 #endif
273 
/*
 * Union of the six NFSv3 ACCESS bits (read, modify, extend, execute,
 * delete, lookup); nfs_getattr() sends it as a blanket ACCESS request.
 */
274 #define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
275 			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
276 			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
/*
 * Perform an NFSv3 ACCESS rpc for vp and cache the answer in its nfsnode.
 *
 *	vp	vnode to query
 *	wmode	mask of NFSV3ACCESS_* bits sent to the server
 *	td	thread handed to nfsm_request()
 *	cred	credentials handed to nfsm_request(); cr_uid is recorded
 *
 * If `error' is still zero after the post-op attributes have been
 * processed, the mode word of the reply is stored in np->n_mode together
 * with the requesting uid (n_modeuid) and the current time (n_modestamp).
 * Returns `error'.
 *
 * NOTE(review): the nfsm_* macros presumably use the locals declared below
 * (tl, cp, cp2, t1, t2, mb, bpos, ...) implicitly and set `error'; confirm
 * against nfsm_subs.h before renaming or removing any of them.
 */
277 static int
278 nfs3_access_otw(struct vnode *vp, int wmode,
279 	struct thread *td, struct ucred *cred)
280 {
	/* ACCESS exists only in protocol version 3, so v3 is fixed here. */
281 	const int v3 = 1;
282 	u_int32_t *tl;
283 	int error = 0, attrflag;
284 
285 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
286 	caddr_t bpos, dpos, cp2;
287 	int32_t t1, t2;
288 	caddr_t cp;
289 	u_int32_t rmode;
290 	struct nfsnode *np = VTONFS(vp);
291 
	/* Request: file handle followed by one word holding the mask. */
292 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
293 	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
294 	nfsm_fhtom(vp, v3);
295 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
296 	*tl = txdr_unsigned(wmode);
297 	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
298 	nfsm_postop_attr(vp, attrflag);
299 	if (!error) {
		/* Reply: one word with the access bits; cache it per uid. */
300 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
301 		rmode = fxdr_unsigned(u_int32_t, *tl);
302 		np->n_mode = rmode;
303 		np->n_modeuid = cred->cr_uid;
304 		np->n_modestamp = time_second;
305 	}
306 	nfsm_reqdone;
307 	return error;
308 }
309 
310 /*
311  * nfs access vnode op.
312  * For nfs version 2, just return ok. File accesses may fail later.
313  * For nfs version 3, use the access rpc to check accessibility. If file modes
314  * are changed on the server, accesses might still fail later.
315  */
316 static int
317 nfs_access(ap)
318 	struct vop_access_args /* {
319 		struct vnode *a_vp;
320 		int  a_mode;
321 		struct ucred *a_cred;
322 		struct thread *a_td;
323 	} */ *ap;
324 {
325 	struct vnode *vp = ap->a_vp;
326 	int error = 0;
327 	u_int32_t mode, wmode;
328 	int v3 = NFS_ISV3(vp);
329 	struct nfsnode *np = VTONFS(vp);
330 
331 	/*
332 	 * Disallow write attempts on filesystems mounted read-only;
333 	 * unless the file is a socket, fifo, or a block or character
334 	 * device resident on the filesystem.
335 	 */
336 	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
337 		switch (vp->v_type) {
338 		case VREG:
339 		case VDIR:
340 		case VLNK:
341 			return (EROFS);
342 		default:
343 			break;
344 		}
345 	}
346 	/*
347 	 * For nfs v3, check to see if we have done this recently, and if
348 	 * so return our cached result instead of making an ACCESS call.
349 	 * If not, do an access rpc, otherwise you are stuck emulating
350 	 * ufs_access() locally using the vattr. This may not be correct,
351 	 * since the server may apply other access criteria such as
352 	 * client uid-->server uid mapping that we do not know about.
353 	 */
354 	if (v3) {
355 		if (ap->a_mode & VREAD)
356 			mode = NFSV3ACCESS_READ;
357 		else
358 			mode = 0;
359 		if (vp->v_type != VDIR) {
360 			if (ap->a_mode & VWRITE)
361 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
362 			if (ap->a_mode & VEXEC)
363 				mode |= NFSV3ACCESS_EXECUTE;
364 		} else {
365 			if (ap->a_mode & VWRITE)
366 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
367 					 NFSV3ACCESS_DELETE);
368 			if (ap->a_mode & VEXEC)
369 				mode |= NFSV3ACCESS_LOOKUP;
370 		}
371 		/* XXX safety belt, only make blanket request if caching */
372 		if (nfsaccess_cache_timeout > 0) {
373 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
374 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
375 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
376 		} else {
377 			wmode = mode;
378 		}
379 
380 		/*
381 		 * Does our cached result allow us to give a definite yes to
382 		 * this request?
383 		 */
384 		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
385 		    (ap->a_cred->cr_uid == np->n_modeuid) &&
386 		    ((np->n_mode & mode) == mode)) {
387 			nfsstats.accesscache_hits++;
388 		} else {
389 			/*
390 			 * Either a no, or a don't know.  Go to the wire.
391 			 */
392 			nfsstats.accesscache_misses++;
393 		        error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
394 			if (!error) {
395 				if ((np->n_mode & mode) != mode) {
396 					error = EACCES;
397 				}
398 			}
399 		}
400 		return (error);
401 	} else {
402 		if ((error = nfsspec_access(ap)) != 0)
403 			return (error);
404 
405 		/*
406 		 * Attempt to prevent a mapped root from accessing a file
407 		 * which it shouldn't.  We try to read a byte from the file
408 		 * if the user is root and the file is not zero length.
409 		 * After calling nfsspec_access, we should have the correct
410 		 * file size cached.
411 		 */
412 		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
413 		    && VTONFS(vp)->n_size > 0) {
414 			struct iovec aiov;
415 			struct uio auio;
416 			char buf[1];
417 
418 			aiov.iov_base = buf;
419 			aiov.iov_len = 1;
420 			auio.uio_iov = &aiov;
421 			auio.uio_iovcnt = 1;
422 			auio.uio_offset = 0;
423 			auio.uio_resid = 1;
424 			auio.uio_segflg = UIO_SYSSPACE;
425 			auio.uio_rw = UIO_READ;
426 			auio.uio_td = ap->a_td;
427 
428 			if (vp->v_type == VREG)
429 				error = nfs_readrpc(vp, &auio);
430 			else if (vp->v_type == VDIR) {
431 				char* bp;
432 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
433 				aiov.iov_base = bp;
434 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
435 				error = nfs_readdirrpc(vp, &auio);
436 				free(bp, M_TEMP);
437 			} else if (vp->v_type == VLNK)
438 				error = nfs_readlinkrpc(vp, &auio);
439 			else
440 				error = EACCES;
441 		}
442 		return (error);
443 	}
444 }
445 
446 /*
447  * nfs open vnode op
448  * Check to see if the type is ok
449  * and that deletion is not in progress.
450  * For paged in text files, you will need to flush the page cache
451  * if consistency is lost.
452  */
453 /* ARGSUSED */
454 static int
455 nfs_open(ap)
456 	struct vop_open_args /* {
457 		struct vnode *a_vp;
458 		int  a_mode;
459 		struct ucred *a_cred;
460 		struct thread *a_td;
461 	} */ *ap;
462 {
463 	struct vnode *vp = ap->a_vp;
464 	struct nfsnode *np = VTONFS(vp);
465 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
466 	struct vattr vattr;
467 	int error;
468 
469 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
470 #ifdef DIAGNOSTIC
471 		printf("open eacces vtyp=%d\n",vp->v_type);
472 #endif
473 		return (EACCES);
474 	}
475 	/*
476 	 * Get a valid lease. If cached data is stale, flush it.
477 	 */
478 	if (nmp->nm_flag & NFSMNT_NQNFS) {
479 		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
480 		    do {
481 			error = nqnfs_getlease(vp, ND_READ, ap->a_td);
482 		    } while (error == NQNFS_EXPIRED);
483 		    if (error)
484 			return (error);
485 		    if (np->n_lrev != np->n_brev ||
486 			(np->n_flag & NQNFSNONCACHE)) {
487 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1))
488 			    == EINTR) {
489 				return (error);
490 			}
491 			np->n_brev = np->n_lrev;
492 		    }
493 		}
494 	} else {
495 		if (np->n_flag & NMODIFIED) {
496 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1))
497 			    == EINTR) {
498 				return (error);
499 			}
500 			np->n_attrstamp = 0;
501 			if (vp->v_type == VDIR)
502 				np->n_direofoffset = 0;
503 			error = VOP_GETATTR(vp, &vattr, ap->a_td);
504 			if (error)
505 				return (error);
506 			np->n_mtime = vattr.va_mtime.tv_sec;
507 		} else {
508 			error = VOP_GETATTR(vp, &vattr, ap->a_td);
509 			if (error)
510 				return (error);
511 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
512 				if (vp->v_type == VDIR)
513 					np->n_direofoffset = 0;
514 				if ((error = nfs_vinvalbuf(vp, V_SAVE,
515 				    ap->a_td, 1)) == EINTR) {
516 					return (error);
517 				}
518 				np->n_mtime = vattr.va_mtime.tv_sec;
519 			}
520 		}
521 	}
522 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
523 		np->n_attrstamp = 0; /* For Open/Close consistency */
524 	return (0);
525 }
526 
527 /*
528  * nfs close vnode op
529  * What an NFS client should do upon close after writing is a debatable issue.
530  * Most NFS clients push delayed writes to the server upon close, basically for
531  * two reasons:
532  * 1 - So that any write errors may be reported back to the client process
533  *     doing the close system call. By far the two most likely errors are
534  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
535  * 2 - To put a worst case upper bound on cache inconsistency between
536  *     multiple clients for the file.
537  * There is also a consistency problem for Version 2 of the protocol w.r.t.
538  * not being able to tell if other clients are writing a file concurrently,
539  * since there is no way of knowing if the changed modify time in the reply
540  * is only due to the write for this client.
541  * (NFS Version 3 provides weak cache consistency data in the reply that
542  *  should be sufficient to detect and handle this case.)
543  *
544  * The current code does the following:
545  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
546  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
547  *                     or commit them (this satisfies 1 and 2 except for the
548  *                     case where the server crashes after this close but
549  *                     before the commit RPC, which is felt to be "good
550  *                     enough". Changing the last argument to nfs_flush() to
551  *                     a 1 would force a commit operation, if it is felt a
552  *                     commit is necessary now.
553  * for NQNFS         - do nothing now, since 2 is dealt with via leases and
554  *                     1 should be dealt with via an fsync() system call for
555  *                     cases where write errors are important.
556  */
557 /* ARGSUSED */
558 static int
559 nfs_close(ap)
560 	struct vop_close_args /* {
561 		struct vnodeop_desc *a_desc;
562 		struct vnode *a_vp;
563 		int  a_fflag;
564 		struct ucred *a_cred;
565 		struct thread *a_td;
566 	} */ *ap;
567 {
568 	struct vnode *vp = ap->a_vp;
569 	struct nfsnode *np = VTONFS(vp);
570 	int error = 0;
571 
572 	if (vp->v_type == VREG) {
573 	    if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
574 		(np->n_flag & NMODIFIED)) {
575 		if (NFS_ISV3(vp)) {
576 		    /*
577 		     * Under NFSv3 we have dirty buffers to dispose of.  We
578 		     * must flush them to the NFS server.  We have the option
579 		     * of waiting all the way through the commit rpc or just
580 		     * waiting for the initial write.  The default is to only
581 		     * wait through the initial write so the data is in the
582 		     * server's cache, which is roughly similar to the state
583 		     * a standard disk subsystem leaves the file in on close().
584 		     *
585 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
586 		     * potential races with other processes, and certainly
587 		     * cannot clear it if we don't commit.
588 		     */
589 		    int cm = nfsv3_commit_on_close ? 1 : 0;
590 		    error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm);
591 		    /* np->n_flag &= ~NMODIFIED; */
592 		} else {
593 		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
594 		}
595 		np->n_attrstamp = 0;
596 	    }
597 	    if (np->n_flag & NWRITEERR) {
598 		np->n_flag &= ~NWRITEERR;
599 		error = np->n_error;
600 	    }
601 	}
602 	return (error);
603 }
604 
605 /*
606  * nfs getattr call from vfs.
607  */
/*
 * Fetch the attributes of ap->a_vp into ap->a_vap.
 *
 * The attribute cache is consulted first.  On a miss, a v3 mount with a
 * positive access cache timeout issues a blanket ACCESS rpc and retries the
 * cache; if that also misses (or does not apply) a GETATTR rpc is sent and
 * its reply loaded with nfsm_loadattr().  Both rpcs use NFSVPCRED(vp), not
 * ap->a_cred.  Returns 0 or the rpc error.
 *
 * NOTE(review): the nfsm_* macros presumably use the locals declared below
 * implicitly and set `error'; confirm against nfsm_subs.h.
 */
608 static int
609 nfs_getattr(ap)
610 	struct vop_getattr_args /* {
611 		struct vnode *a_vp;
612 		struct vattr *a_vap;
613 		struct ucred *a_cred;
614 		struct thread *a_td;
615 	} */ *ap;
616 {
617 	struct vnode *vp = ap->a_vp;
618 	struct nfsnode *np = VTONFS(vp);
619 	caddr_t cp;
620 	u_int32_t *tl;
621 	int32_t t1, t2;
622 	caddr_t bpos, dpos;
623 	int error = 0;
624 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
625 	int v3 = NFS_ISV3(vp);
626 
627 	/*
628 	 * Update local times for special files.
629 	 */
630 	if (np->n_flag & (NACC | NUPD))
631 		np->n_flag |= NCHG;
632 	/*
633 	 * First look in the cache.
634 	 */
635 	if (nfs_getattrcache(vp, ap->a_vap) == 0)
636 		return (0);
637 
	/*
	 * The return value of the ACCESS rpc is deliberately not checked;
	 * only the second cache lookup decides whether GETATTR is needed.
	 * Presumably the post-op attributes of the ACCESS reply refill the
	 * attribute cache -- confirm in nfsm_postop_attr.
	 */
638 	if (v3 && nfsaccess_cache_timeout > 0) {
639 		nfsstats.accesscache_misses++;
640 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, NFSVPCRED(vp));
641 		if (nfs_getattrcache(vp, ap->a_vap) == 0)
642 			return (0);
643 	}
644 
	/* GETATTR request: just the file handle. */
645 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
646 	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
647 	nfsm_fhtom(vp, v3);
648 	nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, NFSVPCRED(vp));
649 	if (!error) {
650 		nfsm_loadattr(vp, ap->a_vap);
651 	}
652 	nfsm_reqdone;
653 	return (error);
654 }
655 
656 /*
657  * nfs setattr call.
658  */
659 static int
660 nfs_setattr(ap)
661 	struct vop_setattr_args /* {
662 		struct vnodeop_desc *a_desc;
663 		struct vnode *a_vp;
664 		struct vattr *a_vap;
665 		struct ucred *a_cred;
666 		struct thread *a_td;
667 	} */ *ap;
668 {
669 	struct vnode *vp = ap->a_vp;
670 	struct nfsnode *np = VTONFS(vp);
671 	struct vattr *vap = ap->a_vap;
672 	int error = 0;
673 	u_quad_t tsize;
674 
675 #ifndef nolint
676 	tsize = (u_quad_t)0;
677 #endif
678 
679 	/*
680 	 * Setting of flags is not supported.
681 	 */
682 	if (vap->va_flags != VNOVAL)
683 		return (EOPNOTSUPP);
684 
685 	/*
686 	 * Disallow write attempts if the filesystem is mounted read-only.
687 	 */
688   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
689 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
690 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
691 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
692 		return (EROFS);
693 	if (vap->va_size != VNOVAL) {
694  		switch (vp->v_type) {
695  		case VDIR:
696  			return (EISDIR);
697  		case VCHR:
698  		case VBLK:
699  		case VSOCK:
700  		case VFIFO:
701 			if (vap->va_mtime.tv_sec == VNOVAL &&
702 			    vap->va_atime.tv_sec == VNOVAL &&
703 			    vap->va_mode == (mode_t)VNOVAL &&
704 			    vap->va_uid == (uid_t)VNOVAL &&
705 			    vap->va_gid == (gid_t)VNOVAL)
706 				return (0);
707  			vap->va_size = VNOVAL;
708  			break;
709  		default:
710 			/*
711 			 * Disallow write attempts if the filesystem is
712 			 * mounted read-only.
713 			 */
714 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
715 				return (EROFS);
716 
717 			/*
718 			 * We run vnode_pager_setsize() early (why?),
719 			 * we must set np->n_size now to avoid vinvalbuf
720 			 * V_SAVE races that might setsize a lower
721 			 * value.
722 			 */
723 
724 			tsize = np->n_size;
725 			error = nfs_meta_setsize(vp, ap->a_td, vap->va_size);
726 
727  			if (np->n_flag & NMODIFIED) {
728  			    if (vap->va_size == 0)
729  				error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
730  			    else
731  				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
732  			    if (error) {
733 				np->n_size = tsize;
734 				vnode_pager_setsize(vp, np->n_size);
735  				return (error);
736 			    }
737  			}
738 			np->n_vattr.va_size = vap->va_size;
739   		};
740   	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
741 		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
742 		vp->v_type == VREG &&
743   		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) == EINTR)
744 		return (error);
745 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
746 	if (error && vap->va_size != VNOVAL) {
747 		np->n_size = np->n_vattr.va_size = tsize;
748 		vnode_pager_setsize(vp, np->n_size);
749 	}
750 	return (error);
751 }
752 
753 /*
754  * Do an nfs setattr rpc.
755  */
/*
 * Send a SETATTR rpc for vp carrying the attributes in vap.
 *
 *	vp	vnode whose attributes are set
 *	vap	attributes to send; for v2, mode/uid/gid left at VNOVAL are
 *		encoded as nfs_xdrneg1
 *	cred	credentials handed to nfsm_request()
 *	td	thread handed to nfsm_request()
 *
 * Returns `error' as left by the nfsm_* macros.
 *
 * NOTE(review): the nfsm_* macros presumably use the locals declared below
 * implicitly and set `error'; confirm against nfsm_subs.h.
 */
756 static int
757 nfs_setattrrpc(struct vnode *vp, struct vattr *vap,
758 	struct ucred *cred, struct thread *td)
759 {
760 	struct nfsv2_sattr *sp;
761 	caddr_t cp;
762 	int32_t t1, t2;
763 	caddr_t bpos, dpos, cp2;
764 	u_int32_t *tl;
765 	int error = 0, wccflag = NFSV3_WCCRATTR;
766 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
767 	int v3 = NFS_ISV3(vp);
768 
769 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
770 	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
771 	nfsm_fhtom(vp, v3);
772 	if (v3) {
		/*
		 * v3: attributes followed by one word set to nfs_false
		 * (presumably the SETATTR guard discriminant -- confirm).
		 */
773 		nfsm_v3attrbuild(vap, TRUE);
774 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
775 		*tl = nfs_false;
776 	} else {
		/* v2: fill in a fixed-layout nfsv2_sattr by hand. */
777 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
778 		if (vap->va_mode == (mode_t)VNOVAL)
779 			sp->sa_mode = nfs_xdrneg1;
780 		else
781 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
782 		if (vap->va_uid == (uid_t)VNOVAL)
783 			sp->sa_uid = nfs_xdrneg1;
784 		else
785 			sp->sa_uid = txdr_unsigned(vap->va_uid);
786 		if (vap->va_gid == (gid_t)VNOVAL)
787 			sp->sa_gid = nfs_xdrneg1;
788 		else
789 			sp->sa_gid = txdr_unsigned(vap->va_gid);
		/* Size and times are encoded unconditionally. */
790 		sp->sa_size = txdr_unsigned(vap->va_size);
791 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
792 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
793 	}
794 	nfsm_request(vp, NFSPROC_SETATTR, td, cred);
	/* Reply handling: wcc data for v3, plain attributes for v2. */
795 	if (v3) {
796 		nfsm_wcc_data(vp, wccflag);
797 	} else
798 		nfsm_loadattr(vp, (struct vattr *)0);
799 	nfsm_reqdone;
800 	return (error);
801 }
802 
803 /*
804  * nfs lookup call, one step at a time...
805  * First look in cache
806  * If not found, unlock the directory nfsnode and do the rpc
807  */
808 static int
809 nfs_lookup(ap)
810 	struct vop_lookup_args /* {
811 		struct vnodeop_desc *a_desc;
812 		struct vnode *a_dvp;
813 		struct vnode **a_vpp;
814 		struct componentname *a_cnp;
815 	} */ *ap;
816 {
817 	struct componentname *cnp = ap->a_cnp;
818 	struct vnode *dvp = ap->a_dvp;
819 	struct vnode **vpp = ap->a_vpp;
820 	int flags = cnp->cn_flags;
821 	struct vnode *newvp;
822 	u_int32_t *tl;
823 	caddr_t cp;
824 	int32_t t1, t2;
825 	struct nfsmount *nmp;
826 	caddr_t bpos, dpos, cp2;
827 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
828 	long len;
829 	nfsfh_t *fhp;
830 	struct nfsnode *np;
831 	int lockparent, wantparent, error = 0, attrflag, fhsize;
832 	int v3 = NFS_ISV3(dvp);
833 	struct thread *td = cnp->cn_td;
834 
835 	*vpp = NULLVP;
836 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
837 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
838 		return (EROFS);
839 	if (dvp->v_type != VDIR)
840 		return (ENOTDIR);
841 	lockparent = flags & LOCKPARENT;
842 	wantparent = flags & (LOCKPARENT|WANTPARENT);
843 	nmp = VFSTONFS(dvp->v_mount);
844 	np = VTONFS(dvp);
845 	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
846 		struct vattr vattr;
847 		int vpid;
848 
849 		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
850 			*vpp = NULLVP;
851 			return (error);
852 		}
853 
854 		newvp = *vpp;
855 		vpid = newvp->v_id;
856 		/*
857 		 * See the comment starting `Step through' in ufs/ufs_lookup.c
858 		 * for an explanation of the locking protocol
859 		 */
860 		if (dvp == newvp) {
861 			VREF(newvp);
862 			error = 0;
863 		} else if (flags & ISDOTDOT) {
864 			VOP_UNLOCK(dvp, 0, td);
865 			error = vget(newvp, LK_EXCLUSIVE, td);
866 			if (!error && lockparent && (flags & ISLASTCN))
867 				error = vn_lock(dvp, LK_EXCLUSIVE, td);
868 		} else {
869 			error = vget(newvp, LK_EXCLUSIVE, td);
870 			if (!lockparent || error || !(flags & ISLASTCN))
871 				VOP_UNLOCK(dvp, 0, td);
872 		}
873 		if (!error) {
874 			if (vpid == newvp->v_id) {
875 			   if (!VOP_GETATTR(newvp, &vattr, td)
876 			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
877 				nfsstats.lookupcache_hits++;
878 				if (cnp->cn_nameiop != LOOKUP &&
879 				    (flags & ISLASTCN))
880 					cnp->cn_flags |= SAVENAME;
881 				return (0);
882 			   }
883 			   cache_purge(newvp);
884 			}
885 			vput(newvp);
886 			if (lockparent && dvp != newvp && (flags & ISLASTCN))
887 				VOP_UNLOCK(dvp, 0, td);
888 		}
889 		error = vn_lock(dvp, LK_EXCLUSIVE, td);
890 		*vpp = NULLVP;
891 		if (error)
892 			return (error);
893 	}
894 	error = 0;
895 	newvp = NULLVP;
896 	nfsstats.lookupcache_misses++;
897 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
898 	len = cnp->cn_namelen;
899 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
900 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
901 	nfsm_fhtom(dvp, v3);
902 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
903 	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_td, cnp->cn_cred);
904 	if (error) {
905 		nfsm_postop_attr(dvp, attrflag);
906 		m_freem(mrep);
907 		goto nfsmout;
908 	}
909 	nfsm_getfh(fhp, fhsize, v3);
910 
911 	/*
912 	 * Handle RENAME case...
913 	 */
914 	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
915 		if (NFS_CMPFH(np, fhp, fhsize)) {
916 			m_freem(mrep);
917 			return (EISDIR);
918 		}
919 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
920 		if (error) {
921 			m_freem(mrep);
922 			return (error);
923 		}
924 		newvp = NFSTOV(np);
925 		if (v3) {
926 			nfsm_postop_attr(newvp, attrflag);
927 			nfsm_postop_attr(dvp, attrflag);
928 		} else
929 			nfsm_loadattr(newvp, (struct vattr *)0);
930 		*vpp = newvp;
931 		m_freem(mrep);
932 		cnp->cn_flags |= SAVENAME;
933 		if (!lockparent)
934 			VOP_UNLOCK(dvp, 0, td);
935 		return (0);
936 	}
937 
938 	if (flags & ISDOTDOT) {
939 		VOP_UNLOCK(dvp, 0, td);
940 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
941 		if (error) {
942 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
943 			return (error);
944 		}
945 		newvp = NFSTOV(np);
946 		if (lockparent && (flags & ISLASTCN) &&
947 		    (error = vn_lock(dvp, LK_EXCLUSIVE, td))) {
948 		    	vput(newvp);
949 			return (error);
950 		}
951 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
952 		VREF(dvp);
953 		newvp = dvp;
954 	} else {
955 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
956 		if (error) {
957 			m_freem(mrep);
958 			return (error);
959 		}
960 		if (!lockparent || !(flags & ISLASTCN))
961 			VOP_UNLOCK(dvp, 0, td);
962 		newvp = NFSTOV(np);
963 	}
964 	if (v3) {
965 		nfsm_postop_attr(newvp, attrflag);
966 		nfsm_postop_attr(dvp, attrflag);
967 	} else
968 		nfsm_loadattr(newvp, (struct vattr *)0);
969 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
970 		cnp->cn_flags |= SAVENAME;
971 	if ((cnp->cn_flags & MAKEENTRY) &&
972 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
973 		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
974 		cache_enter(dvp, newvp, cnp);
975 	}
976 	*vpp = newvp;
977 	nfsm_reqdone;
978 	if (error) {
979 		if (newvp != NULLVP) {
980 			vrele(newvp);
981 			*vpp = NULLVP;
982 		}
983 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
984 		    (flags & ISLASTCN) && error == ENOENT) {
985 			if (!lockparent)
986 				VOP_UNLOCK(dvp, 0, td);
987 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
988 				error = EROFS;
989 			else
990 				error = EJUSTRETURN;
991 		}
992 		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
993 			cnp->cn_flags |= SAVENAME;
994 	}
995 	return (error);
996 }
997 
998 /*
999  * nfs read call.
1000  * Just call nfs_bioread() to do the work.
1001  */
1002 static int
1003 nfs_read(ap)
1004 	struct vop_read_args /* {
1005 		struct vnode *a_vp;
1006 		struct uio *a_uio;
1007 		int  a_ioflag;
1008 		struct ucred *a_cred;
1009 	} */ *ap;
1010 {
1011 	struct vnode *vp = ap->a_vp;
1012 
1013 	if (vp->v_type != VREG)
1014 		return (EPERM);
1015 	return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag));
1016 }
1017 
1018 /*
1019  * nfs readlink call
1020  */
1021 static int
1022 nfs_readlink(ap)
1023 	struct vop_readlink_args /* {
1024 		struct vnode *a_vp;
1025 		struct uio *a_uio;
1026 		struct ucred *a_cred;
1027 	} */ *ap;
1028 {
1029 	struct vnode *vp = ap->a_vp;
1030 
1031 	if (vp->v_type != VLNK)
1032 		return (EINVAL);
1033 	return (nfs_bioread(vp, ap->a_uio, 0));
1034 }
1035 
1036 /*
1037  * Do a readlink rpc.
1038  * Called by nfs_doio() from below the buffer cache.
1039  */
1040 int
1041 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop)
1042 {
1043 	u_int32_t *tl;
1044 	caddr_t cp;
1045 	int32_t t1, t2;
1046 	caddr_t bpos, dpos, cp2;
1047 	int error = 0, len, attrflag;
1048 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1049 	int v3 = NFS_ISV3(vp);
1050 
1051 	nfsstats.rpccnt[NFSPROC_READLINK]++;
1052 	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1053 	nfsm_fhtom(vp, v3);
1054 	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, NFSVPCRED(vp));
1055 	if (v3)
1056 		nfsm_postop_attr(vp, attrflag);
1057 	if (!error) {
1058 		nfsm_strsiz(len, NFS_MAXPATHLEN);
1059 		if (len == NFS_MAXPATHLEN) {
1060 			struct nfsnode *np = VTONFS(vp);
1061 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1062 				len = np->n_size;
1063 		}
1064 		nfsm_mtouio(uiop, len);
1065 	}
1066 	nfsm_reqdone;
1067 	return (error);
1068 }
1069 
1070 /*
1071  * nfs read rpc call
1072  * Ditto above
1073  */
1074 int
1075 nfs_readrpc(struct vnode *vp, struct uio *uiop)
1076 {
1077 	u_int32_t *tl;
1078 	caddr_t cp;
1079 	int32_t t1, t2;
1080 	caddr_t bpos, dpos, cp2;
1081 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1082 	struct nfsmount *nmp;
1083 	int error = 0, len, retlen, tsiz, eof, attrflag;
1084 	int v3 = NFS_ISV3(vp);
1085 
1086 #ifndef nolint
1087 	eof = 0;
1088 #endif
1089 	nmp = VFSTONFS(vp->v_mount);
1090 	tsiz = uiop->uio_resid;
1091 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1092 		return (EFBIG);
1093 	while (tsiz > 0) {
1094 		nfsstats.rpccnt[NFSPROC_READ]++;
1095 		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1096 		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1097 		nfsm_fhtom(vp, v3);
1098 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1099 		if (v3) {
1100 			txdr_hyper(uiop->uio_offset, tl);
1101 			*(tl + 2) = txdr_unsigned(len);
1102 		} else {
1103 			*tl++ = txdr_unsigned(uiop->uio_offset);
1104 			*tl++ = txdr_unsigned(len);
1105 			*tl = 0;
1106 		}
1107 		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, NFSVPCRED(vp));
1108 		if (v3) {
1109 			nfsm_postop_attr(vp, attrflag);
1110 			if (error) {
1111 				m_freem(mrep);
1112 				goto nfsmout;
1113 			}
1114 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1115 			eof = fxdr_unsigned(int, *(tl + 1));
1116 		} else
1117 			nfsm_loadattr(vp, (struct vattr *)0);
1118 		nfsm_strsiz(retlen, nmp->nm_rsize);
1119 		nfsm_mtouio(uiop, retlen);
1120 		m_freem(mrep);
1121 		tsiz -= retlen;
1122 		if (v3) {
1123 			if (eof || retlen == 0) {
1124 				tsiz = 0;
1125 			}
1126 		} else if (retlen < len) {
1127 			tsiz = 0;
1128 		}
1129 	}
1130 nfsmout:
1131 	return (error);
1132 }
1133 
1134 /*
1135  * nfs write call
1136  */
1137 int
1138 nfs_writerpc(vp, uiop, iomode, must_commit)
1139 	struct vnode *vp;
1140 	struct uio *uiop;
1141 	int *iomode, *must_commit;
1142 {
1143 	u_int32_t *tl;
1144 	caddr_t cp;
1145 	int32_t t1, t2, backup;
1146 	caddr_t bpos, dpos, cp2;
1147 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1148 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1149 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1150 	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1151 
1152 #ifndef DIAGNOSTIC
1153 	if (uiop->uio_iovcnt != 1)
1154 		panic("nfs: writerpc iovcnt > 1");
1155 #endif
1156 	*must_commit = 0;
1157 	tsiz = uiop->uio_resid;
1158 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1159 		return (EFBIG);
1160 	while (tsiz > 0) {
1161 		nfsstats.rpccnt[NFSPROC_WRITE]++;
1162 		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1163 		nfsm_reqhead(vp, NFSPROC_WRITE,
1164 			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1165 		nfsm_fhtom(vp, v3);
1166 		if (v3) {
1167 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1168 			txdr_hyper(uiop->uio_offset, tl);
1169 			tl += 2;
1170 			*tl++ = txdr_unsigned(len);
1171 			*tl++ = txdr_unsigned(*iomode);
1172 			*tl = txdr_unsigned(len);
1173 		} else {
1174 			u_int32_t x;
1175 
1176 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1177 			/* Set both "begin" and "current" to non-garbage. */
1178 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1179 			*tl++ = x;	/* "begin offset" */
1180 			*tl++ = x;	/* "current offset" */
1181 			x = txdr_unsigned(len);
1182 			*tl++ = x;	/* total to this offset */
1183 			*tl = x;	/* size of this write */
1184 		}
1185 		nfsm_uiotom(uiop, len);
1186 		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, NFSVPCRED(vp));
1187 		if (v3) {
1188 			wccflag = NFSV3_WCCCHK;
1189 			nfsm_wcc_data(vp, wccflag);
1190 			if (!error) {
1191 				nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1192 					+ NFSX_V3WRITEVERF);
1193 				rlen = fxdr_unsigned(int, *tl++);
1194 				if (rlen == 0) {
1195 					error = NFSERR_IO;
1196 					m_freem(mrep);
1197 					break;
1198 				} else if (rlen < len) {
1199 					backup = len - rlen;
1200 					uiop->uio_iov->iov_base -= backup;
1201 					uiop->uio_iov->iov_len += backup;
1202 					uiop->uio_offset -= backup;
1203 					uiop->uio_resid += backup;
1204 					len = rlen;
1205 				}
1206 				commit = fxdr_unsigned(int, *tl++);
1207 
1208 				/*
1209 				 * Return the lowest committment level
1210 				 * obtained by any of the RPCs.
1211 				 */
1212 				if (committed == NFSV3WRITE_FILESYNC)
1213 					committed = commit;
1214 				else if (committed == NFSV3WRITE_DATASYNC &&
1215 					commit == NFSV3WRITE_UNSTABLE)
1216 					committed = commit;
1217 				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1218 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1219 					NFSX_V3WRITEVERF);
1220 				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1221 				} else if (bcmp((caddr_t)tl,
1222 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1223 				    *must_commit = 1;
1224 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1225 					NFSX_V3WRITEVERF);
1226 				}
1227 			}
1228 		} else
1229 		    nfsm_loadattr(vp, (struct vattr *)0);
1230 		if (wccflag)
1231 		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
1232 		m_freem(mrep);
1233 		if (error)
1234 			break;
1235 		tsiz -= len;
1236 	}
1237 nfsmout:
1238 	if (vp->v_mount->mnt_flag & MNT_ASYNC)
1239 		committed = NFSV3WRITE_FILESYNC;
1240 	*iomode = committed;
1241 	if (error)
1242 		uiop->uio_resid = tsiz;
1243 	return (error);
1244 }
1245 
1246 /*
1247  * nfs mknod rpc
1248  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1249  * mode set to specify the file type and the size field for rdev.
1250  */
1251 static int
1252 nfs_mknodrpc(dvp, vpp, cnp, vap)
1253 	struct vnode *dvp;
1254 	struct vnode **vpp;
1255 	struct componentname *cnp;
1256 	struct vattr *vap;
1257 {
1258 	struct nfsv2_sattr *sp;
1259 	u_int32_t *tl;
1260 	caddr_t cp;
1261 	int32_t t1, t2;
1262 	struct vnode *newvp = (struct vnode *)0;
1263 	struct nfsnode *np = (struct nfsnode *)0;
1264 	struct vattr vattr;
1265 	char *cp2;
1266 	caddr_t bpos, dpos;
1267 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1268 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1269 	u_int32_t rdev;
1270 	int v3 = NFS_ISV3(dvp);
1271 
1272 	if (vap->va_type == VCHR || vap->va_type == VBLK)
1273 		rdev = txdr_unsigned(vap->va_rdev);
1274 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1275 		rdev = nfs_xdrneg1;
1276 	else {
1277 		return (EOPNOTSUPP);
1278 	}
1279 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_td)) != 0) {
1280 		return (error);
1281 	}
1282 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1283 	nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1284 		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1285 	nfsm_fhtom(dvp, v3);
1286 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1287 	if (v3) {
1288 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1289 		*tl++ = vtonfsv3_type(vap->va_type);
1290 		nfsm_v3attrbuild(vap, FALSE);
1291 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1292 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1293 			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
1294 			*tl = txdr_unsigned(uminor(vap->va_rdev));
1295 		}
1296 	} else {
1297 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1298 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1299 		sp->sa_uid = nfs_xdrneg1;
1300 		sp->sa_gid = nfs_xdrneg1;
1301 		sp->sa_size = rdev;
1302 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1303 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1304 	}
1305 	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_td, cnp->cn_cred);
1306 	if (!error) {
1307 		nfsm_mtofh(dvp, newvp, v3, gotvp);
1308 		if (!gotvp) {
1309 			if (newvp) {
1310 				vput(newvp);
1311 				newvp = (struct vnode *)0;
1312 			}
1313 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1314 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
1315 			if (!error)
1316 				newvp = NFSTOV(np);
1317 		}
1318 	}
1319 	if (v3)
1320 		nfsm_wcc_data(dvp, wccflag);
1321 	nfsm_reqdone;
1322 	if (error) {
1323 		if (newvp)
1324 			vput(newvp);
1325 	} else {
1326 		if (cnp->cn_flags & MAKEENTRY)
1327 			cache_enter(dvp, newvp, cnp);
1328 		*vpp = newvp;
1329 	}
1330 	VTONFS(dvp)->n_flag |= NMODIFIED;
1331 	if (!wccflag)
1332 		VTONFS(dvp)->n_attrstamp = 0;
1333 	return (error);
1334 }
1335 
1336 /*
1337  * nfs mknod vop
1338  * just call nfs_mknodrpc() to do the work.
1339  */
1340 /* ARGSUSED */
1341 static int
1342 nfs_mknod(ap)
1343 	struct vop_mknod_args /* {
1344 		struct vnode *a_dvp;
1345 		struct vnode **a_vpp;
1346 		struct componentname *a_cnp;
1347 		struct vattr *a_vap;
1348 	} */ *ap;
1349 {
1350 	return nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
1351 }
1352 
1353 static u_long create_verf;
1354 /*
1355  * nfs file create call
1356  */
1357 static int
1358 nfs_create(ap)
1359 	struct vop_create_args /* {
1360 		struct vnode *a_dvp;
1361 		struct vnode **a_vpp;
1362 		struct componentname *a_cnp;
1363 		struct vattr *a_vap;
1364 	} */ *ap;
1365 {
1366 	struct vnode *dvp = ap->a_dvp;
1367 	struct vattr *vap = ap->a_vap;
1368 	struct componentname *cnp = ap->a_cnp;
1369 	struct nfsv2_sattr *sp;
1370 	u_int32_t *tl;
1371 	caddr_t cp;
1372 	int32_t t1, t2;
1373 	struct nfsnode *np = (struct nfsnode *)0;
1374 	struct vnode *newvp = (struct vnode *)0;
1375 	caddr_t bpos, dpos, cp2;
1376 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1377 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1378 	struct vattr vattr;
1379 	int v3 = NFS_ISV3(dvp);
1380 
1381 	/*
1382 	 * Oops, not for me..
1383 	 */
1384 	if (vap->va_type == VSOCK)
1385 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1386 
1387 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_td)) != 0) {
1388 		return (error);
1389 	}
1390 	if (vap->va_vaflags & VA_EXCLUSIVE)
1391 		fmode |= O_EXCL;
1392 again:
1393 	nfsstats.rpccnt[NFSPROC_CREATE]++;
1394 	nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1395 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1396 	nfsm_fhtom(dvp, v3);
1397 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1398 	if (v3) {
1399 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1400 		if (fmode & O_EXCL) {
1401 			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1402 			nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
1403 #ifdef INET
1404 			if (!TAILQ_EMPTY(&in_ifaddrhead))
1405 				*tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
1406 			else
1407 #endif
1408 				*tl++ = create_verf;
1409 			*tl = ++create_verf;
1410 		} else {
1411 			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1412 			nfsm_v3attrbuild(vap, FALSE);
1413 		}
1414 	} else {
1415 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1416 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1417 		sp->sa_uid = nfs_xdrneg1;
1418 		sp->sa_gid = nfs_xdrneg1;
1419 		sp->sa_size = 0;
1420 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1421 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1422 	}
1423 	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_td, cnp->cn_cred);
1424 	if (!error) {
1425 		nfsm_mtofh(dvp, newvp, v3, gotvp);
1426 		if (!gotvp) {
1427 			if (newvp) {
1428 				vput(newvp);
1429 				newvp = (struct vnode *)0;
1430 			}
1431 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1432 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_td, &np);
1433 			if (!error)
1434 				newvp = NFSTOV(np);
1435 		}
1436 	}
1437 	if (v3)
1438 		nfsm_wcc_data(dvp, wccflag);
1439 	nfsm_reqdone;
1440 	if (error) {
1441 		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1442 			fmode &= ~O_EXCL;
1443 			goto again;
1444 		}
1445 		if (newvp)
1446 			vput(newvp);
1447 	} else if (v3 && (fmode & O_EXCL)) {
1448 		/*
1449 		 * We are normally called with only a partially initialized
1450 		 * VAP.  Since the NFSv3 spec says that server may use the
1451 		 * file attributes to store the verifier, the spec requires
1452 		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
1453 		 * in atime, but we can't really assume that all servers will
1454 		 * so we ensure that our SETATTR sets both atime and mtime.
1455 		 */
1456 		if (vap->va_mtime.tv_sec == VNOVAL)
1457 			vfs_timestamp(&vap->va_mtime);
1458 		if (vap->va_atime.tv_sec == VNOVAL)
1459 			vap->va_atime = vap->va_mtime;
1460 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_td);
1461 	}
1462 	if (!error) {
1463 		if (cnp->cn_flags & MAKEENTRY)
1464 			cache_enter(dvp, newvp, cnp);
1465 		*ap->a_vpp = newvp;
1466 	}
1467 	VTONFS(dvp)->n_flag |= NMODIFIED;
1468 	if (!wccflag)
1469 		VTONFS(dvp)->n_attrstamp = 0;
1470 	return (error);
1471 }
1472 
1473 /*
1474  * nfs file remove call
1475  * To try and make nfs semantics closer to ufs semantics, a file that has
1476  * other processes using the vnode is renamed instead of removed and then
1477  * removed later on the last close.
1478  * - If v_usecount > 1
1479  *	  If a rename is not already in the works
1480  *	     call nfs_sillyrename() to set it up
1481  *     else
1482  *	  do the remove rpc
1483  */
1484 static int
1485 nfs_remove(ap)
1486 	struct vop_remove_args /* {
1487 		struct vnodeop_desc *a_desc;
1488 		struct vnode * a_dvp;
1489 		struct vnode * a_vp;
1490 		struct componentname * a_cnp;
1491 	} */ *ap;
1492 {
1493 	struct vnode *vp = ap->a_vp;
1494 	struct vnode *dvp = ap->a_dvp;
1495 	struct componentname *cnp = ap->a_cnp;
1496 	struct nfsnode *np = VTONFS(vp);
1497 	int error = 0;
1498 	struct vattr vattr;
1499 
1500 #ifndef DIAGNOSTIC
1501 	if ((cnp->cn_flags & HASBUF) == 0)
1502 		panic("nfs_remove: no name");
1503 	if (vp->v_usecount < 1)
1504 		panic("nfs_remove: bad v_usecount");
1505 #endif
1506 	if (vp->v_type == VDIR)
1507 		error = EPERM;
1508 	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
1509 	    VOP_GETATTR(vp, &vattr, cnp->cn_td) == 0 &&
1510 	    vattr.va_nlink > 1)) {
1511 		/*
1512 		 * Purge the name cache so that the chance of a lookup for
1513 		 * the name succeeding while the remove is in progress is
1514 		 * minimized. Without node locking it can still happen, such
1515 		 * that an I/O op returns ESTALE, but since you get this if
1516 		 * another host removes the file..
1517 		 */
1518 		cache_purge(vp);
1519 		/*
1520 		 * throw away biocache buffers, mainly to avoid
1521 		 * unnecessary delayed writes later.
1522 		 */
1523 		error = nfs_vinvalbuf(vp, 0, cnp->cn_td, 1);
1524 		/* Do the rpc */
1525 		if (error != EINTR)
1526 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1527 				cnp->cn_namelen, cnp->cn_cred, cnp->cn_td);
1528 		/*
1529 		 * Kludge City: If the first reply to the remove rpc is lost..
1530 		 *   the reply to the retransmitted request will be ENOENT
1531 		 *   since the file was in fact removed
1532 		 *   Therefore, we cheat and return success.
1533 		 */
1534 		if (error == ENOENT)
1535 			error = 0;
1536 	} else if (!np->n_sillyrename)
1537 		error = nfs_sillyrename(dvp, vp, cnp);
1538 	np->n_attrstamp = 0;
1539 	return (error);
1540 }
1541 
1542 /*
1543  * nfs file remove rpc called from nfs_inactive
1544  */
1545 int
1546 nfs_removeit(struct sillyrename *sp)
1547 {
1548 
1549 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen,
1550 		sp->s_cred, NULL));
1551 }
1552 
1553 /*
1554  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1555  */
1556 static int
1557 nfs_removerpc(dvp, name, namelen, cred, td)
1558 	struct vnode *dvp;
1559 	const char *name;
1560 	int namelen;
1561 	struct ucred *cred;
1562 	struct thread *td;
1563 {
1564 	u_int32_t *tl;
1565 	caddr_t cp;
1566 	int32_t t1, t2;
1567 	caddr_t bpos, dpos, cp2;
1568 	int error = 0, wccflag = NFSV3_WCCRATTR;
1569 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1570 	int v3 = NFS_ISV3(dvp);
1571 
1572 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1573 	nfsm_reqhead(dvp, NFSPROC_REMOVE,
1574 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1575 	nfsm_fhtom(dvp, v3);
1576 	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1577 	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
1578 	if (v3)
1579 		nfsm_wcc_data(dvp, wccflag);
1580 	nfsm_reqdone;
1581 	VTONFS(dvp)->n_flag |= NMODIFIED;
1582 	if (!wccflag)
1583 		VTONFS(dvp)->n_attrstamp = 0;
1584 	return (error);
1585 }
1586 
1587 /*
1588  * nfs file rename call
1589  */
1590 static int
1591 nfs_rename(ap)
1592 	struct vop_rename_args  /* {
1593 		struct vnode *a_fdvp;
1594 		struct vnode *a_fvp;
1595 		struct componentname *a_fcnp;
1596 		struct vnode *a_tdvp;
1597 		struct vnode *a_tvp;
1598 		struct componentname *a_tcnp;
1599 	} */ *ap;
1600 {
1601 	struct vnode *fvp = ap->a_fvp;
1602 	struct vnode *tvp = ap->a_tvp;
1603 	struct vnode *fdvp = ap->a_fdvp;
1604 	struct vnode *tdvp = ap->a_tdvp;
1605 	struct componentname *tcnp = ap->a_tcnp;
1606 	struct componentname *fcnp = ap->a_fcnp;
1607 	int error;
1608 
1609 #ifndef DIAGNOSTIC
1610 	if ((tcnp->cn_flags & HASBUF) == 0 ||
1611 	    (fcnp->cn_flags & HASBUF) == 0)
1612 		panic("nfs_rename: no name");
1613 #endif
1614 	/* Check for cross-device rename */
1615 	if ((fvp->v_mount != tdvp->v_mount) ||
1616 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1617 		error = EXDEV;
1618 		goto out;
1619 	}
1620 
1621 	/*
1622 	 * We have to flush B_DELWRI data prior to renaming
1623 	 * the file.  If we don't, the delayed-write buffers
1624 	 * can be flushed out later after the file has gone stale
1625 	 * under NFSV3.  NFSV2 does not have this problem because
1626 	 * ( as far as I can tell ) it flushes dirty buffers more
1627 	 * often.
1628 	 */
1629 
1630 	VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_td);
1631 	if (tvp)
1632 	    VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_td);
1633 
1634 	/*
1635 	 * If the tvp exists and is in use, sillyrename it before doing the
1636 	 * rename of the new file over it.
1637 	 * XXX Can't sillyrename a directory.
1638 	 */
1639 	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
1640 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1641 		vput(tvp);
1642 		tvp = NULL;
1643 	}
1644 
1645 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1646 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1647 		tcnp->cn_td);
1648 
1649 	if (fvp->v_type == VDIR) {
1650 		if (tvp != NULL && tvp->v_type == VDIR)
1651 			cache_purge(tdvp);
1652 		cache_purge(fdvp);
1653 	}
1654 
1655 out:
1656 	if (tdvp == tvp)
1657 		vrele(tdvp);
1658 	else
1659 		vput(tdvp);
1660 	if (tvp)
1661 		vput(tvp);
1662 	vrele(fdvp);
1663 	vrele(fvp);
1664 	/*
1665 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1666 	 */
1667 	if (error == ENOENT)
1668 		error = 0;
1669 	return (error);
1670 }
1671 
1672 /*
1673  * nfs file rename rpc called from nfs_remove() above
1674  */
1675 static int
1676 nfs_renameit(sdvp, scnp, sp)
1677 	struct vnode *sdvp;
1678 	struct componentname *scnp;
1679 	struct sillyrename *sp;
1680 {
1681 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
1682 		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_td));
1683 }
1684 
1685 /*
1686  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1687  */
1688 static int
1689 nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, td)
1690 	struct vnode *fdvp;
1691 	const char *fnameptr;
1692 	int fnamelen;
1693 	struct vnode *tdvp;
1694 	const char *tnameptr;
1695 	int tnamelen;
1696 	struct ucred *cred;
1697 	struct thread *td;
1698 {
1699 	u_int32_t *tl;
1700 	caddr_t cp;
1701 	int32_t t1, t2;
1702 	caddr_t bpos, dpos, cp2;
1703 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1704 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1705 	int v3 = NFS_ISV3(fdvp);
1706 
1707 	nfsstats.rpccnt[NFSPROC_RENAME]++;
1708 	nfsm_reqhead(fdvp, NFSPROC_RENAME,
1709 		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1710 		nfsm_rndup(tnamelen));
1711 	nfsm_fhtom(fdvp, v3);
1712 	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1713 	nfsm_fhtom(tdvp, v3);
1714 	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1715 	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
1716 	if (v3) {
1717 		nfsm_wcc_data(fdvp, fwccflag);
1718 		nfsm_wcc_data(tdvp, twccflag);
1719 	}
1720 	nfsm_reqdone;
1721 	VTONFS(fdvp)->n_flag |= NMODIFIED;
1722 	VTONFS(tdvp)->n_flag |= NMODIFIED;
1723 	if (!fwccflag)
1724 		VTONFS(fdvp)->n_attrstamp = 0;
1725 	if (!twccflag)
1726 		VTONFS(tdvp)->n_attrstamp = 0;
1727 	return (error);
1728 }
1729 
1730 /*
1731  * nfs hard link create call
1732  */
1733 static int
1734 nfs_link(ap)
1735 	struct vop_link_args /* {
1736 		struct vnode *a_tdvp;
1737 		struct vnode *a_vp;
1738 		struct componentname *a_cnp;
1739 	} */ *ap;
1740 {
1741 	struct vnode *vp = ap->a_vp;
1742 	struct vnode *tdvp = ap->a_tdvp;
1743 	struct componentname *cnp = ap->a_cnp;
1744 	u_int32_t *tl;
1745 	caddr_t cp;
1746 	int32_t t1, t2;
1747 	caddr_t bpos, dpos, cp2;
1748 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1749 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1750 	int v3;
1751 
1752 	if (vp->v_mount != tdvp->v_mount) {
1753 		return (EXDEV);
1754 	}
1755 
1756 	/*
1757 	 * Push all writes to the server, so that the attribute cache
1758 	 * doesn't get "out of sync" with the server.
1759 	 * XXX There should be a better way!
1760 	 */
1761 	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_td);
1762 
1763 	v3 = NFS_ISV3(vp);
1764 	nfsstats.rpccnt[NFSPROC_LINK]++;
1765 	nfsm_reqhead(vp, NFSPROC_LINK,
1766 		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1767 	nfsm_fhtom(vp, v3);
1768 	nfsm_fhtom(tdvp, v3);
1769 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1770 	nfsm_request(vp, NFSPROC_LINK, cnp->cn_td, cnp->cn_cred);
1771 	if (v3) {
1772 		nfsm_postop_attr(vp, attrflag);
1773 		nfsm_wcc_data(tdvp, wccflag);
1774 	}
1775 	nfsm_reqdone;
1776 	VTONFS(tdvp)->n_flag |= NMODIFIED;
1777 	if (!attrflag)
1778 		VTONFS(vp)->n_attrstamp = 0;
1779 	if (!wccflag)
1780 		VTONFS(tdvp)->n_attrstamp = 0;
1781 	/*
1782 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
1783 	 */
1784 	if (error == EEXIST)
1785 		error = 0;
1786 	return (error);
1787 }
1788 
1789 /*
1790  * nfs symbolic link create call
1791  */
1792 static int
1793 nfs_symlink(ap)
1794 	struct vop_symlink_args /* {
1795 		struct vnode *a_dvp;
1796 		struct vnode **a_vpp;
1797 		struct componentname *a_cnp;
1798 		struct vattr *a_vap;
1799 		char *a_target;
1800 	} */ *ap;
1801 {
1802 	struct vnode *dvp = ap->a_dvp;
1803 	struct vattr *vap = ap->a_vap;
1804 	struct componentname *cnp = ap->a_cnp;
1805 	struct nfsv2_sattr *sp;
1806 	u_int32_t *tl;
1807 	caddr_t cp;
1808 	int32_t t1, t2;
1809 	caddr_t bpos, dpos, cp2;
1810 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1811 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1812 	struct vnode *newvp = (struct vnode *)0;
1813 	int v3 = NFS_ISV3(dvp);
1814 
1815 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1816 	slen = strlen(ap->a_target);
1817 	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
1818 	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
1819 	nfsm_fhtom(dvp, v3);
1820 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1821 	if (v3) {
1822 		nfsm_v3attrbuild(vap, FALSE);
1823 	}
1824 	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
1825 	if (!v3) {
1826 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1827 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
1828 		sp->sa_uid = nfs_xdrneg1;
1829 		sp->sa_gid = nfs_xdrneg1;
1830 		sp->sa_size = nfs_xdrneg1;
1831 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1832 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1833 	}
1834 
1835 	/*
1836 	 * Issue the NFS request and get the rpc response.
1837 	 *
1838 	 * Only NFSv3 responses returning an error of 0 actually return
1839 	 * a file handle that can be converted into newvp without having
1840 	 * to do an extra lookup rpc.
1841 	 */
1842 	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_td, cnp->cn_cred);
1843 	if (v3) {
1844 		if (error == 0)
1845 			nfsm_mtofh(dvp, newvp, v3, gotvp);
1846 		nfsm_wcc_data(dvp, wccflag);
1847 	}
1848 
1849 	/*
1850 	 * out code jumps -> here, mrep is also freed.
1851 	 */
1852 
1853 	nfsm_reqdone;
1854 
1855 	/*
1856 	 * If we get an EEXIST error, silently convert it to no-error
1857 	 * in case of an NFS retry.
1858 	 */
1859 	if (error == EEXIST)
1860 		error = 0;
1861 
1862 	/*
1863 	 * If we do not have (or no longer have) an error, and we could
1864 	 * not extract the newvp from the response due to the request being
1865 	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
1866 	 * to obtain a newvp to return.
1867 	 */
1868 	if (error == 0 && newvp == NULL) {
1869 		struct nfsnode *np = NULL;
1870 
1871 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1872 		    cnp->cn_cred, cnp->cn_td, &np);
1873 		if (!error)
1874 			newvp = NFSTOV(np);
1875 	}
1876 	if (error) {
1877 		if (newvp)
1878 			vput(newvp);
1879 	} else {
1880 		*ap->a_vpp = newvp;
1881 	}
1882 	VTONFS(dvp)->n_flag |= NMODIFIED;
1883 	if (!wccflag)
1884 		VTONFS(dvp)->n_attrstamp = 0;
1885 	return (error);
1886 }
1887 
1888 /*
1889  * nfs make dir call
1890  */
1891 static int
1892 nfs_mkdir(ap)
1893 	struct vop_mkdir_args /* {
1894 		struct vnode *a_dvp;
1895 		struct vnode **a_vpp;
1896 		struct componentname *a_cnp;
1897 		struct vattr *a_vap;
1898 	} */ *ap;
1899 {
1900 	struct vnode *dvp = ap->a_dvp;
1901 	struct vattr *vap = ap->a_vap;
1902 	struct componentname *cnp = ap->a_cnp;
1903 	struct nfsv2_sattr *sp;
1904 	u_int32_t *tl;
1905 	caddr_t cp;
1906 	int32_t t1, t2;
1907 	int len;
1908 	struct nfsnode *np = (struct nfsnode *)0;
1909 	struct vnode *newvp = (struct vnode *)0;
1910 	caddr_t bpos, dpos, cp2;
1911 	int error = 0, wccflag = NFSV3_WCCRATTR;
1912 	int gotvp = 0;
1913 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1914 	struct vattr vattr;
1915 	int v3 = NFS_ISV3(dvp);
1916 
1917 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_td)) != 0) {
1918 		return (error);
1919 	}
1920 	len = cnp->cn_namelen;
1921 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
1922 	nfsm_reqhead(dvp, NFSPROC_MKDIR,
1923 	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
1924 	nfsm_fhtom(dvp, v3);
1925 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1926 	if (v3) {
1927 		nfsm_v3attrbuild(vap, FALSE);
1928 	} else {
1929 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1930 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
1931 		sp->sa_uid = nfs_xdrneg1;
1932 		sp->sa_gid = nfs_xdrneg1;
1933 		sp->sa_size = nfs_xdrneg1;
1934 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1935 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1936 	}
1937 	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_td, cnp->cn_cred);
1938 	if (!error)
1939 		nfsm_mtofh(dvp, newvp, v3, gotvp);
1940 	if (v3)
1941 		nfsm_wcc_data(dvp, wccflag);
1942 	nfsm_reqdone;
1943 	VTONFS(dvp)->n_flag |= NMODIFIED;
1944 	if (!wccflag)
1945 		VTONFS(dvp)->n_attrstamp = 0;
1946 	/*
1947 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
1948 	 * if we can succeed in looking up the directory.
1949 	 */
1950 	if (error == EEXIST || (!error && !gotvp)) {
1951 		if (newvp) {
1952 			vrele(newvp);
1953 			newvp = (struct vnode *)0;
1954 		}
1955 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
1956 			cnp->cn_td, &np);
1957 		if (!error) {
1958 			newvp = NFSTOV(np);
1959 			if (newvp->v_type != VDIR)
1960 				error = EEXIST;
1961 		}
1962 	}
1963 	if (error) {
1964 		if (newvp)
1965 			vrele(newvp);
1966 	} else
1967 		*ap->a_vpp = newvp;
1968 	return (error);
1969 }
1970 
1971 /*
1972  * nfs remove directory call
1973  */
1974 static int
1975 nfs_rmdir(ap)
1976 	struct vop_rmdir_args /* {
1977 		struct vnode *a_dvp;
1978 		struct vnode *a_vp;
1979 		struct componentname *a_cnp;
1980 	} */ *ap;
1981 {
1982 	struct vnode *vp = ap->a_vp;
1983 	struct vnode *dvp = ap->a_dvp;
1984 	struct componentname *cnp = ap->a_cnp;
1985 	u_int32_t *tl;
1986 	caddr_t cp;
1987 	int32_t t1, t2;
1988 	caddr_t bpos, dpos, cp2;
1989 	int error = 0, wccflag = NFSV3_WCCRATTR;
1990 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1991 	int v3 = NFS_ISV3(dvp);
1992 
1993 	if (dvp == vp)
1994 		return (EINVAL);
1995 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
1996 	nfsm_reqhead(dvp, NFSPROC_RMDIR,
1997 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1998 	nfsm_fhtom(dvp, v3);
1999 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2000 	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_td, cnp->cn_cred);
2001 	if (v3)
2002 		nfsm_wcc_data(dvp, wccflag);
2003 	nfsm_reqdone;
2004 	VTONFS(dvp)->n_flag |= NMODIFIED;
2005 	if (!wccflag)
2006 		VTONFS(dvp)->n_attrstamp = 0;
2007 	cache_purge(dvp);
2008 	cache_purge(vp);
2009 	/*
2010 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2011 	 */
2012 	if (error == ENOENT)
2013 		error = 0;
2014 	return (error);
2015 }
2016 
2017 /*
2018  * nfs readdir call
2019  */
2020 static int
2021 nfs_readdir(ap)
2022 	struct vop_readdir_args /* {
2023 		struct vnode *a_vp;
2024 		struct uio *a_uio;
2025 		struct ucred *a_cred;
2026 	} */ *ap;
2027 {
2028 	struct vnode *vp = ap->a_vp;
2029 	struct nfsnode *np = VTONFS(vp);
2030 	struct uio *uio = ap->a_uio;
2031 	int tresid, error;
2032 	struct vattr vattr;
2033 
2034 	if (vp->v_type != VDIR)
2035 		return (EPERM);
2036 	/*
2037 	 * First, check for hit on the EOF offset cache
2038 	 */
2039 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2040 	    (np->n_flag & NMODIFIED) == 0) {
2041 		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
2042 			if (NQNFS_CKCACHABLE(vp, ND_READ)) {
2043 				nfsstats.direofcache_hits++;
2044 				return (0);
2045 			}
2046 		} else if (VOP_GETATTR(vp, &vattr, uio->uio_td) == 0 &&
2047 			np->n_mtime == vattr.va_mtime.tv_sec) {
2048 			nfsstats.direofcache_hits++;
2049 			return (0);
2050 		}
2051 	}
2052 
2053 	/*
2054 	 * Call nfs_bioread() to do the real work.
2055 	 */
2056 	tresid = uio->uio_resid;
2057 	error = nfs_bioread(vp, uio, 0);
2058 
2059 	if (!error && uio->uio_resid == tresid)
2060 		nfsstats.direofcache_misses++;
2061 	return (error);
2062 }
2063 
2064 /*
2065  * Readdir rpc call.
2066  * Called from below the buffer cache by nfs_doio().
 *
 * vp    directory vnode to read
 * uiop  destination; entries are written through uiop->uio_iov as
 *       struct dirent records and uio_offset/uio_resid are advanced.
 *       Records are padded so that none straddles a DIRBLKSIZ boundary.
 *
 * Returns 0 on success, NFSERR_BAD_COOKIE when no cookie is cached for
 * uiop->uio_offset, EBADRPC when the server sends a name length that is
 * <= 0 or > NFS_MAXNAMLEN, or the error left in 'error' by the rpc
 * (presumably set inside the nfsm_* macros, which also jump to nfsmout).
2067  */
2068 int
2069 nfs_readdirrpc(struct vnode *vp, struct uio *uiop)
2070 {
2071 	int len, left;
2072 	struct dirent *dp = NULL;
2073 	u_int32_t *tl;
2074 	caddr_t cp;
2075 	int32_t t1, t2;
2076 	nfsuint64 *cookiep;
2077 	caddr_t bpos, dpos, cp2;
2078 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2079 	nfsuint64 cookie;
2080 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2081 	struct nfsnode *dnp = VTONFS(vp);
2082 	u_quad_t fileno;
2083 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2084 	int attrflag;
2085 	int v3 = NFS_ISV3(vp);
2086 
	/*
	 * Sanity check: a single iovec with offset and residual both
	 * multiples of DIRBLKSIZ.
	 * NOTE(review): this is compiled only when DIAGNOSTIC is NOT
	 * defined, which looks inverted -- confirm the intent.
	 */
2087 #ifndef DIAGNOSTIC
2088 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2089 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2090 		panic("nfs readdirrpc bad uio");
2091 #endif
2092 
2093 	/*
2094 	 * If there is no cookie, assume directory was stale.
2095 	 */
2096 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2097 	if (cookiep)
2098 		cookie = *cookiep;
2099 	else
2100 		return (NFSERR_BAD_COOKIE);
2101 	/*
2102 	 * Loop around doing readdir rpc's of size nm_readdirsize
2103 	 * truncated to a multiple of DIRBLKSIZ.
2104 	 * The stopping criteria is EOF or buffer full.
2105 	 */
2106 	while (more_dirs && bigenough) {
2107 		nfsstats.rpccnt[NFSPROC_READDIR]++;
2108 		nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2109 			NFSX_READDIR(v3));
2110 		nfsm_fhtom(vp, v3);
		/*
		 * Request arguments: v3 sends both cookie words plus the
		 * two cookie-verifier words, v2 sends one cookie word.
		 * In both cases the final word is the requested size.
		 */
2111 		if (v3) {
2112 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2113 			*tl++ = cookie.nfsuquad[0];
2114 			*tl++ = cookie.nfsuquad[1];
2115 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2116 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2117 		} else {
2118 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2119 			*tl++ = cookie.nfsuquad[0];
2120 		}
2121 		*tl = txdr_unsigned(nmp->nm_readdirsize);
2122 		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, NFSVPCRED(vp));
		/*
		 * v3 reply: post-op attributes first, then (on success) the
		 * new cookie verifier, which is remembered in the nfsnode.
		 * On error the reply is freed and the function bails out.
		 */
2123 		if (v3) {
2124 			nfsm_postop_attr(vp, attrflag);
2125 			if (!error) {
2126 				nfsm_dissect(tl, u_int32_t *,
2127 				    2 * NFSX_UNSIGNED);
2128 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2129 				dnp->n_cookieverf.nfsuquad[1] = *tl;
2130 			} else {
2131 				m_freem(mrep);
2132 				goto nfsmout;
2133 			}
2134 		}
		/* First "entry follows" boolean of the reply. */
2135 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2136 		more_dirs = fxdr_unsigned(int, *tl);
2137 
2138 		/* loop thru the dir entries, doctoring them to 4bsd form */
2139 		while (more_dirs && bigenough) {
			/* Entry header: file id (64 bit for v3) and name length. */
2140 			if (v3) {
2141 				nfsm_dissect(tl, u_int32_t *,
2142 				    3 * NFSX_UNSIGNED);
2143 				fileno = fxdr_hyper(tl);
2144 				len = fxdr_unsigned(int, *(tl + 2));
2145 			} else {
2146 				nfsm_dissect(tl, u_int32_t *,
2147 				    2 * NFSX_UNSIGNED);
2148 				fileno = fxdr_unsigned(u_quad_t, *tl++);
2149 				len = fxdr_unsigned(int, *tl);
2150 			}
			/* Reject name lengths outside 1..NFS_MAXNAMLEN. */
2151 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2152 				error = EBADRPC;
2153 				m_freem(mrep);
2154 				goto nfsmout;
2155 			}
2156 			tlen = nfsm_rndup(len);
2157 			if (tlen == len)
2158 				tlen += 4;	/* To ensure null termination */
			/*
			 * If the record does not fit in what is left of the
			 * current DIRBLKSIZ block, stretch the previous
			 * record to the end of the block and start a new one.
			 */
2159 			left = DIRBLKSIZ - blksiz;
2160 			if ((tlen + DIRHDSIZ) > left) {
2161 				dp->d_reclen += left;
2162 				uiop->uio_iov->iov_base += left;
2163 				uiop->uio_iov->iov_len -= left;
2164 				uiop->uio_offset += left;
2165 				uiop->uio_resid -= left;
2166 				blksiz = 0;
2167 			}
			/* No room left in the caller's buffer: stop copying. */
2168 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2169 				bigenough = 0;
2170 			if (bigenough) {
				/*
				 * Fill in the dirent header in place, copy the
				 * name out of the reply, NUL-terminate it and
				 * step over the padding.
				 */
2171 				dp = (struct dirent *)uiop->uio_iov->iov_base;
2172 				dp->d_fileno = (int)fileno;
2173 				dp->d_namlen = len;
2174 				dp->d_reclen = tlen + DIRHDSIZ;
2175 				dp->d_type = DT_UNKNOWN;
2176 				blksiz += dp->d_reclen;
2177 				if (blksiz == DIRBLKSIZ)
2178 					blksiz = 0;
2179 				uiop->uio_offset += DIRHDSIZ;
2180 				uiop->uio_resid -= DIRHDSIZ;
2181 				uiop->uio_iov->iov_base += DIRHDSIZ;
2182 				uiop->uio_iov->iov_len -= DIRHDSIZ;
2183 				nfsm_mtouio(uiop, len);
2184 				cp = uiop->uio_iov->iov_base;
2185 				tlen -= len;
2186 				*cp = '\0';	/* null terminate */
2187 				uiop->uio_iov->iov_base += tlen;
2188 				uiop->uio_iov->iov_len -= tlen;
2189 				uiop->uio_offset += tlen;
2190 				uiop->uio_resid -= tlen;
2191 			} else
2192 				nfsm_adv(nfsm_rndup(len));
			/*
			 * Entry trailer: the cookie (two words for v3, one for
			 * v2) followed by the next "entry follows" boolean.
			 * The cookie is only remembered for entries that were
			 * actually copied out.
			 */
2193 			if (v3) {
2194 				nfsm_dissect(tl, u_int32_t *,
2195 				    3 * NFSX_UNSIGNED);
2196 			} else {
2197 				nfsm_dissect(tl, u_int32_t *,
2198 				    2 * NFSX_UNSIGNED);
2199 			}
2200 			if (bigenough) {
2201 				cookie.nfsuquad[0] = *tl++;
2202 				if (v3)
2203 					cookie.nfsuquad[1] = *tl++;
2204 			} else if (v3)
2205 				tl += 2;
2206 			else
2207 				tl++;
2208 			more_dirs = fxdr_unsigned(int, *tl);
2209 		}
2210 		/*
2211 		 * If at end of rpc data, get the eof boolean
		 * (more_dirs ends up non-zero when the eof word is 0, so
		 * the outer loop issues another rpc).
2212 		 */
2213 		if (!more_dirs) {
2214 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2215 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2216 		}
2217 		m_freem(mrep);
2218 	}
2219 	/*
2220 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2221 	 * by increasing d_reclen for the last record.
2222 	 */
2223 	if (blksiz > 0) {
2224 		left = DIRBLKSIZ - blksiz;
2225 		dp->d_reclen += left;
2226 		uiop->uio_iov->iov_base += left;
2227 		uiop->uio_iov->iov_len -= left;
2228 		uiop->uio_offset += left;
2229 		uiop->uio_resid -= left;
2230 	}
2231 
2232 	/*
2233 	 * We are now either at the end of the directory or have filled the
2234 	 * block.
	 * End of directory: remember the EOF offset in the nfsnode.
	 * Block filled: store the last cookie for the offset reached.
2235 	 */
2236 	if (bigenough)
2237 		dnp->n_direofoffset = uiop->uio_offset;
2238 	else {
2239 		if (uiop->uio_resid > 0)
2240 			printf("EEK! readdirrpc resid > 0\n");
2241 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2242 		*cookiep = cookie;
2243 	}
2244 nfsmout:
2245 	return (error);
2246 }
2247 
2248 /*
2249  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2250  */
2251 int
2252 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop)
2253 {
2254 	int len, left;
2255 	struct dirent *dp;
2256 	u_int32_t *tl;
2257 	caddr_t cp;
2258 	int32_t t1, t2;
2259 	struct vnode *newvp;
2260 	nfsuint64 *cookiep;
2261 	caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
2262 	struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
2263 	struct nameidata nami, *ndp = &nami;
2264 	struct componentname *cnp = &ndp->ni_cnd;
2265 	nfsuint64 cookie;
2266 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2267 	struct nfsnode *dnp = VTONFS(vp), *np;
2268 	nfsfh_t *fhp;
2269 	u_quad_t fileno;
2270 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2271 	int attrflag, fhsize;
2272 
2273 #ifndef nolint
2274 	dp = (struct dirent *)0;
2275 #endif
2276 #ifndef DIAGNOSTIC
2277 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2278 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2279 		panic("nfs readdirplusrpc bad uio");
2280 #endif
2281 	ndp->ni_dvp = vp;
2282 	newvp = NULLVP;
2283 
2284 	/*
2285 	 * If there is no cookie, assume directory was stale.
2286 	 */
2287 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2288 	if (cookiep)
2289 		cookie = *cookiep;
2290 	else
2291 		return (NFSERR_BAD_COOKIE);
2292 	/*
2293 	 * Loop around doing readdir rpc's of size nm_readdirsize
2294 	 * truncated to a multiple of DIRBLKSIZ.
2295 	 * The stopping criteria is EOF or buffer full.
2296 	 */
2297 	while (more_dirs && bigenough) {
2298 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2299 		nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2300 			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2301 		nfsm_fhtom(vp, 1);
2302  		nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
2303 		*tl++ = cookie.nfsuquad[0];
2304 		*tl++ = cookie.nfsuquad[1];
2305 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2306 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2307 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2308 		*tl = txdr_unsigned(nmp->nm_rsize);
2309 		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, NFSVPCRED(vp));
2310 		nfsm_postop_attr(vp, attrflag);
2311 		if (error) {
2312 			m_freem(mrep);
2313 			goto nfsmout;
2314 		}
2315 		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2316 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2317 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2318 		more_dirs = fxdr_unsigned(int, *tl);
2319 
2320 		/* loop thru the dir entries, doctoring them to 4bsd form */
2321 		while (more_dirs && bigenough) {
2322 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2323 			fileno = fxdr_hyper(tl);
2324 			len = fxdr_unsigned(int, *(tl + 2));
2325 			if (len <= 0 || len > NFS_MAXNAMLEN) {
2326 				error = EBADRPC;
2327 				m_freem(mrep);
2328 				goto nfsmout;
2329 			}
2330 			tlen = nfsm_rndup(len);
2331 			if (tlen == len)
2332 				tlen += 4;	/* To ensure null termination*/
2333 			left = DIRBLKSIZ - blksiz;
2334 			if ((tlen + DIRHDSIZ) > left) {
2335 				dp->d_reclen += left;
2336 				uiop->uio_iov->iov_base += left;
2337 				uiop->uio_iov->iov_len -= left;
2338 				uiop->uio_offset += left;
2339 				uiop->uio_resid -= left;
2340 				blksiz = 0;
2341 			}
2342 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2343 				bigenough = 0;
2344 			if (bigenough) {
2345 				dp = (struct dirent *)uiop->uio_iov->iov_base;
2346 				dp->d_fileno = (int)fileno;
2347 				dp->d_namlen = len;
2348 				dp->d_reclen = tlen + DIRHDSIZ;
2349 				dp->d_type = DT_UNKNOWN;
2350 				blksiz += dp->d_reclen;
2351 				if (blksiz == DIRBLKSIZ)
2352 					blksiz = 0;
2353 				uiop->uio_offset += DIRHDSIZ;
2354 				uiop->uio_resid -= DIRHDSIZ;
2355 				uiop->uio_iov->iov_base += DIRHDSIZ;
2356 				uiop->uio_iov->iov_len -= DIRHDSIZ;
2357 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
2358 				cnp->cn_namelen = len;
2359 				nfsm_mtouio(uiop, len);
2360 				cp = uiop->uio_iov->iov_base;
2361 				tlen -= len;
2362 				*cp = '\0';
2363 				uiop->uio_iov->iov_base += tlen;
2364 				uiop->uio_iov->iov_len -= tlen;
2365 				uiop->uio_offset += tlen;
2366 				uiop->uio_resid -= tlen;
2367 			} else
2368 				nfsm_adv(nfsm_rndup(len));
2369 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2370 			if (bigenough) {
2371 				cookie.nfsuquad[0] = *tl++;
2372 				cookie.nfsuquad[1] = *tl++;
2373 			} else
2374 				tl += 2;
2375 
2376 			/*
2377 			 * Since the attributes are before the file handle
2378 			 * (sigh), we must skip over the attributes and then
2379 			 * come back and get them.
2380 			 */
2381 			attrflag = fxdr_unsigned(int, *tl);
2382 			if (attrflag) {
2383 			    dpossav1 = dpos;
2384 			    mdsav1 = md;
2385 			    nfsm_adv(NFSX_V3FATTR);
2386 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2387 			    doit = fxdr_unsigned(int, *tl);
2388 			    if (doit) {
2389 				nfsm_getfh(fhp, fhsize, 1);
2390 				if (NFS_CMPFH(dnp, fhp, fhsize)) {
2391 				    VREF(vp);
2392 				    newvp = vp;
2393 				    np = dnp;
2394 				} else {
2395 				    error = nfs_nget(vp->v_mount, fhp,
2396 					fhsize, &np);
2397 				    if (error)
2398 					doit = 0;
2399 				    else
2400 					newvp = NFSTOV(np);
2401 				}
2402 			    }
2403 			    if (doit && bigenough) {
2404 				dpossav2 = dpos;
2405 				dpos = dpossav1;
2406 				mdsav2 = md;
2407 				md = mdsav1;
2408 				nfsm_loadattr(newvp, (struct vattr *)0);
2409 				dpos = dpossav2;
2410 				md = mdsav2;
2411 				dp->d_type =
2412 				    IFTODT(VTTOIF(np->n_vattr.va_type));
2413 				ndp->ni_vp = newvp;
2414 			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2415 			    }
2416 			} else {
2417 			    /* Just skip over the file handle */
2418 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2419 			    i = fxdr_unsigned(int, *tl);
2420 			    nfsm_adv(nfsm_rndup(i));
2421 			}
2422 			if (newvp != NULLVP) {
2423 			    if (newvp == vp)
2424 				vrele(newvp);
2425 			    else
2426 				vput(newvp);
2427 			    newvp = NULLVP;
2428 			}
2429 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2430 			more_dirs = fxdr_unsigned(int, *tl);
2431 		}
2432 		/*
2433 		 * If at end of rpc data, get the eof boolean
2434 		 */
2435 		if (!more_dirs) {
2436 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2437 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2438 		}
2439 		m_freem(mrep);
2440 	}
2441 	/*
2442 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2443 	 * by increasing d_reclen for the last record.
2444 	 */
2445 	if (blksiz > 0) {
2446 		left = DIRBLKSIZ - blksiz;
2447 		dp->d_reclen += left;
2448 		uiop->uio_iov->iov_base += left;
2449 		uiop->uio_iov->iov_len -= left;
2450 		uiop->uio_offset += left;
2451 		uiop->uio_resid -= left;
2452 	}
2453 
2454 	/*
2455 	 * We are now either at the end of the directory or have filled the
2456 	 * block.
2457 	 */
2458 	if (bigenough)
2459 		dnp->n_direofoffset = uiop->uio_offset;
2460 	else {
2461 		if (uiop->uio_resid > 0)
2462 			printf("EEK! readdirplusrpc resid > 0\n");
2463 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2464 		*cookiep = cookie;
2465 	}
2466 nfsmout:
2467 	if (newvp != NULLVP) {
2468 	        if (newvp == vp)
2469 			vrele(newvp);
2470 		else
2471 			vput(newvp);
2472 		newvp = NULLVP;
2473 	}
2474 	return (error);
2475 }
2476 
2477 /*
2478  * Silly rename. To make the NFS filesystem that is stateless look a little
2479  * more like the "ufs" a remove of an active vnode is translated to a rename
2480  * to a funny looking filename that is removed by nfs_inactive on the
2481  * nfsnode. There is the potential for another process on a different client
2482  * to create the same funny name between the nfs_lookitup() fails and the
2483  * nfs_rename() completes, but...
2484  */
2485 static int
2486 nfs_sillyrename(dvp, vp, cnp)
2487 	struct vnode *dvp, *vp;
2488 	struct componentname *cnp;
2489 {
2490 	struct sillyrename *sp;
2491 	struct nfsnode *np;
2492 	int error;
2493 
2494 	cache_purge(dvp);
2495 	np = VTONFS(vp);
2496 #ifndef DIAGNOSTIC
2497 	if (vp->v_type == VDIR)
2498 		panic("nfs: sillyrename dir");
2499 #endif
2500 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2501 		M_NFSREQ, M_WAITOK);
2502 	sp->s_cred = crdup(cnp->cn_cred);
2503 	sp->s_dvp = dvp;
2504 	VREF(dvp);
2505 
2506 	/* Fudge together a funny name */
2507 	sp->s_namlen = sprintf(sp->s_name, ".nfsA%08x4.4", (int)cnp->cn_td);
2508 
2509 	/* Try lookitups until we get one that isn't there */
2510 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2511 		cnp->cn_td, (struct nfsnode **)0) == 0) {
2512 		sp->s_name[4]++;
2513 		if (sp->s_name[4] > 'z') {
2514 			error = EINVAL;
2515 			goto bad;
2516 		}
2517 	}
2518 	error = nfs_renameit(dvp, cnp, sp);
2519 	if (error)
2520 		goto bad;
2521 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2522 		cnp->cn_td, &np);
2523 	np->n_sillyrename = sp;
2524 	return (0);
2525 bad:
2526 	vrele(sp->s_dvp);
2527 	crfree(sp->s_cred);
2528 	free((caddr_t)sp, M_NFSREQ);
2529 	return (error);
2530 }
2531 
2532 /*
2533  * Look up a file name and optionally either update the file handle or
2534  * allocate an nfsnode, depending on the value of npp.
2535  * npp == NULL	--> just do the lookup
2536  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2537  *			handled too
2538  * *npp != NULL --> update the file handle in the vnode
2539  */
2540 static int
2541 nfs_lookitup(dvp, name, len, cred, td, npp)
2542 	struct vnode *dvp;
2543 	const char *name;
2544 	int len;
2545 	struct ucred *cred;
2546 	struct thread *td;
2547 	struct nfsnode **npp;
2548 {
2549 	u_int32_t *tl;
2550 	caddr_t cp;
2551 	int32_t t1, t2;
2552 	struct vnode *newvp = (struct vnode *)0;
2553 	struct nfsnode *np, *dnp = VTONFS(dvp);
2554 	caddr_t bpos, dpos, cp2;
2555 	int error = 0, fhlen, attrflag;
2556 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2557 	nfsfh_t *nfhp;
2558 	int v3 = NFS_ISV3(dvp);
2559 
2560 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2561 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2562 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2563 	nfsm_fhtom(dvp, v3);
2564 	nfsm_strtom(name, len, NFS_MAXNAMLEN);
2565 	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
2566 	if (npp && !error) {
2567 		nfsm_getfh(nfhp, fhlen, v3);
2568 		if (*npp) {
2569 		    np = *npp;
2570 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2571 			free((caddr_t)np->n_fhp, M_NFSBIGFH);
2572 			np->n_fhp = &np->n_fh;
2573 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2574 			np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
2575 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2576 		    np->n_fhsize = fhlen;
2577 		    newvp = NFSTOV(np);
2578 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2579 		    VREF(dvp);
2580 		    newvp = dvp;
2581 		} else {
2582 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
2583 		    if (error) {
2584 			m_freem(mrep);
2585 			return (error);
2586 		    }
2587 		    newvp = NFSTOV(np);
2588 		}
2589 		if (v3) {
2590 			nfsm_postop_attr(newvp, attrflag);
2591 			if (!attrflag && *npp == NULL) {
2592 				m_freem(mrep);
2593 				if (newvp == dvp)
2594 					vrele(newvp);
2595 				else
2596 					vput(newvp);
2597 				return (ENOENT);
2598 			}
2599 		} else
2600 			nfsm_loadattr(newvp, (struct vattr *)0);
2601 	}
2602 	nfsm_reqdone;
2603 	if (npp && *npp == NULL) {
2604 		if (error) {
2605 			if (newvp) {
2606 				if (newvp == dvp)
2607 					vrele(newvp);
2608 				else
2609 					vput(newvp);
2610 			}
2611 		} else
2612 			*npp = np;
2613 	}
2614 	return (error);
2615 }
2616 
2617 /*
2618  * Nfs Version 3 commit rpc
2619  */
2620 int
2621 nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct thread *td)
2622 {
2623 	caddr_t cp;
2624 	u_int32_t *tl;
2625 	int32_t t1, t2;
2626 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2627 	caddr_t bpos, dpos, cp2;
2628 	int error = 0, wccflag = NFSV3_WCCRATTR;
2629 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2630 
2631 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
2632 		return (0);
2633 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
2634 	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
2635 	nfsm_fhtom(vp, 1);
2636 	nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2637 	txdr_hyper(offset, tl);
2638 	tl += 2;
2639 	*tl = txdr_unsigned(cnt);
2640 	nfsm_request(vp, NFSPROC_COMMIT, td, NFSVPCRED(vp));
2641 	nfsm_wcc_data(vp, wccflag);
2642 	if (!error) {
2643 		nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
2644 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
2645 			NFSX_V3WRITEVERF)) {
2646 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
2647 				NFSX_V3WRITEVERF);
2648 			error = NFSERR_STALEWRITEVERF;
2649 		}
2650 	}
2651 	nfsm_reqdone;
2652 	return (error);
2653 }
2654 
2655 /*
2656  * Kludge City..
2657  * - make nfs_bmap() essentially a no-op that does no translation
2658  * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
2659  *   (Maybe I could use the process's page mapping, but I was concerned that
2660  *    Kernel Write might not be enabled and also figured copyout() would do
2661  *    a lot more work than bcopy() and also it currently happens in the
2662  *    context of the swapper process (2).
2663  */
2664 static int
2665 nfs_bmap(ap)
2666 	struct vop_bmap_args /* {
2667 		struct vnode *a_vp;
2668 		daddr_t  a_bn;
2669 		struct vnode **a_vpp;
2670 		daddr_t *a_bnp;
2671 		int *a_runp;
2672 		int *a_runb;
2673 	} */ *ap;
2674 {
2675 	struct vnode *vp = ap->a_vp;
2676 
2677 	if (ap->a_vpp != NULL)
2678 		*ap->a_vpp = vp;
2679 	if (ap->a_bnp != NULL)
2680 		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
2681 	if (ap->a_runp != NULL)
2682 		*ap->a_runp = 0;
2683 	if (ap->a_runb != NULL)
2684 		*ap->a_runb = 0;
2685 	return (0);
2686 }
2687 
2688 /*
2689  * Strategy routine.
2690  * For async requests when nfsiod(s) are running, queue the request by
2691  * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2692  * request.
2693  */
2694 static int
2695 nfs_strategy(ap)
2696 	struct vop_strategy_args *ap;
2697 {
2698 	struct buf *bp = ap->a_bp;
2699 	struct thread *td;
2700 	int error = 0;
2701 
2702 	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2703 	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2704 
2705 	if (bp->b_flags & B_PHYS)
2706 		panic("nfs physio");
2707 
2708 	if (bp->b_flags & B_ASYNC)
2709 		td = NULL;
2710 	else
2711 		td = curthread;	/* XXX */
2712 
2713 	/*
2714 	 * If the op is asynchronous and an i/o daemon is waiting
2715 	 * queue the request, wake it up and wait for completion
2716 	 * otherwise just do it ourselves.
2717 	 */
2718 	if ((bp->b_flags & B_ASYNC) == 0 ||
2719 		nfs_asyncio(bp, td))
2720 		error = nfs_doio(bp, td);
2721 	return (error);
2722 }
2723 
2724 /*
2725  * Mmap a file
2726  *
2727  * NB Currently unsupported.
2728  */
2729 /* ARGSUSED */
2730 static int
2731 nfs_mmap(ap)
2732 	struct vop_mmap_args /* {
2733 		struct vnode *a_vp;
2734 		int  a_fflags;
2735 		struct ucred *a_cred;
2736 		struct thread *a_td;
2737 	} */ *ap;
2738 {
2739 
2740 	return (EINVAL);
2741 }
2742 
2743 /*
2744  * fsync vnode op. Just call nfs_flush() with commit == 1.
2745  */
2746 /* ARGSUSED */
2747 static int
2748 nfs_fsync(ap)
2749 	struct vop_fsync_args /* {
2750 		struct vnodeop_desc *a_desc;
2751 		struct vnode * a_vp;
2752 		struct ucred * a_cred;
2753 		int  a_waitfor;
2754 		struct thread * a_td;
2755 	} */ *ap;
2756 {
2757 
2758 	return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
2759 }
2760 
2761 /*
2762  * Flush all the blocks associated with a vnode.
2763  * 	Walk through the buffer pool and push any dirty pages
2764  *	associated with the vnode.
 *
 * vp       vnode whose dirty buffers are flushed
 * waitfor  when MNT_WAIT, busy buffers are waited for and the routine
 *          sleeps until vp->v_numoutput has drained
 * td       thread passed to nfs_commit() and nfs_sigintr()
 * commit   non-zero enables the NFSv3 commit pass and lets
 *          B_NEEDCOMMIT buffers be rewritten on the final pass
 *
 * Returns 0 on the normal path, EINTR when nfs_sigintr() reports an
 * interrupt, or np->n_error when NWRITEERR is set on the node (the flag
 * is cleared when it is reported).
 * NOTE(review): 'error' is not reset after a failed BUF_TIMELOCK()/
 * tsleep(); confirm whether a stale value can reach the final return.
2765  */
2766 static int
2767 nfs_flush(vp, waitfor, td, commit)
2768 	struct vnode *vp;
2769 	int waitfor;
2770 	struct thread *td;
2771 	int commit;
2772 {
2773 	struct nfsnode *np = VTONFS(vp);
2774 	struct buf *bp;
2775 	int i;
2776 	struct buf *nbp;
2777 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2778 	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2779 	int passone = 1;
2780 	u_quad_t off, endoff, toff;
2781 	struct buf **bvec = NULL;
2782 #ifndef NFS_COMMITBVECSIZ
2783 #define NFS_COMMITBVECSIZ	20
2784 #endif
2785 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2786 	int bvecsize = 0, bveccount;
2787 
	/* Interruptible mounts sleep with PCATCH. */
2788 	if (nmp->nm_flag & NFSMNT_INT)
2789 		slpflag = PCATCH;
	/* Without commit there is only a single pass. */
2790 	if (!commit)
2791 		passone = 0;
2792 	/*
2793 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2794 	 * server, but has not been committed to stable storage on the server
2795 	 * yet. On the first pass, the byte range is worked out and the commit
2796 	 * rpc is done. On the second pass, nfs_writebp() is called to do the
2797 	 * job.
2798 	 */
2799 again:
	/* off/endoff accumulate the byte range covered by the commit. */
2800 	off = (u_quad_t)-1;
2801 	endoff = 0;
2802 	bvecpos = 0;
2803 	if (NFS_ISV3(vp) && commit) {
2804 		s = splbio();
2805 		/*
2806 		 * Count up how many buffers waiting for a commit.
2807 		 */
2808 		bveccount = 0;
2809 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2810 			nbp = TAILQ_NEXT(bp, b_vnbufs);
2811 			if (BUF_REFCNT(bp) == 0 &&
2812 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2813 				== (B_DELWRI | B_NEEDCOMMIT))
2814 				bveccount++;
2815 		}
2816 		/*
2817 		 * Allocate space to remember the list of bufs to commit.  It is
2818 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
2819 		 * If we can't get memory (for whatever reason), we will end up
2820 		 * committing the buffers one-by-one in the loop below.
2821 		 */
		/* Release a heap vector left over from an earlier pass. */
2822 		if (bvec != NULL && bvec != bvec_on_stack)
2823 			free(bvec, M_TEMP);
2824 		if (bveccount > NFS_COMMITBVECSIZ) {
2825 			bvec = (struct buf **)
2826 				malloc(bveccount * sizeof(struct buf *),
2827 				       M_TEMP, M_NOWAIT);
2828 			if (bvec == NULL) {
2829 				bvec = bvec_on_stack;
2830 				bvecsize = NFS_COMMITBVECSIZ;
2831 			} else
2832 				bvecsize = bveccount;
2833 		} else {
2834 			bvec = bvec_on_stack;
2835 			bvecsize = NFS_COMMITBVECSIZ;
2836 		}
		/*
		 * Collect up to bvecsize lockable buffers that are both
		 * B_DELWRI and B_NEEDCOMMIT, and widen [off, endoff) to
		 * cover their dirty ranges.
		 */
2837 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2838 			nbp = TAILQ_NEXT(bp, b_vnbufs);
2839 			if (bvecpos >= bvecsize)
2840 				break;
2841 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2842 			    (B_DELWRI | B_NEEDCOMMIT) ||
2843 			    BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
2844 				continue;
2845 			bremfree(bp);
2846 			/*
2847 			 * NOTE: we are not clearing B_DONE here, so we have
2848 			 * to do it later on in this routine if we intend to
2849 			 * initiate I/O on the bp.
2850 			 *
2851 			 * Note: to avoid loopback deadlocks, we do not
2852 			 * assign b_runningbufspace.
2853 			 */
2854 			bp->b_flags |= B_WRITEINPROG;
2855 			vfs_busy_pages(bp, 1);
2856 
2857 			/*
2858 			 * bp is protected by being locked, but nbp is not
2859 			 * and vfs_busy_pages() may sleep.  We have to
2860 			 * recalculate nbp.
2861 			 */
2862 			nbp = TAILQ_NEXT(bp, b_vnbufs);
2863 
2864 			/*
2865 			 * A list of these buffers is kept so that the
2866 			 * second loop knows which buffers have actually
2867 			 * been committed. This is necessary, since there
2868 			 * may be a race between the commit rpc and new
2869 			 * uncommitted writes on the file.
2870 			 */
2871 			bvec[bvecpos++] = bp;
2872 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2873 				bp->b_dirtyoff;
2874 			if (toff < off)
2875 				off = toff;
2876 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2877 			if (toff > endoff)
2878 				endoff = toff;
2879 		}
2880 		splx(s);
2881 	}
2882 	if (bvecpos > 0) {
2883 		/*
2884 		 * Commit data on the server, as required.  Note that
2885 		 * nfs_commit will use the vnode's cred for the commit.
2886 		 */
2887 		retv = nfs_commit(vp, off, (int)(endoff - off), td);
2888 
		/* The server's write verifier changed. */
2889 		if (retv == NFSERR_STALEWRITEVERF)
2890 			nfs_clearcommit(vp->v_mount);
2891 
2892 		/*
2893 		 * Now, either mark the blocks I/O done or mark the
2894 		 * blocks dirty, depending on whether the commit
2895 		 * succeeded.
2896 		 */
2897 		for (i = 0; i < bvecpos; i++) {
2898 			bp = bvec[i];
2899 			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
2900 			if (retv) {
2901 				/*
2902 				 * Error, leave B_DELWRI intact
2903 				 */
2904 				vfs_unbusy_pages(bp);
2905 				brelse(bp);
2906 			} else {
2907 				/*
2908 				 * Success, remove B_DELWRI ( bundirty() ).
2909 				 *
2910 				 * b_dirtyoff/b_dirtyend seem to be NFS
2911 				 * specific.  We should probably move that
2912 				 * into bundirty(). XXX
2913 				 */
2914 				s = splbio();
2915 				vp->v_numoutput++;
2916 				bp->b_flags |= B_ASYNC;
2917 				bundirty(bp);
2918 				bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
2919 				bp->b_dirtyoff = bp->b_dirtyend = 0;
2920 				splx(s);
2921 				biodone(bp);
2922 			}
2923 		}
2924 	}
2925 
2926 	/*
2927 	 * Start/do any write(s) that are required.
2928 	 */
2929 loop:
2930 	s = splbio();
2931 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2932 		nbp = TAILQ_NEXT(bp, b_vnbufs);
2933 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
			/*
			 * Buffer is busy.  Skip it unless this is the final
			 * pass of a MNT_WAIT flush; in that case sleep for
			 * the lock and rescan the list from the start.
			 */
2934 			if (waitfor != MNT_WAIT || passone)
2935 				continue;
2936 			error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
2937 			    "nfsfsync", slpflag, slptimeo);
2938 			splx(s);
2939 			if (error == 0)
2940 				panic("nfs_fsync: inconsistent lock");
2941 			if (error == ENOLCK)
2942 				goto loop;
2943 			if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
2944 				error = EINTR;
2945 				goto done;
2946 			}
			/*
			 * After one interrupted sleep, stop catching signals
			 * and use a 2 second timeout instead.
			 */
2947 			if (slpflag == PCATCH) {
2948 				slpflag = 0;
2949 				slptimeo = 2 * hz;
2950 			}
2951 			goto loop;
2952 		}
2953 		if ((bp->b_flags & B_DELWRI) == 0)
2954 			panic("nfs_fsync: not dirty");
		/*
		 * Buffers still awaiting a commit are left alone on the
		 * first pass and whenever commit is off.
		 */
2955 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2956 			BUF_UNLOCK(bp);
2957 			continue;
2958 		}
2959 		bremfree(bp);
2960 		if (passone || !commit)
2961 		    bp->b_flags |= B_ASYNC;
2962 		else
2963 		    bp->b_flags |= B_ASYNC | B_WRITEINPROG;
2964 		splx(s);
		/* Start the write, then rescan from the head of the list. */
2965 		VOP_BWRITE(bp->b_vp, bp);
2966 		goto loop;
2967 	}
2968 	splx(s);
	/* First pass finished: run the whole sequence again as pass two. */
2969 	if (passone) {
2970 		passone = 0;
2971 		goto again;
2972 	}
2973 	if (waitfor == MNT_WAIT) {
		/* Wait for all writes in progress on the vnode to finish. */
2974 		while (vp->v_numoutput) {
2975 			vp->v_flag |= VBWAIT;
2976 			error = tsleep((caddr_t)&vp->v_numoutput,
2977 				slpflag, "nfsfsync", slptimeo);
2978 			if (error) {
2979 			    if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
2980 				error = EINTR;
2981 				goto done;
2982 			    }
2983 			    if (slpflag == PCATCH) {
2984 				slpflag = 0;
2985 				slptimeo = 2 * hz;
2986 			    }
2987 			}
2988 		}
		/* Buffers dirtied in the meantime: go around again. */
2989 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
2990 			goto loop;
2991 		}
2992 	}
	/* Report (and clear) a write error recorded on the node. */
2993 	if (np->n_flag & NWRITEERR) {
2994 		error = np->n_error;
2995 		np->n_flag &= ~NWRITEERR;
2996 	}
2997 done:
2998 	if (bvec != NULL && bvec != bvec_on_stack)
2999 		free(bvec, M_TEMP);
3000 	return (error);
3001 }
3002 
3003 /*
3004  * NFS advisory byte-level locks.
3005  * Currently unsupported.
3006  */
3007 static int
3008 nfs_advlock(ap)
3009 	struct vop_advlock_args /* {
3010 		struct vnode *a_vp;
3011 		caddr_t  a_id;
3012 		int  a_op;
3013 		struct flock *a_fl;
3014 		int  a_flags;
3015 	} */ *ap;
3016 {
3017 	struct nfsnode *np = VTONFS(ap->a_vp);
3018 
3019 	/*
3020 	 * The following kludge is to allow diskless support to work
3021 	 * until a real NFS lockd is implemented. Basically, just pretend
3022 	 * that this is a local lock.
3023 	 */
3024 	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3025 }
3026 
3027 /*
3028  * Print out the contents of an nfsnode.
3029  */
3030 static int
3031 nfs_print(ap)
3032 	struct vop_print_args /* {
3033 		struct vnode *a_vp;
3034 	} */ *ap;
3035 {
3036 	struct vnode *vp = ap->a_vp;
3037 	struct nfsnode *np = VTONFS(vp);
3038 
3039 	printf("tag VT_NFS, fileid %ld fsid 0x%x",
3040 		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3041 	if (vp->v_type == VFIFO)
3042 		fifo_printinfo(vp);
3043 	printf("\n");
3044 	return (0);
3045 }
3046 
3047 /*
3048  * Just call nfs_writebp() with the force argument set to 1.
3049  *
3050  * NOTE: B_DONE may or may not be set in a_bp on call.
3051  */
3052 static int
3053 nfs_bwrite(ap)
3054 	struct vop_bwrite_args /* {
3055 		struct vnode *a_bp;
3056 	} */ *ap;
3057 {
3058 	return (nfs_writebp(ap->a_bp, 1, curthread));
3059 }
3060 
3061 /*
3062  * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
3063  * the force flag is one and it also handles the B_NEEDCOMMIT flag.  We set
3064  * B_CACHE if this is a VMIO buffer.
3065  */
3066 int
3067 nfs_writebp(bp, force, td)
3068 	struct buf *bp;
3069 	int force;
3070 	struct thread *td;
3071 {
3072 	int s;
3073 	int oldflags = bp->b_flags;
3074 #if 0
3075 	int retv = 1;
3076 	off_t off;
3077 #endif
3078 
3079 	if (BUF_REFCNT(bp) == 0)
3080 		panic("bwrite: buffer is not locked???");
3081 
3082 	if (bp->b_flags & B_INVAL) {
3083 		brelse(bp);
3084 		return(0);
3085 	}
3086 
3087 	bp->b_flags |= B_CACHE;
3088 
3089 	/*
3090 	 * Undirty the bp.  We will redirty it later if the I/O fails.
3091 	 */
3092 
3093 	s = splbio();
3094 	bundirty(bp);
3095 	bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
3096 
3097 	bp->b_vp->v_numoutput++;
3098 	splx(s);
3099 
3100 	/*
3101 	 * Note: to avoid loopback deadlocks, we do not
3102 	 * assign b_runningbufspace.
3103 	 */
3104 	vfs_busy_pages(bp, 1);
3105 
3106 	if (force)
3107 		bp->b_flags |= B_WRITEINPROG;
3108 	BUF_KERNPROC(bp);
3109 	VOP_STRATEGY(bp->b_vp, bp);
3110 
3111 	if( (oldflags & B_ASYNC) == 0) {
3112 		int rtval = biowait(bp);
3113 
3114 		if (oldflags & B_DELWRI) {
3115 			s = splbio();
3116 			reassignbuf(bp, bp->b_vp);
3117 			splx(s);
3118 		}
3119 
3120 		brelse(bp);
3121 		return (rtval);
3122 	}
3123 
3124 	return (0);
3125 }
3126 
3127 /*
3128  * nfs special file access vnode op.
3129  * Essentially just get vattr and then imitate iaccess() since the device is
3130  * local to the client.
3131  */
3132 static int
3133 nfsspec_access(ap)
3134 	struct vop_access_args /* {
3135 		struct vnode *a_vp;
3136 		int  a_mode;
3137 		struct ucred *a_cred;
3138 		struct thread *a_td;
3139 	} */ *ap;
3140 {
3141 	struct vattr *vap;
3142 	gid_t *gp;
3143 	struct ucred *cred = ap->a_cred;
3144 	struct vnode *vp = ap->a_vp;
3145 	mode_t mode = ap->a_mode;
3146 	struct vattr vattr;
3147 	int i;
3148 	int error;
3149 
3150 	/*
3151 	 * Disallow write attempts on filesystems mounted read-only;
3152 	 * unless the file is a socket, fifo, or a block or character
3153 	 * device resident on the filesystem.
3154 	 */
3155 	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3156 		switch (vp->v_type) {
3157 		case VREG:
3158 		case VDIR:
3159 		case VLNK:
3160 			return (EROFS);
3161 		default:
3162 			break;
3163 		}
3164 	}
3165 	/*
3166 	 * If you're the super-user,
3167 	 * you always get access.
3168 	 */
3169 	if (cred->cr_uid == 0)
3170 		return (0);
3171 	vap = &vattr;
3172 	error = VOP_GETATTR(vp, vap, ap->a_td);
3173 	if (error)
3174 		return (error);
3175 	/*
3176 	 * Access check is based on only one of owner, group, public.
3177 	 * If not owner, then check group. If not a member of the
3178 	 * group, then check public access.
3179 	 */
3180 	if (cred->cr_uid != vap->va_uid) {
3181 		mode >>= 3;
3182 		gp = cred->cr_groups;
3183 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
3184 			if (vap->va_gid == *gp)
3185 				goto found;
3186 		mode >>= 3;
3187 found:
3188 		;
3189 	}
3190 	error = (vap->va_mode & mode) == mode ? 0 : EACCES;
3191 	return (error);
3192 }
3193 
3194 /*
3195  * Read wrapper for special devices.
3196  */
3197 static int
3198 nfsspec_read(ap)
3199 	struct vop_read_args /* {
3200 		struct vnode *a_vp;
3201 		struct uio *a_uio;
3202 		int  a_ioflag;
3203 		struct ucred *a_cred;
3204 	} */ *ap;
3205 {
3206 	struct nfsnode *np = VTONFS(ap->a_vp);
3207 
3208 	/*
3209 	 * Set access flag.
3210 	 */
3211 	np->n_flag |= NACC;
3212 	getnanotime(&np->n_atim);
3213 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
3214 }
3215 
3216 /*
3217  * Write wrapper for special devices.
3218  */
3219 static int
3220 nfsspec_write(ap)
3221 	struct vop_write_args /* {
3222 		struct vnode *a_vp;
3223 		struct uio *a_uio;
3224 		int  a_ioflag;
3225 		struct ucred *a_cred;
3226 	} */ *ap;
3227 {
3228 	struct nfsnode *np = VTONFS(ap->a_vp);
3229 
3230 	/*
3231 	 * Set update flag.
3232 	 */
3233 	np->n_flag |= NUPD;
3234 	getnanotime(&np->n_mtim);
3235 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
3236 }
3237 
3238 /*
3239  * Close wrapper for special devices.
3240  *
3241  * Update the times on the nfsnode then do device close.
3242  */
3243 static int
3244 nfsspec_close(ap)
3245 	struct vop_close_args /* {
3246 		struct vnode *a_vp;
3247 		int  a_fflag;
3248 		struct ucred *a_cred;
3249 		struct thread *a_td;
3250 	} */ *ap;
3251 {
3252 	struct vnode *vp = ap->a_vp;
3253 	struct nfsnode *np = VTONFS(vp);
3254 	struct vattr vattr;
3255 
3256 	if (np->n_flag & (NACC | NUPD)) {
3257 		np->n_flag |= NCHG;
3258 		if (vp->v_usecount == 1 &&
3259 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3260 			VATTR_NULL(&vattr);
3261 			if (np->n_flag & NACC)
3262 				vattr.va_atime = np->n_atim;
3263 			if (np->n_flag & NUPD)
3264 				vattr.va_mtime = np->n_mtim;
3265 			(void)VOP_SETATTR(vp, &vattr, NFSVPCRED(vp), ap->a_td);
3266 		}
3267 	}
3268 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
3269 }
3270 
3271 /*
3272  * Read wrapper for fifos.
3273  */
3274 static int
3275 nfsfifo_read(ap)
3276 	struct vop_read_args /* {
3277 		struct vnode *a_vp;
3278 		struct uio *a_uio;
3279 		int  a_ioflag;
3280 		struct ucred *a_cred;
3281 	} */ *ap;
3282 {
3283 	struct nfsnode *np = VTONFS(ap->a_vp);
3284 
3285 	/*
3286 	 * Set access flag.
3287 	 */
3288 	np->n_flag |= NACC;
3289 	getnanotime(&np->n_atim);
3290 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
3291 }
3292 
3293 /*
3294  * Write wrapper for fifos.
3295  */
3296 static int
3297 nfsfifo_write(ap)
3298 	struct vop_write_args /* {
3299 		struct vnode *a_vp;
3300 		struct uio *a_uio;
3301 		int  a_ioflag;
3302 		struct ucred *a_cred;
3303 	} */ *ap;
3304 {
3305 	struct nfsnode *np = VTONFS(ap->a_vp);
3306 
3307 	/*
3308 	 * Set update flag.
3309 	 */
3310 	np->n_flag |= NUPD;
3311 	getnanotime(&np->n_mtim);
3312 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
3313 }
3314 
3315 /*
3316  * Close wrapper for fifos.
3317  *
3318  * Update the times on the nfsnode then do fifo close.
3319  */
3320 static int
3321 nfsfifo_close(ap)
3322 	struct vop_close_args /* {
3323 		struct vnode *a_vp;
3324 		int  a_fflag;
3325 		struct thread *a_td;
3326 	} */ *ap;
3327 {
3328 	struct vnode *vp = ap->a_vp;
3329 	struct nfsnode *np = VTONFS(vp);
3330 	struct vattr vattr;
3331 	struct timespec ts;
3332 
3333 	if (np->n_flag & (NACC | NUPD)) {
3334 		getnanotime(&ts);
3335 		if (np->n_flag & NACC)
3336 			np->n_atim = ts;
3337 		if (np->n_flag & NUPD)
3338 			np->n_mtim = ts;
3339 		np->n_flag |= NCHG;
3340 		if (vp->v_usecount == 1 &&
3341 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3342 			VATTR_NULL(&vattr);
3343 			if (np->n_flag & NACC)
3344 				vattr.va_atime = np->n_atim;
3345 			if (np->n_flag & NUPD)
3346 				vattr.va_mtime = np->n_mtim;
3347 			(void)VOP_SETATTR(vp, &vattr, NFSVPCRED(vp), ap->a_td);
3348 		}
3349 	}
3350 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
3351 }
3352 
3353