xref: /dflybsd-src/sys/vfs/nfs/nfs_serv.c (revision cd29885abfb8f68adb0c082e313b891156d66964)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40 
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *	- do not mix the phases, since the nfsm_?? macros can return failures
50  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *	error number iff error != 0 whereas
54  *	returning an error from the server function implies a fatal error
55  *	such as a badly constructed rpc request that should be dropped without
56  *	a reply.
57  *	For Version 3, nfsm_reply() does not return for the error case, since
58  *	most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *	Warning: always pay careful attention to resource cleanup on return
62  *	and note that nfsm_*() macros can terminate a procedure on certain
63  *	errors.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84 
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89 
90 #include <sys/buf2.h>
91 
92 #include <sys/thread2.h>
93 
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99 
/*
 * Optional debug trace.  When NFSRV_DEBUG is defined, nfsdbprintf((...))
 * pastes its single, already parenthesized argument after kprintf, which
 * is why the call sites in this file use double parentheses.  Without
 * NFSRV_DEBUG the macro expands to nothing.
 */
100 #ifdef NFSRV_DEBUG
101 #define nfsdbprintf(info)	kprintf info
102 #else
103 #define nfsdbprintf(info)
104 #endif
105 
106 #define MAX_COMMIT_COUNT	(1024 * 1024)
107 
/*
 * Sizing and aging constants for the read heuristic table consulted by
 * nfsrv_read():
 *   NUM_HEURISTIC - number of slots in nfsheur[]
 *   NHUSE_INIT    - nh_use assigned when a slot is claimed for a new vnode
 *   NHUSE_INC     - added to nh_use every time nfsrv_read() uses the slot
 *   NHUSE_MAX     - upper clamp applied to nh_use
 */
108 #define NUM_HEURISTIC		1017
109 #define NHUSE_INIT		64
110 #define NHUSE_INC		16
111 #define NHUSE_MAX		2048
112 
/*
 * One slot of the read heuristic table.  nfsrv_read() matches a slot by
 * comparing nh_vp against the vnode pointer, stores the offset at which
 * the previous read ended in nh_nextr, and shifts nh_seqcount by
 * IO_SEQSHIFT into the I/O flags it passes to VOP_READ().
 */
113 static struct nfsheur {
114     struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
115     off_t nh_nextr;		/* next offset for sequential detection */
116     int nh_use;			/* use count for selection */
117     int nh_seqcount;		/* heuristic */
118 } nfsheur[NUM_HEURISTIC];
119 
/*
 * NFS file-type tables, one per protocol version.  They differ only in
 * entries 6 and 7: NFSOCK/NFFIFO for v3, NFNON/NFCHR for v2.
 * NOTE(review): presumably indexed by vnode type -- the users of these
 * tables are not in this part of the file; confirm.
 */
120 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
121 		      NFFIFO, NFNON };
/*
 * What follows is compiled only when NFS_NOSERVER is not defined; the
 * matching #endif lies beyond this part of the file.
 */
122 #ifndef NFS_NOSERVER
123 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
124 		      NFCHR, NFNON };
125 
/* Write-gathering delays, initialised from NFS_GATHERDELAY (v2) and 0 (v3). */
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128 
129 static struct timespec	nfsver;
130 
131 SYSCTL_DECL(_vfs_nfs);
132 
/*
 * nfs_async: when non-zero, nfsrv_write() treats NFS v2 writes as
 * NFSV3WRITE_UNSTABLE instead of the default NFSV3WRITE_FILESYNC.
 * Registered read-write under the _vfs_nfs sysctl node as "async".
 */
133 int nfs_async;
134 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
/* Commit counters, registered read-write as "commit_blks" / "commit_miss". */
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139 
/* Forward declarations of file-local helpers defined later in the file. */
140 static int nfsrv_access (struct mount *, struct vnode *, int,
141 			struct ucred *, int, struct thread *, int);
142 static void nfsrvw_coalesce (struct nfsrv_descript *,
143 		struct nfsrv_descript *);
144 
145 /*
146  * nfs v3 access service
 *
 * Reads a file handle and a requested-access bitmask from the request,
 * converts the handle to a vnode with nfsrv_fhtovp(), and clears from the
 * bitmask each group of NFSV3ACCESS_* bits whose nfsrv_access() probe
 * (VREAD, VWRITE, VEXEC) returns non-zero.  The reply is built from the
 * post-op attributes followed by the remaining bitmask.
 *
 * The reply mbuf chain (info.mreq, possibly NULL) is handed back through
 * *mrq.  The return value is whatever 'error' holds at nfsmout; the
 * explicit failure path below resets it to 0 after building its reply.
 *
 * NEGREPLYOUT/NULLOUT/NEGKEEPOUT are defined outside this file.  The
 * comment at the top of this file warns that the nfsm macros can terminate
 * a procedure, so vp is kept NULL whenever no vnode is held.
 *
 * NOTE(review): info.v3 is not assigned in this function, although the
 * setattr/lookup/readlink/read handlers in this file do assign it --
 * confirm that none of the nfsm_*() helpers called here read it.
147  */
148 int
149 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
150 	      struct thread *td, struct mbuf **mrq)
151 {
152 	struct sockaddr *nam = nfsd->nd_nam;
153 	struct ucred *cred = &nfsd->nd_cr;
154 	struct vnode *vp = NULL;
155 	struct mount *mp = NULL;
156 	nfsfh_t nfh;
157 	fhandle_t *fhp;
158 	int error = 0, rdonly, getret;
159 	struct vattr vattr, *vap = &vattr;
160 	u_long testmode, nfsmode;
161 	struct nfsm_info info;
162 	u_int32_t *tl;
163 
	/* Parsing state is picked up from the request descriptor. */
164 	info.dpos = nfsd->nd_dpos;
165 	info.md = nfsd->nd_md;
166 	info.mrep = nfsd->nd_mrep;
167 	info.mreq = NULL;
168 
169 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Phase 1: file handle, then the 32-bit requested-access word. */
170 	fhp = &nfh.fh_generic;
171 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
172 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
173 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
174 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
175 	if (error) {
		/* No vnode: reply with the status and "no attributes". */
176 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
177 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
178 		error = 0;
179 		goto nfsmout;
180 	}
	/* tl still points at the access word dissected above. */
181 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
182 	if ((nfsmode & NFSV3ACCESS_READ) &&
183 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
184 		nfsmode &= ~NFSV3ACCESS_READ;
	/* The write-class bits include DELETE only for directories. */
185 	if (vp->v_type == VDIR)
186 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
187 			NFSV3ACCESS_DELETE);
188 	else
189 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
190 	if ((nfsmode & testmode) &&
191 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
192 		nfsmode &= ~testmode;
	/* VEXEC is reported as LOOKUP on directories, EXECUTE otherwise. */
193 	if (vp->v_type == VDIR)
194 		testmode = NFSV3ACCESS_LOOKUP;
195 	else
196 		testmode = NFSV3ACCESS_EXECUTE;
197 	if ((nfsmode & testmode) &&
198 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
199 		nfsmode &= ~testmode;
	/*
	 * Fetch attributes, then release the vnode before building the
	 * reply.  Clearing vp keeps the nfsmout cleanup from releasing it
	 * a second time.
	 */
200 	getret = VOP_GETATTR(vp, vap);
201 	vput(vp);
202 	vp = NULL;
203 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
204 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
205 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
	/* Last reply word: the access bits that survived the probes. */
206 	tl = nfsm_build(&info, NFSX_UNSIGNED);
207 	*tl = txdr_unsigned(nfsmode);
208 nfsmout:
209 	*mrq = info.mreq;
210 	if (vp)
211 		vput(vp);
212 	return(error);
213 }
214 
215 /*
216  * nfs getattr service
217  */
218 int
219 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
220 	      struct thread *td, struct mbuf **mrq)
221 {
222 	struct sockaddr *nam = nfsd->nd_nam;
223 	struct ucred *cred = &nfsd->nd_cr;
224 	struct nfs_fattr *fp;
225 	struct vattr va;
226 	struct vattr *vap = &va;
227 	struct vnode *vp = NULL;
228 	struct mount *mp = NULL;
229 	nfsfh_t nfh;
230 	fhandle_t *fhp;
231 	int error = 0, rdonly;
232 	struct nfsm_info info;
233 
234 	info.mrep = nfsd->nd_mrep;
235 	info.md = nfsd->nd_md;
236 	info.dpos = nfsd->nd_dpos;
237 	info.mreq = NULL;
238 
239 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
240 	fhp = &nfh.fh_generic;
241 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
242 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
243 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
244 	if (error) {
245 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
246 		error = 0;
247 		goto nfsmout;
248 	}
249 	error = VOP_GETATTR(vp, vap);
250 	vput(vp);
251 	vp = NULL;
252 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
253 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
254 	if (error) {
255 		error = 0;
256 		goto nfsmout;
257 	}
258 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259 	nfsm_srvfattr(nfsd, vap, fp);
260 	/* fall through */
261 
262 nfsmout:
263 	*mrq = info.mreq;
264 	if (vp)
265 		vput(vp);
266 	return(error);
267 }
268 
269 /*
270  * nfs setattr service
271  */
272 int
273 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
274 	      struct thread *td, struct mbuf **mrq)
275 {
276 	struct sockaddr *nam = nfsd->nd_nam;
277 	struct ucred *cred = &nfsd->nd_cr;
278 	struct vattr va, preat;
279 	struct vattr *vap = &va;
280 	struct nfsv2_sattr *sp;
281 	struct nfs_fattr *fp;
282 	struct vnode *vp = NULL;
283 	struct mount *mp = NULL;
284 	nfsfh_t nfh;
285 	fhandle_t *fhp;
286 	u_int32_t *tl;
287 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
288 	int gcheck = 0;
289 	struct timespec guard;
290 	struct nfsm_info info;
291 
292 	info.mrep = nfsd->nd_mrep;
293 	info.mreq = NULL;
294 	info.md = nfsd->nd_md;
295 	info.dpos = nfsd->nd_dpos;
296 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
297 
298 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
299 	fhp = &nfh.fh_generic;
300 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
301 	VATTR_NULL(vap);
302 	if (info.v3) {
303 		ERROROUT(nfsm_srvsattr(&info, vap));
304 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
305 		gcheck = fxdr_unsigned(int, *tl);
306 		if (gcheck) {
307 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
308 			fxdr_nfsv3time(tl, &guard);
309 		}
310 	} else {
311 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
312 		/*
313 		 * Nah nah nah nah na nah
314 		 * There is a bug in the Sun client that puts 0xffff in the mode
315 		 * field of sattr when it should put in 0xffffffff. The u_short
316 		 * doesn't sign extend.
317 		 * --> check the low order 2 bytes for 0xffff
318 		 */
319 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
320 			vap->va_mode = nfstov_mode(sp->sa_mode);
321 		if (sp->sa_uid != nfs_xdrneg1)
322 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
323 		if (sp->sa_gid != nfs_xdrneg1)
324 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
325 		if (sp->sa_size != nfs_xdrneg1)
326 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
327 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
328 #ifdef notyet
329 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
330 #else
331 			vap->va_atime.tv_sec =
332 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
333 			vap->va_atime.tv_nsec = 0;
334 #endif
335 		}
336 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
337 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
338 
339 	}
340 
341 	/*
342 	 * Now that we have all the fields, lets do it.
343 	 */
344 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
345 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
346 	if (error) {
347 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
348 				      2 * NFSX_UNSIGNED, &error));
349 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
350 				 postat_ret, vap);
351 		error = 0;
352 		goto nfsmout;
353 	}
354 
355 	/*
356 	 * vp now an active resource, pay careful attention to cleanup
357 	 */
358 
359 	if (info.v3) {
360 		error = preat_ret = VOP_GETATTR(vp, &preat);
361 		if (!error && gcheck &&
362 			(preat.va_ctime.tv_sec != guard.tv_sec ||
363 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
364 			error = NFSERR_NOT_SYNC;
365 		if (error) {
366 			vput(vp);
367 			vp = NULL;
368 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
369 					      NFSX_WCCDATA(info.v3), &error));
370 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
371 					 postat_ret, vap);
372 			error = 0;
373 			goto nfsmout;
374 		}
375 	}
376 
377 	/*
378 	 * If the size is being changed write acces is required, otherwise
379 	 * just check for a read only file system.
380 	 */
381 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
382 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
383 			error = EROFS;
384 			goto out;
385 		}
386 	} else {
387 		if (vp->v_type == VDIR) {
388 			error = EISDIR;
389 			goto out;
390 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
391 			    td, 0)) != 0){
392 			goto out;
393 		}
394 	}
395 	error = VOP_SETATTR(vp, vap, cred);
396 	postat_ret = VOP_GETATTR(vp, vap);
397 	if (!error)
398 		error = postat_ret;
399 out:
400 	vput(vp);
401 	vp = NULL;
402 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
403 		   NFSX_WCCORFATTR(info.v3), &error));
404 	if (info.v3) {
405 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
406 				 postat_ret, vap);
407 		error = 0;
408 		goto nfsmout;
409 	} else {
410 		fp = nfsm_build(&info, NFSX_V2FATTR);
411 		nfsm_srvfattr(nfsd, vap, fp);
412 	}
413 	/* fall through */
414 
415 nfsmout:
416 	*mrq = info.mreq;
417 	if (vp)
418 		vput(vp);
419 	return(error);
420 }
421 
422 /*
423  * nfs lookup rpc
424  */
425 int
426 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
427 	     struct thread *td, struct mbuf **mrq)
428 {
429 	struct sockaddr *nam = nfsd->nd_nam;
430 	struct ucred *cred = &nfsd->nd_cr;
431 	struct nfs_fattr *fp;
432 	struct nlookupdata nd;
433 	struct vnode *vp;
434 	struct vnode *dirp;
435 	struct nchandle nch;
436 	nfsfh_t nfh;
437 	fhandle_t *fhp;
438 	int error = 0, len, dirattr_ret = 1;
439 	int pubflag;
440 	struct vattr va, dirattr, *vap = &va;
441 	struct nfsm_info info;
442 
443 	info.mrep = nfsd->nd_mrep;
444 	info.mreq = NULL;
445 	info.md = nfsd->nd_md;
446 	info.dpos = nfsd->nd_dpos;
447 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
448 
449 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
450 	nlookup_zero(&nd);
451 	dirp = NULL;
452 	vp = NULL;
453 
454 	fhp = &nfh.fh_generic;
455 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
456 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
457 
458 	pubflag = nfs_ispublicfh(fhp);
459 
460 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
461 		fhp, len, slp, nam, &info.md, &info.dpos,
462 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
463 
464 	/*
465 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
466 	 * structure in case macros jump to nfsmout.
467 	 */
468 
469 	if (error) {
470 		if (dirp) {
471 			if (info.v3)
472 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
473 			vrele(dirp);
474 			dirp = NULL;
475 		}
476 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
477 				      NFSX_POSTOPATTR(info.v3), &error));
478 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
479 		error = 0;
480 		goto nfsmout;
481 	}
482 
483 	/*
484 	 * Locate index file for public filehandle
485 	 *
486 	 * error is 0 on entry and 0 on exit from this block.
487 	 */
488 
489 	if (pubflag) {
490 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
491 			/*
492 			 * Setup call to lookup() to see if we can find
493 			 * the index file. Arguably, this doesn't belong
494 			 * in a kernel.. Ugh.  If an error occurs, do not
495 			 * try to install an index file and then clear the
496 			 * error.
497 			 *
498 			 * When we replace nd with ind and redirect ndp,
499 			 * maintenance of ni_startdir and ni_vp shift to
500 			 * ind and we have to clean them up in the old nd.
501 			 * However, the cnd resource continues to be maintained
502 			 * via the original nd.  Confused?  You aren't alone!
503 			 */
504 			vn_unlock(vp);
505 			cache_copy(&nd.nl_nch, &nch);
506 			nlookup_done(&nd);
507 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
508 						UIO_SYSSPACE, 0, cred, &nch);
509 			cache_drop(&nch);
510 			if (error == 0)
511 				error = nlookup(&nd);
512 
513 			if (error == 0) {
514 				/*
515 				 * Found an index file. Get rid of
516 				 * the old references.  transfer vp and
517 				 * load up the new vp.  Fortunately we do
518 				 * not have to deal with dvp, that would be
519 				 * a huge mess.
520 				 */
521 				if (dirp)
522 					vrele(dirp);
523 				dirp = vp;
524 				vp = NULL;
525 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
526 							LK_EXCLUSIVE, &vp);
527 				KKASSERT(error == 0);
528 			}
529 			error = 0;
530 		}
531 		/*
532 		 * If the public filehandle was used, check that this lookup
533 		 * didn't result in a filehandle outside the publicly exported
534 		 * filesystem.  We clear the poor vp here to avoid lockups due
535 		 * to NFS I/O.
536 		 */
537 
538 		if (vp->v_mount != nfs_pub.np_mount) {
539 			vput(vp);
540 			vp = NULL;
541 			error = EPERM;
542 		}
543 	}
544 
545 	if (dirp) {
546 		if (info.v3)
547 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
548 		vrele(dirp);
549 		dirp = NULL;
550 	}
551 
552 	/*
553 	 * Resources at this point:
554 	 *	ndp->ni_vp	may not be NULL
555 	 *
556 	 */
557 
558 	if (error) {
559 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
560 				      NFSX_POSTOPATTR(info.v3), &error));
561 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
562 		error = 0;
563 		goto nfsmout;
564 	}
565 
566 	/*
567 	 * Clear out some resources prior to potentially blocking.  This
568 	 * is not as critical as ni_dvp resources in other routines, but
569 	 * it helps.
570 	 */
571 	nlookup_done(&nd);
572 
573 	/*
574 	 * Get underlying attribute, then release remaining resources ( for
575 	 * the same potential blocking reason ) and reply.
576 	 */
577 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
578 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
579 	if (!error)
580 		error = VOP_GETATTR(vp, vap);
581 
582 	vput(vp);
583 	vp = NULL;
584 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
585 			      NFSX_SRVFH(info.v3) +
586 			      NFSX_POSTOPORFATTR(info.v3) +
587 			      NFSX_POSTOPATTR(info.v3),
588 			      &error));
589 	if (error) {
590 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
591 		error = 0;
592 		goto nfsmout;
593 	}
594 	nfsm_srvfhtom(&info, fhp);
595 	if (info.v3) {
596 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
597 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
598 	} else {
599 		fp = nfsm_build(&info, NFSX_V2FATTR);
600 		nfsm_srvfattr(nfsd, vap, fp);
601 	}
602 
603 nfsmout:
604 	*mrq = info.mreq;
605 	if (dirp)
606 		vrele(dirp);
607 	nlookup_done(&nd);		/* may be called twice */
608 	if (vp)
609 		vput(vp);
610 	return (error);
611 }
612 
613 /*
614  * nfs readlink service
615  */
616 int
617 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
618 	       struct thread *td, struct mbuf **mrq)
619 {
620 	struct sockaddr *nam = nfsd->nd_nam;
621 	struct ucred *cred = &nfsd->nd_cr;
622 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
623 	struct iovec *ivp = iv;
624 	u_int32_t *tl;
625 	int error = 0, rdonly, i, tlen, len, getret;
626 	struct mbuf *mp1, *mp2, *mp3;
627 	struct vnode *vp = NULL;
628 	struct mount *mp = NULL;
629 	struct vattr attr;
630 	nfsfh_t nfh;
631 	fhandle_t *fhp;
632 	struct uio io, *uiop = &io;
633 	struct nfsm_info info;
634 
635 	info.mrep = nfsd->nd_mrep;
636 	info.mreq = NULL;
637 	info.md = nfsd->nd_md;
638 	info.dpos = nfsd->nd_dpos;
639 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
640 
641 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
642 #ifndef nolint
643 	mp2 = NULL;
644 #endif
645 	mp3 = NULL;
646 	fhp = &nfh.fh_generic;
647 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
648 	len = 0;
649 	i = 0;
650 	while (len < NFS_MAXPATHLEN) {
651 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
652 		mp1->m_len = MCLBYTES;
653 		if (len == 0)
654 			mp3 = mp2 = mp1;
655 		else {
656 			mp2->m_next = mp1;
657 			mp2 = mp1;
658 		}
659 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
660 			mp1->m_len = NFS_MAXPATHLEN-len;
661 			len = NFS_MAXPATHLEN;
662 		} else
663 			len += mp1->m_len;
664 		ivp->iov_base = mtod(mp1, caddr_t);
665 		ivp->iov_len = mp1->m_len;
666 		i++;
667 		ivp++;
668 	}
669 	uiop->uio_iov = iv;
670 	uiop->uio_iovcnt = i;
671 	uiop->uio_offset = 0;
672 	uiop->uio_resid = len;
673 	uiop->uio_rw = UIO_READ;
674 	uiop->uio_segflg = UIO_SYSSPACE;
675 	uiop->uio_td = NULL;
676 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
677 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
678 	if (error) {
679 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
680 				      2 * NFSX_UNSIGNED, &error));
681 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
682 		error = 0;
683 		goto nfsmout;
684 	}
685 	if (vp->v_type != VLNK) {
686 		if (info.v3)
687 			error = EINVAL;
688 		else
689 			error = ENXIO;
690 		goto out;
691 	}
692 	error = VOP_READLINK(vp, uiop, cred);
693 out:
694 	getret = VOP_GETATTR(vp, &attr);
695 	vput(vp);
696 	vp = NULL;
697 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
698 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
699 			     &error));
700 	if (info.v3) {
701 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
702 		if (error) {
703 			error = 0;
704 			goto nfsmout;
705 		}
706 	}
707 	if (uiop->uio_resid > 0) {
708 		len -= uiop->uio_resid;
709 		tlen = nfsm_rndup(len);
710 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
711 	}
712 	tl = nfsm_build(&info, NFSX_UNSIGNED);
713 	*tl = txdr_unsigned(len);
714 	info.mb->m_next = mp3;
715 	mp3 = NULL;
716 nfsmout:
717 	*mrq = info.mreq;
718 	if (mp3)
719 		m_freem(mp3);
720 	if (vp)
721 		vput(vp);
722 	return(error);
723 }
724 
725 /*
726  * nfs read service
727  */
728 int
729 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
730 	   struct thread *td, struct mbuf **mrq)
731 {
732 	struct nfsm_info info;
733 	struct sockaddr *nam = nfsd->nd_nam;
734 	struct ucred *cred = &nfsd->nd_cr;
735 	struct iovec *iv;
736 	struct iovec *iv2;
737 	struct mbuf *m;
738 	struct nfs_fattr *fp;
739 	u_int32_t *tl;
740 	int i;
741 	int reqlen;
742 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
743 	struct mbuf *m2;
744 	struct vnode *vp = NULL;
745 	struct mount *mp = NULL;
746 	nfsfh_t nfh;
747 	fhandle_t *fhp;
748 	struct uio io, *uiop = &io;
749 	struct vattr va, *vap = &va;
750 	struct nfsheur *nh;
751 	off_t off;
752 	int ioflag = 0;
753 
754 	info.mrep = nfsd->nd_mrep;
755 	info.mreq = NULL;
756 	info.md = nfsd->nd_md;
757 	info.dpos = nfsd->nd_dpos;
758 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
759 
760 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
761 	fhp = &nfh.fh_generic;
762 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
763 	if (info.v3) {
764 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
765 		off = fxdr_hyper(tl);
766 	} else {
767 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
768 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
769 	}
770 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
771 					    NFS_SRVMAXDATA(nfsd), &error));
772 
773 	/*
774 	 * Reference vp.  If an error occurs, vp will be invalid, but we
775 	 * have to NULL it just in case.  The macros might goto nfsmout
776 	 * as well.
777 	 */
778 
779 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
780 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
781 	if (error) {
782 		vp = NULL;
783 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
784 				      2 * NFSX_UNSIGNED, &error));
785 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
786 		error = 0;
787 		goto nfsmout;
788 	}
789 
790 	if (vp->v_type != VREG) {
791 		if (info.v3)
792 			error = EINVAL;
793 		else
794 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
795 	}
796 	if (!error) {
797 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
798 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
799 	}
800 	getret = VOP_GETATTR(vp, vap);
801 	if (!error)
802 		error = getret;
803 	if (error) {
804 		vput(vp);
805 		vp = NULL;
806 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
807 				      NFSX_POSTOPATTR(info.v3), &error));
808 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
809 		error = 0;
810 		goto nfsmout;
811 	}
812 
813 	/*
814 	 * Calculate byte count to read
815 	 */
816 
817 	if (off >= vap->va_size)
818 		cnt = 0;
819 	else if ((off + reqlen) > vap->va_size)
820 		cnt = vap->va_size - off;
821 	else
822 		cnt = reqlen;
823 
824 	/*
825 	 * Calculate seqcount for heuristic
826 	 */
827 
828 	{
829 		int hi;
830 		int try = 32;
831 
832 		/*
833 		 * Locate best candidate
834 		 */
835 
836 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
837 		nh = &nfsheur[hi];
838 
839 		while (try--) {
840 			if (nfsheur[hi].nh_vp == vp) {
841 				nh = &nfsheur[hi];
842 				break;
843 			}
844 			if (nfsheur[hi].nh_use > 0)
845 				--nfsheur[hi].nh_use;
846 			hi = (hi + 1) % NUM_HEURISTIC;
847 			if (nfsheur[hi].nh_use < nh->nh_use)
848 				nh = &nfsheur[hi];
849 		}
850 
851 		if (nh->nh_vp != vp) {
852 			nh->nh_vp = vp;
853 			nh->nh_nextr = off;
854 			nh->nh_use = NHUSE_INIT;
855 			if (off == 0)
856 				nh->nh_seqcount = 4;
857 			else
858 				nh->nh_seqcount = 1;
859 		}
860 
861 		/*
862 		 * Calculate heuristic
863 		 */
864 
865 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
866 			if (++nh->nh_seqcount > IO_SEQMAX)
867 				nh->nh_seqcount = IO_SEQMAX;
868 		} else if (nh->nh_seqcount > 1) {
869 			nh->nh_seqcount = 1;
870 		} else {
871 			nh->nh_seqcount = 0;
872 		}
873 		nh->nh_use += NHUSE_INC;
874 		if (nh->nh_use > NHUSE_MAX)
875 			nh->nh_use = NHUSE_MAX;
876 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
877         }
878 
879 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
880 			      NFSX_POSTOPORFATTR(info.v3) +
881 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
882 			      &error));
883 	if (info.v3) {
884 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
885 		*tl++ = nfs_true;
886 		fp = (struct nfs_fattr *)tl;
887 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
888 	} else {
889 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
890 		fp = (struct nfs_fattr *)tl;
891 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
892 	}
893 	len = left = nfsm_rndup(cnt);
894 	if (cnt > 0) {
895 		/*
896 		 * Generate the mbuf list with the uio_iov ref. to it.
897 		 */
898 		i = 0;
899 		m = m2 = info.mb;
900 		while (left > 0) {
901 			siz = min(M_TRAILINGSPACE(m), left);
902 			if (siz > 0) {
903 				left -= siz;
904 				i++;
905 			}
906 			if (left > 0) {
907 				m = m_getcl(MB_WAIT, MT_DATA, 0);
908 				m->m_len = 0;
909 				m2->m_next = m;
910 				m2 = m;
911 			}
912 		}
913 		MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
914 		       M_TEMP, M_WAITOK);
915 		uiop->uio_iov = iv2 = iv;
916 		m = info.mb;
917 		left = len;
918 		i = 0;
919 		while (left > 0) {
920 			if (m == NULL)
921 				panic("nfsrv_read iov");
922 			siz = min(M_TRAILINGSPACE(m), left);
923 			if (siz > 0) {
924 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
925 				iv->iov_len = siz;
926 				m->m_len += siz;
927 				left -= siz;
928 				iv++;
929 				i++;
930 			}
931 			m = m->m_next;
932 		}
933 		uiop->uio_iovcnt = i;
934 		uiop->uio_offset = off;
935 		uiop->uio_resid = len;
936 		uiop->uio_rw = UIO_READ;
937 		uiop->uio_segflg = UIO_SYSSPACE;
938 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
939 		off = uiop->uio_offset;
940 		nh->nh_nextr = off;
941 		FREE((caddr_t)iv2, M_TEMP);
942 		if (error || (getret = VOP_GETATTR(vp, vap))) {
943 			if (!error)
944 				error = getret;
945 			m_freem(info.mreq);
946 			info.mreq = NULL;
947 			vput(vp);
948 			vp = NULL;
949 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
950 					      NFSX_POSTOPATTR(info.v3),
951 					      &error));
952 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
953 			error = 0;
954 			goto nfsmout;
955 		}
956 	} else {
957 		uiop->uio_resid = 0;
958 	}
959 	vput(vp);
960 	vp = NULL;
961 	nfsm_srvfattr(nfsd, vap, fp);
962 	tlen = len - uiop->uio_resid;
963 	cnt = cnt < tlen ? cnt : tlen;
964 	tlen = nfsm_rndup(cnt);
965 	if (len != tlen || tlen != cnt)
966 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
967 	if (info.v3) {
968 		*tl++ = txdr_unsigned(cnt);
969 		if (len < reqlen)
970 			*tl++ = nfs_true;
971 		else
972 			*tl++ = nfs_false;
973 	}
974 	*tl = txdr_unsigned(cnt);
975 nfsmout:
976 	*mrq = info.mreq;
977 	if (vp)
978 		vput(vp);
979 	return(error);
980 }
981 
982 /*
983  * nfs write service
984  */
985 int
986 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
987 	    struct thread *td, struct mbuf **mrq)
988 {
989 	struct sockaddr *nam = nfsd->nd_nam;
990 	struct ucred *cred = &nfsd->nd_cr;
991 	struct iovec *ivp;
992 	int i, cnt;
993 	struct mbuf *mp1;
994 	struct nfs_fattr *fp;
995 	struct iovec *iv;
996 	struct vattr va, forat;
997 	struct vattr *vap = &va;
998 	u_int32_t *tl;
999 	int error = 0, rdonly, len, forat_ret = 1;
1000 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1001 	int stable = NFSV3WRITE_FILESYNC;
1002 	struct vnode *vp = NULL;
1003 	struct mount *mp = NULL;
1004 	nfsfh_t nfh;
1005 	fhandle_t *fhp;
1006 	struct uio io, *uiop = &io;
1007 	struct nfsm_info info;
1008 	off_t off;
1009 
1010 	info.mrep = nfsd->nd_mrep;
1011 	info.mreq = NULL;
1012 	info.md = nfsd->nd_md;
1013 	info.dpos = nfsd->nd_dpos;
1014 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1015 
1016 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1017 	if (info.mrep == NULL) {
1018 		error = 0;
1019 		goto nfsmout;
1020 	}
1021 	fhp = &nfh.fh_generic;
1022 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1023 	if (info.v3) {
1024 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1025 		off = fxdr_hyper(tl);
1026 		tl += 3;
1027 		stable = fxdr_unsigned(int, *tl++);
1028 	} else {
1029 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1030 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1031 		tl += 2;
1032 		if (nfs_async)
1033 	    		stable = NFSV3WRITE_UNSTABLE;
1034 	}
1035 	retlen = len = fxdr_unsigned(int32_t, *tl);
1036 	cnt = i = 0;
1037 
1038 	/*
1039 	 * For NFS Version 2, it is not obvious what a write of zero length
1040 	 * should do, but I might as well be consistent with Version 3,
1041 	 * which is to return ok so long as there are no permission problems.
1042 	 */
1043 	if (len > 0) {
1044 	    zeroing = 1;
1045 	    mp1 = info.mrep;
1046 	    while (mp1) {
1047 		if (mp1 == info.md) {
1048 			zeroing = 0;
1049 			adjust = info.dpos - mtod(mp1, caddr_t);
1050 			mp1->m_len -= adjust;
1051 			if (mp1->m_len > 0 && adjust > 0)
1052 				mp1->m_data += adjust;
1053 		}
1054 		if (zeroing)
1055 			mp1->m_len = 0;
1056 		else if (mp1->m_len > 0) {
1057 			i += mp1->m_len;
1058 			if (i > len) {
1059 				mp1->m_len -= (i - len);
1060 				zeroing	= 1;
1061 			}
1062 			if (mp1->m_len > 0)
1063 				cnt++;
1064 		}
1065 		mp1 = mp1->m_next;
1066 	    }
1067 	}
1068 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1069 		error = EIO;
1070 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1071 				      2 * NFSX_UNSIGNED, &error));
1072 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1073 				 aftat_ret, vap);
1074 		error = 0;
1075 		goto nfsmout;
1076 	}
1077 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1078 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1079 	if (error) {
1080 		vp = NULL;
1081 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1082 				      2 * NFSX_UNSIGNED, &error));
1083 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1084 				 aftat_ret, vap);
1085 		error = 0;
1086 		goto nfsmout;
1087 	}
1088 	if (info.v3)
1089 		forat_ret = VOP_GETATTR(vp, &forat);
1090 	if (vp->v_type != VREG) {
1091 		if (info.v3)
1092 			error = EINVAL;
1093 		else
1094 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1095 	}
1096 	if (!error) {
1097 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1098 	}
1099 	if (error) {
1100 		vput(vp);
1101 		vp = NULL;
1102 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1103 				      NFSX_WCCDATA(info.v3), &error));
1104 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1105 				 aftat_ret, vap);
1106 		error = 0;
1107 		goto nfsmout;
1108 	}
1109 
1110 	if (len > 0) {
1111 	    MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1112 		M_WAITOK);
1113 	    uiop->uio_iov = iv = ivp;
1114 	    uiop->uio_iovcnt = cnt;
1115 	    mp1 = info.mrep;
1116 	    while (mp1) {
1117 		if (mp1->m_len > 0) {
1118 			ivp->iov_base = mtod(mp1, caddr_t);
1119 			ivp->iov_len = mp1->m_len;
1120 			ivp++;
1121 		}
1122 		mp1 = mp1->m_next;
1123 	    }
1124 
1125 	    /*
1126 	     * XXX
1127 	     * The IO_METASYNC flag indicates that all metadata (and not just
1128 	     * enough to ensure data integrity) must be written to stable storage
1129 	     * synchronously.
1130 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1131 	     */
1132 	    if (stable == NFSV3WRITE_UNSTABLE)
1133 		ioflags = IO_NODELOCKED;
1134 	    else if (stable == NFSV3WRITE_DATASYNC)
1135 		ioflags = (IO_SYNC | IO_NODELOCKED);
1136 	    else
1137 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1138 	    uiop->uio_resid = len;
1139 	    uiop->uio_rw = UIO_WRITE;
1140 	    uiop->uio_segflg = UIO_SYSSPACE;
1141 	    uiop->uio_td = NULL;
1142 	    uiop->uio_offset = off;
1143 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1144 	    nfsstats.srvvop_writes++;
1145 	    FREE((caddr_t)iv, M_TEMP);
1146 	}
1147 	aftat_ret = VOP_GETATTR(vp, vap);
1148 	vput(vp);
1149 	vp = NULL;
1150 	if (!error)
1151 		error = aftat_ret;
1152 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1153 			      NFSX_PREOPATTR(info.v3) +
1154 			      NFSX_POSTOPORFATTR(info.v3) +
1155 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1156 			      &error));
1157 	if (info.v3) {
1158 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1159 				 aftat_ret, vap);
1160 		if (error) {
1161 			error = 0;
1162 			goto nfsmout;
1163 		}
1164 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1165 		*tl++ = txdr_unsigned(retlen);
1166 		/*
1167 		 * If nfs_async is set, then pretend the write was FILESYNC.
1168 		 */
1169 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1170 			*tl++ = txdr_unsigned(stable);
1171 		else
1172 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1173 		/*
1174 		 * Actually, there is no need to txdr these fields,
1175 		 * but it may make the values more human readable,
1176 		 * for debugging purposes.
1177 		 */
1178 		if (nfsver.tv_sec == 0)
1179 			nfsver = boottime;
1180 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1181 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1182 	} else {
1183 		fp = nfsm_build(&info, NFSX_V2FATTR);
1184 		nfsm_srvfattr(nfsd, vap, fp);
1185 	}
1186 nfsmout:
1187 	*mrq = info.mreq;
1188 	if (vp)
1189 		vput(vp);
1190 	return(error);
1191 }
1192 
1193 /*
1194  * NFS write service with write gathering support. Called when
1195  * nfsrvw_procrastinate > 0.
1196  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1197  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1198  * Jan. 1994.
1199  */
1200 int
1201 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1202 		  struct thread *td, struct mbuf **mrq)
1203 {
1204 	struct iovec *ivp;
1205 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1206 	struct nfs_fattr *fp;
1207 	int i;
1208 	struct iovec *iov;
1209 	struct nfsrvw_delayhash *wpp;
1210 	struct ucred *cred;
1211 	struct vattr va, forat;
1212 	u_int32_t *tl;
1213 	int error = 0, rdonly, len, forat_ret = 1;
1214 	int ioflags, aftat_ret = 1, adjust, zeroing;
1215 	struct mbuf *mp1;
1216 	struct vnode *vp = NULL;
1217 	struct mount *mp = NULL;
1218 	struct uio io, *uiop = &io;
1219 	u_quad_t cur_usec;
1220 	struct nfsm_info info;
1221 
1222 	info.mreq = NULL;
1223 
1224 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1225 #ifndef nolint
1226 	i = 0;
1227 	len = 0;
1228 #endif
1229 	*mrq = NULL;
1230 	if (*ndp) {
1231 	    nfsd = *ndp;
1232 	    *ndp = NULL;
1233 	    info.mrep = nfsd->nd_mrep;
1234 	    info.mreq = NULL;
1235 	    info.md = nfsd->nd_md;
1236 	    info.dpos = nfsd->nd_dpos;
1237 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1238 	    cred = &nfsd->nd_cr;
1239 	    LIST_INIT(&nfsd->nd_coalesce);
1240 	    nfsd->nd_mreq = NULL;
1241 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1242 	    cur_usec = nfs_curusec();
1243 	    nfsd->nd_time = cur_usec +
1244 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1245 
1246 	    /*
1247 	     * Now, get the write header..
1248 	     */
1249 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1250 	    if (info.v3) {
1251 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1252 		nfsd->nd_off = fxdr_hyper(tl);
1253 		tl += 3;
1254 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1255 	    } else {
1256 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1257 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1258 		tl += 2;
1259 		if (nfs_async)
1260 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1261 	    }
1262 	    len = fxdr_unsigned(int32_t, *tl);
1263 	    nfsd->nd_len = len;
1264 	    nfsd->nd_eoff = nfsd->nd_off + len;
1265 
1266 	    /*
1267 	     * Trim the header out of the mbuf list and trim off any trailing
1268 	     * junk so that the mbuf list has only the write data.
1269 	     */
1270 	    zeroing = 1;
1271 	    i = 0;
1272 	    mp1 = info.mrep;
1273 	    while (mp1) {
1274 		if (mp1 == info.md) {
1275 		    zeroing = 0;
1276 		    adjust = info.dpos - mtod(mp1, caddr_t);
1277 		    mp1->m_len -= adjust;
1278 		    if (mp1->m_len > 0 && adjust > 0)
1279 			mp1->m_data += adjust;
1280 		}
1281 		if (zeroing)
1282 		    mp1->m_len = 0;
1283 		else {
1284 		    i += mp1->m_len;
1285 		    if (i > len) {
1286 			mp1->m_len -= (i - len);
1287 			zeroing = 1;
1288 		    }
1289 		}
1290 		mp1 = mp1->m_next;
1291 	    }
1292 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1293 nfsmout:
1294 		m_freem(info.mrep);
1295 		info.mrep = NULL;
1296 		error = EIO;
1297 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1298 		if (info.v3) {
1299 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1300 				     aftat_ret, &va);
1301 		}
1302 		nfsd->nd_mreq = info.mreq;
1303 		nfsd->nd_mrep = NULL;
1304 		nfsd->nd_time = 0;
1305 	    }
1306 
1307 	    /*
1308 	     * Add this entry to the hash and time queues.
1309 	     */
1310 	    crit_enter();
1311 	    owp = NULL;
1312 	    wp = slp->ns_tq.lh_first;
1313 	    while (wp && wp->nd_time < nfsd->nd_time) {
1314 		owp = wp;
1315 		wp = wp->nd_tq.le_next;
1316 	    }
1317 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1318 	    if (owp) {
1319 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1320 	    } else {
1321 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1322 	    }
1323 	    if (nfsd->nd_mrep) {
1324 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1325 		owp = NULL;
1326 		wp = wpp->lh_first;
1327 		while (wp &&
1328 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1329 		    owp = wp;
1330 		    wp = wp->nd_hash.le_next;
1331 		}
1332 		while (wp && wp->nd_off < nfsd->nd_off &&
1333 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1334 		    owp = wp;
1335 		    wp = wp->nd_hash.le_next;
1336 		}
1337 		if (owp) {
1338 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1339 
1340 		    /*
1341 		     * Search the hash list for overlapping entries and
1342 		     * coalesce.
1343 		     */
1344 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1345 			wp = nfsd->nd_hash.le_next;
1346 			if (NFSW_SAMECRED(owp, nfsd))
1347 			    nfsrvw_coalesce(owp, nfsd);
1348 		    }
1349 		} else {
1350 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1351 		}
1352 	    }
1353 	    crit_exit();
1354 	}
1355 
1356 	/*
1357 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1358 	 * and generate the associated reply mbuf list(s).
1359 	 */
1360 loop1:
1361 	cur_usec = nfs_curusec();
1362 	crit_enter();
1363 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1364 		owp = nfsd->nd_tq.le_next;
1365 		if (nfsd->nd_time > cur_usec)
1366 		    break;
1367 		if (nfsd->nd_mreq)
1368 		    continue;
1369 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1370 		LIST_REMOVE(nfsd, nd_tq);
1371 		LIST_REMOVE(nfsd, nd_hash);
1372 		crit_exit();
1373 		info.mrep = nfsd->nd_mrep;
1374 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1375 		nfsd->nd_mrep = NULL;
1376 		cred = &nfsd->nd_cr;
1377 		forat_ret = aftat_ret = 1;
1378 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1379 		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1380 		if (!error) {
1381 		    if (info.v3)
1382 			forat_ret = VOP_GETATTR(vp, &forat);
1383 		    if (vp->v_type != VREG) {
1384 			if (info.v3)
1385 			    error = EINVAL;
1386 			else
1387 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1388 		    }
1389 		} else {
1390 		    vp = NULL;
1391 		}
1392 		if (!error) {
1393 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1394 		}
1395 
1396 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1397 		    ioflags = IO_NODELOCKED;
1398 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1399 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1400 		else
1401 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1402 		uiop->uio_rw = UIO_WRITE;
1403 		uiop->uio_segflg = UIO_SYSSPACE;
1404 		uiop->uio_td = NULL;
1405 		uiop->uio_offset = nfsd->nd_off;
1406 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1407 		if (uiop->uio_resid > 0) {
1408 		    mp1 = info.mrep;
1409 		    i = 0;
1410 		    while (mp1) {
1411 			if (mp1->m_len > 0)
1412 			    i++;
1413 			mp1 = mp1->m_next;
1414 		    }
1415 		    uiop->uio_iovcnt = i;
1416 		    MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
1417 			M_TEMP, M_WAITOK);
1418 		    uiop->uio_iov = ivp = iov;
1419 		    mp1 = info.mrep;
1420 		    while (mp1) {
1421 			if (mp1->m_len > 0) {
1422 			    ivp->iov_base = mtod(mp1, caddr_t);
1423 			    ivp->iov_len = mp1->m_len;
1424 			    ivp++;
1425 			}
1426 			mp1 = mp1->m_next;
1427 		    }
1428 		    if (!error) {
1429 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1430 			nfsstats.srvvop_writes++;
1431 		    }
1432 		    FREE((caddr_t)iov, M_TEMP);
1433 		}
1434 		m_freem(info.mrep);
1435 		info.mrep = NULL;
1436 		if (vp) {
1437 		    aftat_ret = VOP_GETATTR(vp, &va);
1438 		    vput(vp);
1439 		    vp = NULL;
1440 		}
1441 
1442 		/*
1443 		 * Loop around generating replies for all write rpcs that have
1444 		 * now been completed.
1445 		 */
1446 		swp = nfsd;
1447 		do {
1448 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1449 		    if (error) {
1450 			nfsm_writereply(&info, nfsd, slp, error,
1451 					NFSX_WCCDATA(info.v3));
1452 			if (info.v3) {
1453 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1454 					     aftat_ret, &va);
1455 			}
1456 		    } else {
1457 			nfsm_writereply(&info, nfsd, slp, error,
1458 					NFSX_PREOPATTR(info.v3) +
1459 					NFSX_POSTOPORFATTR(info.v3) +
1460 					2 * NFSX_UNSIGNED +
1461 					NFSX_WRITEVERF(info.v3));
1462 			if (info.v3) {
1463 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1464 					     aftat_ret, &va);
1465 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1466 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1467 			    *tl++ = txdr_unsigned(swp->nd_stable);
1468 			    /*
1469 			     * Actually, there is no need to txdr these fields,
1470 			     * but it may make the values more human readable,
1471 			     * for debugging purposes.
1472 			     */
1473 			    if (nfsver.tv_sec == 0)
1474 				    nfsver = boottime;
1475 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1476 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1477 			} else {
1478 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1479 			    nfsm_srvfattr(nfsd, &va, fp);
1480 			}
1481 		    }
1482 		    nfsd->nd_mreq = info.mreq;
1483 		    if (nfsd->nd_mrep)
1484 			panic("nfsrv_write: nd_mrep not free");
1485 
1486 		    /*
1487 		     * Done. Put it at the head of the timer queue so that
1488 		     * the final phase can return the reply.
1489 		     */
1490 		    crit_enter();
1491 		    if (nfsd != swp) {
1492 			nfsd->nd_time = 0;
1493 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1494 		    }
1495 		    nfsd = swp->nd_coalesce.lh_first;
1496 		    if (nfsd) {
1497 			LIST_REMOVE(nfsd, nd_tq);
1498 		    }
1499 		    crit_exit();
1500 		} while (nfsd);
1501 		crit_enter();
1502 		swp->nd_time = 0;
1503 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1504 		crit_exit();
1505 		goto loop1;
1506 	}
1507 	crit_exit();
1508 
1509 	/*
1510 	 * Search for a reply to return.
1511 	 */
1512 	crit_enter();
1513 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1514 		if (nfsd->nd_mreq) {
1515 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1516 		    LIST_REMOVE(nfsd, nd_tq);
1517 		    *mrq = nfsd->nd_mreq;
1518 		    *ndp = nfsd;
1519 		    break;
1520 		}
1521 	crit_exit();
1522 	*mrq = info.mreq;
1523 	return (0);
1524 }
1525 
1526 /*
1527  * Coalesce the write request nfsd into owp. To do this we must:
1528  * - remove nfsd from the queues
1529  * - merge nfsd->nd_mrep into owp->nd_mrep
1530  * - update the nd_eoff and nd_stable for owp
1531  * - put nfsd on owp's nd_coalesce list
1532  * NB: Must be called at splsoftclock().
1533  */
1534 static void
1535 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1536 {
1537         int overlap;
1538         struct mbuf *mp1;
1539 	struct nfsrv_descript *p;
1540 
1541 	NFS_DPF(WG, ("C%03x-%03x",
1542 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1543         LIST_REMOVE(nfsd, nd_hash);
1544         LIST_REMOVE(nfsd, nd_tq);
1545         if (owp->nd_eoff < nfsd->nd_eoff) {
1546             overlap = owp->nd_eoff - nfsd->nd_off;
1547             if (overlap < 0)
1548                 panic("nfsrv_coalesce: bad off");
1549             if (overlap > 0)
1550                 m_adj(nfsd->nd_mrep, overlap);
1551             mp1 = owp->nd_mrep;
1552             while (mp1->m_next)
1553                 mp1 = mp1->m_next;
1554             mp1->m_next = nfsd->nd_mrep;
1555             owp->nd_eoff = nfsd->nd_eoff;
1556         } else
1557             m_freem(nfsd->nd_mrep);
1558         nfsd->nd_mrep = NULL;
1559         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1560             owp->nd_stable = NFSV3WRITE_FILESYNC;
1561         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1562             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1563             owp->nd_stable = NFSV3WRITE_DATASYNC;
1564         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1565 
1566 	/*
1567 	 * If nfsd had anything else coalesced into it, transfer them
1568 	 * to owp, otherwise their replies will never get sent.
1569 	 */
1570 	for (p = nfsd->nd_coalesce.lh_first; p;
1571 	     p = nfsd->nd_coalesce.lh_first) {
1572 	    LIST_REMOVE(p, nd_tq);
1573 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1574 	}
1575 }
1576 
1577 /*
1578  * nfs create service
1579  * now does a truncate to 0 length via. setattr if it already exists
1580  */
1581 int
1582 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1583 	     struct thread *td, struct mbuf **mrq)
1584 {
1585 	struct sockaddr *nam = nfsd->nd_nam;
1586 	struct ucred *cred = &nfsd->nd_cr;
1587 	struct nfs_fattr *fp;
1588 	struct vattr va, dirfor, diraft;
1589 	struct vattr *vap = &va;
1590 	struct nfsv2_sattr *sp;
1591 	u_int32_t *tl;
1592 	struct nlookupdata nd;
1593 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1594 	udev_t rdev = NOUDEV;
1595 	caddr_t cp;
1596 	int how, exclusive_flag = 0;
1597 	struct vnode *dirp;
1598 	struct vnode *dvp;
1599 	struct vnode *vp;
1600 	struct mount *mp;
1601 	nfsfh_t nfh;
1602 	fhandle_t *fhp;
1603 	u_quad_t tempsize;
1604 	u_char cverf[NFSX_V3CREATEVERF];
1605 	struct nfsm_info info;
1606 
1607 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1608 	nlookup_zero(&nd);
1609 	dirp = NULL;
1610 	dvp = NULL;
1611 	vp = NULL;
1612 
1613 	info.mrep = nfsd->nd_mrep;
1614 	info.mreq = NULL;
1615 	info.md = nfsd->nd_md;
1616 	info.dpos = nfsd->nd_dpos;
1617 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1618 
1619 	fhp = &nfh.fh_generic;
1620 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1621 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1622 
1623 	/*
1624 	 * Call namei and do initial cleanup to get a few things
1625 	 * out of the way.  If we get an initial error we cleanup
1626 	 * and return here to avoid special-casing the invalid nd
1627 	 * structure through the rest of the case.  dirp may be
1628 	 * set even if an error occurs, but the nd structure will not
1629 	 * be valid at all if an error occurs so we have to invalidate it
1630 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1631 	 */
1632 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1633 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1634 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1635 	mp = vfs_getvfs(&fhp->fh_fsid);
1636 
1637 	if (dirp) {
1638 		if (info.v3) {
1639 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1640 		} else {
1641 			vrele(dirp);
1642 			dirp = NULL;
1643 		}
1644 	}
1645 	if (error) {
1646 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1647 				      NFSX_WCCDATA(info.v3), &error));
1648 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1649 				 diraft_ret, &diraft);
1650 		error = 0;
1651 		goto nfsmout;
1652 	}
1653 
1654 	/*
1655 	 * No error.  Continue.  State:
1656 	 *
1657 	 *	dirp 		may be valid
1658 	 *	vp		may be valid or NULL if the target does not
1659 	 *			exist.
1660 	 *	dvp		is valid
1661 	 *
1662 	 * The error state is set through the code and we may also do some
1663 	 * opportunistic releasing of vnodes to avoid holding locks through
1664 	 * NFS I/O.  The cleanup at the end is a catch-all
1665 	 */
1666 
1667 	VATTR_NULL(vap);
1668 	if (info.v3) {
1669 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1670 		how = fxdr_unsigned(int, *tl);
1671 		switch (how) {
1672 		case NFSV3CREATE_GUARDED:
1673 			if (vp) {
1674 				error = EEXIST;
1675 				break;
1676 			}
1677 			/* fall through */
1678 		case NFSV3CREATE_UNCHECKED:
1679 			ERROROUT(nfsm_srvsattr(&info, vap));
1680 			break;
1681 		case NFSV3CREATE_EXCLUSIVE:
1682 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1683 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1684 			exclusive_flag = 1;
1685 			break;
1686 		};
1687 		vap->va_type = VREG;
1688 	} else {
1689 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1690 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1691 		if (vap->va_type == VNON)
1692 			vap->va_type = VREG;
1693 		vap->va_mode = nfstov_mode(sp->sa_mode);
1694 		switch (vap->va_type) {
1695 		case VREG:
1696 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1697 			if (tsize != -1)
1698 				vap->va_size = (u_quad_t)tsize;
1699 			break;
1700 		case VCHR:
1701 		case VBLK:
1702 		case VFIFO:
1703 			rdev = fxdr_unsigned(long, sp->sa_size);
1704 			break;
1705 		default:
1706 			break;
1707 		};
1708 	}
1709 
1710 	/*
1711 	 * Iff doesn't exist, create it
1712 	 * otherwise just truncate to 0 length
1713 	 *   should I set the mode too ?
1714 	 *
1715 	 * The only possible error we can have at this point is EEXIST.
1716 	 * nd.ni_vp will also be non-NULL in that case.
1717 	 */
1718 	if (vp == NULL) {
1719 		if (vap->va_mode == (mode_t)VNOVAL)
1720 			vap->va_mode = 0;
1721 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1722 			vn_unlock(dvp);
1723 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1724 					    nd.nl_cred, vap);
1725 			vrele(dvp);
1726 			dvp = NULL;
1727 			if (error == 0) {
1728 				if (exclusive_flag) {
1729 					exclusive_flag = 0;
1730 					VATTR_NULL(vap);
1731 					bcopy(cverf, (caddr_t)&vap->va_atime,
1732 						NFSX_V3CREATEVERF);
1733 					error = VOP_SETATTR(vp, vap, cred);
1734 				}
1735 			}
1736 		} else if (
1737 			vap->va_type == VCHR ||
1738 			vap->va_type == VBLK ||
1739 			vap->va_type == VFIFO
1740 		) {
1741 			/*
1742 			 * Handle SysV FIFO node special cases.  All other
1743 			 * devices require super user to access.
1744 			 */
1745 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1746 				vap->va_type = VFIFO;
1747                         if (vap->va_type != VFIFO &&
1748                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1749 				goto nfsmreply0;
1750                         }
1751 			vap->va_rmajor = umajor(rdev);
1752 			vap->va_rminor = uminor(rdev);
1753 
1754 			vn_unlock(dvp);
1755 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1756 			vrele(dvp);
1757 			dvp = NULL;
1758 			if (error)
1759 				goto nfsmreply0;
1760 #if 0
1761 			/*
1762 			 * XXX what is this junk supposed to do ?
1763 			 */
1764 
1765 			vput(vp);
1766 			vp = NULL;
1767 
1768 			/*
1769 			 * release dvp prior to lookup
1770 			 */
1771 			vput(dvp);
1772 			dvp = NULL;
1773 
1774 			/*
1775 			 * Setup for lookup.
1776 			 *
1777 			 * Even though LOCKPARENT was cleared, ni_dvp may
1778 			 * be garbage.
1779 			 */
1780 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1781 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1782 			nd.ni_cnd.cn_td = td;
1783 			nd.ni_cnd.cn_cred = cred;
1784 
1785 			error = lookup(&nd);
1786 			nd.ni_dvp = NULL;
1787 
1788 			if (error != 0) {
1789 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1790 						      0, &error));
1791 				/* fall through on certain errors */
1792 			}
1793 			nfsrv_object_create(nd.ni_vp);
1794 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1795 				error = EINVAL;
1796 				goto nfsmreply0;
1797 			}
1798 #endif
1799 		} else {
1800 			error = ENXIO;
1801 		}
1802 	} else {
1803 		if (vap->va_size != -1) {
1804 			error = nfsrv_access(mp, vp, VWRITE, cred,
1805 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1806 			if (!error) {
1807 				tempsize = vap->va_size;
1808 				VATTR_NULL(vap);
1809 				vap->va_size = tempsize;
1810 				error = VOP_SETATTR(vp, vap, cred);
1811 			}
1812 		}
1813 	}
1814 
1815 	if (!error) {
1816 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1817 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1818 		if (!error)
1819 			error = VOP_GETATTR(vp, vap);
1820 	}
1821 	if (info.v3) {
1822 		if (exclusive_flag && !error &&
1823 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1824 			error = EEXIST;
1825 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1826 		vrele(dirp);
1827 		dirp = NULL;
1828 	}
1829 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1830 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1831 			      NFSX_WCCDATA(info.v3),
1832 			      &error));
1833 	if (info.v3) {
1834 		if (!error) {
1835 			nfsm_srvpostop_fh(&info, fhp);
1836 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1837 		}
1838 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1839 				 diraft_ret, &diraft);
1840 		error = 0;
1841 	} else {
1842 		nfsm_srvfhtom(&info, fhp);
1843 		fp = nfsm_build(&info, NFSX_V2FATTR);
1844 		nfsm_srvfattr(nfsd, vap, fp);
1845 	}
1846 	goto nfsmout;
1847 
1848 nfsmreply0:
1849 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1850 	error = 0;
1851 	/* fall through */
1852 
1853 nfsmout:
1854 	*mrq = info.mreq;
1855 	if (dirp)
1856 		vrele(dirp);
1857 	nlookup_done(&nd);
1858 	if (dvp) {
1859 		if (dvp == vp)
1860 			vrele(dvp);
1861 		else
1862 			vput(dvp);
1863 	}
1864 	if (vp)
1865 		vput(vp);
1866 	return (error);
1867 }
1868 
1869 /*
1870  * nfs v3 mknod service
1871  */
1872 int
1873 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1874 	    struct thread *td, struct mbuf **mrq)
1875 {
1876 	struct sockaddr *nam = nfsd->nd_nam;
1877 	struct ucred *cred = &nfsd->nd_cr;
1878 	struct vattr va, dirfor, diraft;
1879 	struct vattr *vap = &va;
1880 	u_int32_t *tl;
1881 	struct nlookupdata nd;
1882 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1883 	enum vtype vtyp;
1884 	struct vnode *dirp;
1885 	struct vnode *dvp;
1886 	struct vnode *vp;
1887 	nfsfh_t nfh;
1888 	fhandle_t *fhp;
1889 	struct nfsm_info info;
1890 
1891 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1892 	nlookup_zero(&nd);
1893 	dirp = NULL;
1894 	dvp = NULL;
1895 	vp = NULL;
1896 
1897 	info.mrep = nfsd->nd_mrep;
1898 	info.mreq = NULL;
1899 	info.md = nfsd->nd_md;
1900 	info.dpos = nfsd->nd_dpos;
1901 
1902 	fhp = &nfh.fh_generic;
1903 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1904 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1905 
1906 	/*
1907 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1908 	 * is not valid.  However, nfsm_*() routines may still jump to
1909 	 * nfsmout.
1910 	 */
1911 
1912 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1913 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1914 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1915 	if (dirp)
1916 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1917 	if (error) {
1918 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1919 			   NFSX_WCCDATA(1), &error));
1920 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1921 				 diraft_ret, &diraft);
1922 		error = 0;
1923 		goto nfsmout;
1924 	}
1925 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1926 	vtyp = nfsv3tov_type(*tl);
1927 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1928 		error = NFSERR_BADTYPE;
1929 		goto out;
1930 	}
1931 	VATTR_NULL(vap);
1932 	ERROROUT(nfsm_srvsattr(&info, vap));
1933 	if (vtyp == VCHR || vtyp == VBLK) {
1934 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1935 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1936 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1937 	}
1938 
1939 	/*
1940 	 * Iff doesn't exist, create it.
1941 	 */
1942 	if (vp) {
1943 		error = EEXIST;
1944 		goto out;
1945 	}
1946 	vap->va_type = vtyp;
1947 	if (vap->va_mode == (mode_t)VNOVAL)
1948 		vap->va_mode = 0;
1949 	if (vtyp == VSOCK) {
1950 		vn_unlock(dvp);
1951 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1952 		vrele(dvp);
1953 		dvp = NULL;
1954 	} else {
1955 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1956 			goto out;
1957 
1958 		vn_unlock(dvp);
1959 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1960 		vrele(dvp);
1961 		dvp = NULL;
1962 		if (error)
1963 			goto out;
1964 	}
1965 
1966 	/*
1967 	 * send response, cleanup, return.
1968 	 */
1969 out:
1970 	nlookup_done(&nd);
1971 	if (dvp) {
1972 		if (dvp == vp)
1973 			vrele(dvp);
1974 		else
1975 			vput(dvp);
1976 		dvp = NULL;
1977 	}
1978 	if (!error) {
1979 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1980 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1981 		if (!error)
1982 			error = VOP_GETATTR(vp, vap);
1983 	}
1984 	if (vp) {
1985 		vput(vp);
1986 		vp = NULL;
1987 	}
1988 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1989 	if (dirp) {
1990 		vrele(dirp);
1991 		dirp = NULL;
1992 	}
1993 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1994 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1995 			      NFSX_WCCDATA(1), &error));
1996 	if (!error) {
1997 		nfsm_srvpostop_fh(&info, fhp);
1998 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1999 	}
2000 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2001 			 diraft_ret, &diraft);
2002 	*mrq = info.mreq;
2003 	return (0);
2004 nfsmout:
2005 	*mrq = info.mreq;
2006 	if (dirp)
2007 		vrele(dirp);
2008 	nlookup_done(&nd);
2009 	if (dvp) {
2010 		if (dvp == vp)
2011 			vrele(dvp);
2012 		else
2013 			vput(dvp);
2014 	}
2015 	if (vp)
2016 		vput(vp);
2017 	return (error);
2018 }
2019 
2020 /*
2021  * nfs remove service
2022  */
2023 int
2024 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2025 	     struct thread *td, struct mbuf **mrq)
2026 {
2027 	struct sockaddr *nam = nfsd->nd_nam;
2028 	struct ucred *cred = &nfsd->nd_cr;
2029 	struct nlookupdata nd;
2030 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2031 	struct vnode *dirp;
2032 	struct vnode *dvp;
2033 	struct vnode *vp;
2034 	struct vattr dirfor, diraft;
2035 	nfsfh_t nfh;
2036 	fhandle_t *fhp;
2037 	struct nfsm_info info;
2038 
2039 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2040 	nlookup_zero(&nd);
2041 	dirp = NULL;
2042 	dvp = NULL;
2043 	vp = NULL;
2044 
2045 	info.mrep = nfsd->nd_mrep;
2046 	info.mreq = NULL;
2047 	info.md = nfsd->nd_md;
2048 	info.dpos = nfsd->nd_dpos;
2049 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2050 
2051 	fhp = &nfh.fh_generic;
2052 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2053 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2054 
2055 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2056 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2057 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2058 	if (dirp) {
2059 		if (info.v3)
2060 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2061 	}
2062 	if (error == 0) {
2063 		if (vp->v_type == VDIR) {
2064 			error = EPERM;		/* POSIX */
2065 			goto out;
2066 		}
2067 		/*
2068 		 * The root of a mounted filesystem cannot be deleted.
2069 		 */
2070 		if (vp->v_flag & VROOT) {
2071 			error = EBUSY;
2072 			goto out;
2073 		}
2074 out:
2075 		if (!error) {
2076 			if (dvp != vp)
2077 				vn_unlock(dvp);
2078 			if (vp) {
2079 				vput(vp);
2080 				vp = NULL;
2081 			}
2082 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2083 			vrele(dvp);
2084 			dvp = NULL;
2085 		}
2086 	}
2087 	if (dirp && info.v3)
2088 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2089 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2090 	if (info.v3) {
2091 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2092 				 diraft_ret, &diraft);
2093 		error = 0;
2094 	}
2095 nfsmout:
2096 	*mrq = info.mreq;
2097 	nlookup_done(&nd);
2098 	if (dirp)
2099 		vrele(dirp);
2100 	if (dvp) {
2101 		if (dvp == vp)
2102 			vrele(dvp);
2103 		else
2104 			vput(dvp);
2105 	}
2106 	if (vp)
2107 		vput(vp);
2108 	return(error);
2109 }
2110 
2111 /*
2112  * nfs rename service
2113  */
2114 int
2115 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2116 	     struct thread *td, struct mbuf **mrq)
2117 {
2118 	struct sockaddr *nam = nfsd->nd_nam;
2119 	struct ucred *cred = &nfsd->nd_cr;
2120 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2121 	int tdirfor_ret = 1, tdiraft_ret = 1;
2122 	struct nlookupdata fromnd, tond;
2123 	struct vnode *fvp, *fdirp, *fdvp;
2124 	struct vnode *tvp, *tdirp, *tdvp;
2125 	struct namecache *ncp;
2126 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2127 	nfsfh_t fnfh, tnfh;
2128 	fhandle_t *ffhp, *tfhp;
2129 	uid_t saved_uid;
2130 	struct nfsm_info info;
2131 
2132 	info.mrep = nfsd->nd_mrep;
2133 	info.mreq = NULL;
2134 	info.md = nfsd->nd_md;
2135 	info.dpos = nfsd->nd_dpos;
2136 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2137 
2138 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2139 #ifndef nolint
2140 	fvp = NULL;
2141 #endif
2142 	ffhp = &fnfh.fh_generic;
2143 	tfhp = &tnfh.fh_generic;
2144 
2145 	/*
2146 	 * Clear fields in case goto nfsmout occurs from macro.
2147 	 */
2148 
2149 	nlookup_zero(&fromnd);
2150 	nlookup_zero(&tond);
2151 	fdirp = NULL;
2152 	tdirp = NULL;
2153 
2154 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2155 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2156 
2157 	/*
2158 	 * Remember our original uid so that we can reset cr_uid before
2159 	 * the second nfs_namei() call, in case it is remapped.
2160 	 */
2161 	saved_uid = cred->cr_uid;
2162 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2163 			  NULL, NULL,
2164 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2165 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2166 	if (fdirp) {
2167 		if (info.v3)
2168 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2169 	}
2170 	if (error) {
2171 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2172 				      2 * NFSX_WCCDATA(info.v3), &error));
2173 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2174 				 fdiraft_ret, &fdiraft);
2175 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2176 				 tdiraft_ret, &tdiraft);
2177 		error = 0;
2178 		goto nfsmout;
2179 	}
2180 
2181 	/*
2182 	 * We have to unlock the from ncp before we can safely lookup
2183 	 * the target ncp.
2184 	 */
2185 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2186 	cache_unlock(&fromnd.nl_nch);
2187 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2188 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2189 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2190 	cred->cr_uid = saved_uid;
2191 
2192 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2193 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2194 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2195 	if (tdirp) {
2196 		if (info.v3)
2197 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2198 	}
2199 	if (error)
2200 		goto out1;
2201 
2202 	/*
2203 	 * relock the source
2204 	 */
2205 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2206 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2207 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2208 		cache_lock(&fromnd.nl_nch);
2209 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210 	} else {
2211 		cache_unlock(&tond.nl_nch);
2212 		cache_lock(&fromnd.nl_nch);
2213 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2214 		cache_lock(&tond.nl_nch);
2215 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2216 	}
2217 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2218 
2219 	fvp = fromnd.nl_nch.ncp->nc_vp;
2220 	tvp = tond.nl_nch.ncp->nc_vp;
2221 
2222 	/*
2223 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2224 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2225 	 * point).  If we get through the checks these will be guarenteed
2226 	 * to be non-NULL.
2227 	 *
2228 	 * Holding the children ncp's should be sufficient to prevent
2229 	 * fdvp and tdvp ripouts.
2230 	 */
2231 	if (fromnd.nl_nch.ncp->nc_parent)
2232 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2233 	else
2234 		fdvp = NULL;
2235 	if (tond.nl_nch.ncp->nc_parent)
2236 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2237 	else
2238 		tdvp = NULL;
2239 
2240 	if (tvp != NULL) {
2241 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2242 			if (info.v3)
2243 				error = EEXIST;
2244 			else
2245 				error = EISDIR;
2246 			goto out;
2247 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2248 			if (info.v3)
2249 				error = EEXIST;
2250 			else
2251 				error = ENOTDIR;
2252 			goto out;
2253 		}
2254 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2255 			if (info.v3)
2256 				error = EXDEV;
2257 			else
2258 				error = ENOTEMPTY;
2259 			goto out;
2260 		}
2261 	}
2262 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2263 		if (info.v3)
2264 			error = EXDEV;
2265 		else
2266 			error = ENOTEMPTY;
2267 		goto out;
2268 	}
2269 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2270 		if (info.v3)
2271 			error = EXDEV;
2272 		else
2273 			error = ENOTEMPTY;
2274 		goto out;
2275 	}
2276 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2277 		if (info.v3)
2278 			error = EINVAL;
2279 		else
2280 			error = ENOTEMPTY;
2281 	}
2282 
2283 	/*
2284 	 * You cannot rename a source into itself or a subdirectory of itself.
2285 	 * We check this by travsering the target directory upwards looking
2286 	 * for a match against the source.
2287 	 */
2288 	if (error == 0) {
2289 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2290 			if (fromnd.nl_nch.ncp == ncp) {
2291 				error = EINVAL;
2292 				break;
2293 			}
2294 		}
2295 	}
2296 
2297 	/*
2298 	 * If source is the same as the destination (that is the
2299 	 * same vnode with the same name in the same directory),
2300 	 * then there is nothing to do.
2301 	 */
2302 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2303 		error = -1;
2304 out:
2305 	if (!error) {
2306 		/*
2307 		 * The VOP_NRENAME function releases all vnode references &
2308 		 * locks prior to returning so we need to clear the pointers
2309 		 * to bypass cleanup code later on.
2310 		 */
2311 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2312 				    fdvp, tdvp, tond.nl_cred);
2313 	} else {
2314 		if (error == -1)
2315 			error = 0;
2316 	}
2317 	/* fall through */
2318 
2319 out1:
2320 	if (fdirp)
2321 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2322 	if (tdirp)
2323 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2324 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2325 			      2 * NFSX_WCCDATA(info.v3), &error));
2326 	if (info.v3) {
2327 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2328 				 fdiraft_ret, &fdiraft);
2329 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2330 				 tdiraft_ret, &tdiraft);
2331 	}
2332 	error = 0;
2333 	/* fall through */
2334 
2335 nfsmout:
2336 	*mrq = info.mreq;
2337 	if (tdirp)
2338 		vrele(tdirp);
2339 	nlookup_done(&tond);
2340 	if (fdirp)
2341 		vrele(fdirp);
2342 	nlookup_done(&fromnd);
2343 	return (error);
2344 }
2345 
2346 /*
2347  * nfs link service
 *
 * Creates a hard link.  The first file handle in the request names the
 * existing object (xp); the second file handle plus name identify the
 * directory entry to create.
 *
 * Errors decided in this function: EPERM when xp is a directory, EEXIST
 * when the new name already resolves to a vnode, and EXDEV when xp and
 * the target directory have different v_mount values.
 *
 * The reply mbuf chain is handed back through *mrq.
2348  */
2349 int
2350 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2351 	   struct thread *td, struct mbuf **mrq)
2352 {
2353 	struct sockaddr *nam = nfsd->nd_nam;
2354 	struct ucred *cred = &nfsd->nd_cr;
2355 	struct nlookupdata nd;
2356 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2357 	int getret = 1;
2358 	struct vnode *dirp;
2359 	struct vnode *dvp;
2360 	struct vnode *vp;
2361 	struct vnode *xp;
	/* NOTE(review): mp is set to NULL below and never used again. */
2362 	struct mount *mp;
2363 	struct mount *xmp;
2364 	struct vattr dirfor, diraft, at;
2365 	nfsfh_t nfh, dnfh;
2366 	fhandle_t *fhp, *dfhp;
2367 	struct nfsm_info info;
2368 
	/* Seed the request-parsing state from the descriptor. */
2369 	info.mrep = nfsd->nd_mrep;
2370 	info.mreq = NULL;
2371 	info.md = nfsd->nd_md;
2372 	info.dpos = nfsd->nd_dpos;
2373 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2374 
2375 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything nfsmout inspects before any macro can jump there. */
2376 	nlookup_zero(&nd);
2377 	dirp = dvp = vp = xp = NULL;
2378 	mp = xmp = NULL;
2379 
2380 	fhp = &nfh.fh_generic;
2381 	dfhp = &dnfh.fh_generic;
	/* Request layout: object handle, directory handle, name length. */
2382 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2383 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2384 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2385 
2386 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2387 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2388 	if (error) {
2389 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2390 				      NFSX_POSTOPATTR(info.v3) +
2391 				      NFSX_WCCDATA(info.v3),
2392 				      &error));
2393 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2394 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2395 				 diraft_ret, &diraft);
		/* Make sure nfsmout does not vrele() xp after the failure. */
2396 		xp = NULL;
2397 		error = 0;
2398 		goto nfsmout;
2399 	}
2400 	if (xp->v_type == VDIR) {
2401 		error = EPERM;		/* POSIX */
2402 		goto out1;
2403 	}
2404 
2405 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2406 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2407 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* Pre-operation attributes of the target directory (v3 only). */
2408 	if (dirp) {
2409 		if (info.v3)
2410 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2411 	}
2412 	if (error)
2413 		goto out1;
2414 
	/* The new name must not already exist. */
2415 	if (vp != NULL) {
2416 		error = EEXIST;
2417 		goto out;
2418 	}
2419 	if (xp->v_mount != dvp->v_mount)
2420 		error = EXDEV;
2421 out:
	/*
	 * dvp is unlocked before the VOP and then only vrele()'d (not
	 * vput()), and cleared so nfsmout does not release it again.
	 */
2422 	if (!error) {
2423 		vn_unlock(dvp);
2424 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2425 		vrele(dvp);
2426 		dvp = NULL;
2427 	}
2428 	/* fall through */
2429 
2430 out1:
	/* Post-operation attributes of xp (v3 only) and of the directory. */
2431 	if (info.v3)
2432 		getret = VOP_GETATTR(xp, &at);
2433 	if (dirp)
2434 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2435 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2436 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2437 			      &error));
	/* Note: error is only reset to 0 here on the v3 path. */
2438 	if (info.v3) {
2439 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2440 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2441 				 diraft_ret, &diraft);
2442 		error = 0;
2443 	}
2444 	/* fall through */
2445 
2446 nfsmout:
	/* Common exit: publish the reply and drop what is still held. */
2447 	*mrq = info.mreq;
2448 	nlookup_done(&nd);
2449 	if (dirp)
2450 		vrele(dirp);
2451 	if (xp)
2452 		vrele(xp);
	/* dvp is vput() unless it is the same vnode as vp, which is vput() below. */
2453 	if (dvp) {
2454 		if (dvp == vp)
2455 			vrele(dvp);
2456 		else
2457 			vput(dvp);
2458 	}
2459 	if (vp)
2460 		vput(vp);
2461 	return(error);
2462 }
2463 
2464 /*
2465  * nfs symbolic link service
 *
 * Decodes the directory file handle and new name, resolves them with
 * nfs_namei(), reads the link target path (and, for v3, the attributes
 * sent ahead of it; for v2, the sattr sent after it) and issues
 * VOP_NSYMLINK().  EEXIST is returned when the name already resolves
 * to a vnode.
 *
 * For v3 a successful reply carries the new file handle and attributes,
 * followed by wcc data for the directory.  The reply mbuf chain is
 * handed back through *mrq.
 *
 * Cleanup contract: every pointer released on the 'out' path is set to
 * NULL afterwards, so that the nfsmout code (which is also the landing
 * point for the request-parsing macros) can release whatever is still
 * held without releasing anything twice.
2466  */
2467 int
2468 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2469 	      struct thread *td, struct mbuf **mrq)
2470 {
2471 	struct sockaddr *nam = nfsd->nd_nam;
2472 	struct ucred *cred = &nfsd->nd_cr;
2473 	struct vattr va, dirfor, diraft;
2474 	struct nlookupdata nd;
2475 	struct vattr *vap = &va;
2476 	struct nfsv2_sattr *sp;
2477 	char *pathcp = NULL;
2478 	struct uio io;
2479 	struct iovec iv;
2480 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2481 	struct vnode *dirp;
2482 	struct vnode *vp;
2483 	struct vnode *dvp;
2484 	nfsfh_t nfh;
2485 	fhandle_t *fhp;
2486 	struct nfsm_info info;
2487 
2488 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything nfsmout inspects before any macro can jump there. */
2489 	nlookup_zero(&nd);
2490 	dirp = NULL;
2491 	dvp = NULL;
2492 	vp = NULL;
2493 
2494 	info.mrep = nfsd->nd_mrep;
2495 	info.mreq =  NULL;
2496 	info.md = nfsd->nd_md;
2497 	info.dpos = nfsd->nd_dpos;
2498 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2499 
2500 	fhp = &nfh.fh_generic;
2501 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2502 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2503 
2504 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2505 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2506 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* Pre-operation attributes of the directory (v3 only). */
2507 	if (dirp) {
2508 		if (info.v3)
2509 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2510 	}
2511 	if (error)
2512 		goto out;
2513 
	/*
	 * From here on dvp (and possibly vp) are held.  The parsing
	 * macros below jump to nfsmout on a malformed request, so
	 * nfsmout must be able to release them.
	 */
2514 	VATTR_NULL(vap);
2515 	if (info.v3) {
2516 		ERROROUT(nfsm_srvsattr(&info, vap));
2517 	}
2518 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
	/* One extra byte for the terminating NUL written below. */
2519 	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2520 	iv.iov_base = pathcp;
2521 	iv.iov_len = len2;
2522 	io.uio_resid = len2;
2523 	io.uio_offset = 0;
2524 	io.uio_iov = &iv;
2525 	io.uio_iovcnt = 1;
2526 	io.uio_segflg = UIO_SYSSPACE;
2527 	io.uio_rw = UIO_READ;
2528 	io.uio_td = NULL;
2529 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2530 	if (info.v3 == 0) {
2531 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2532 		vap->va_mode = nfstov_mode(sp->sa_mode);
2533 	}
2534 	*(pathcp + len2) = '\0';
2535 	if (vp) {
2536 		error = EEXIST;
2537 		goto out;
2538 	}
2539 
2540 	if (vap->va_mode == (mode_t)VNOVAL)
2541 		vap->va_mode = 0;
	/* vp is NULL here, so dvp is always unlocked before the VOP. */
2542 	if (dvp != vp)
2543 		vn_unlock(dvp);
2544 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2545 	vrele(dvp);
2546 	dvp = NULL;
2547 	if (error == 0) {
2548 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2549 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2550 		if (!error)
2551 			error = VOP_GETATTR(vp, vap);
2552 	}
2553 
2554 out:
2555 	if (dvp) {
2556 		if (dvp == vp)
2557 			vrele(dvp);
2558 		else
2559 			vput(dvp);
		/* Cleared so the nfsmout cleanup cannot release it again. */
		dvp = NULL;
2560 	}
2561 	if (vp) {
2562 		vput(vp);
2563 		vp = NULL;
2564 	}
2565 	if (pathcp) {
2566 		FREE(pathcp, M_TEMP);
2567 		pathcp = NULL;
2568 	}
2569 	if (dirp) {
2570 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2571 		vrele(dirp);
2572 		dirp = NULL;
2573 	}
2574 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2575 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2576 			      NFSX_WCCDATA(info.v3),
2577 			      &error));
2578 	if (info.v3) {
2579 		if (!error) {
2580 			nfsm_srvpostop_fh(&info, fhp);
2581 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2582 		}
2583 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2584 				 diraft_ret, &diraft);
2585 	}
2586 	error = 0;
2587 	/* fall through */
2588 
2589 nfsmout:
2590 	*mrq = info.mreq;
2591 	nlookup_done(&nd);
	/*
	 * Release dvp if a parsing macro jumped here after nfs_namei()
	 * succeeded; without this the directory vnode would stay
	 * referenced (and locked).  Same pattern as the other services
	 * in this file: vrele() when it aliases vp, otherwise vput().
	 */
	if (dvp) {
		if (dvp == vp)
			vrele(dvp);
		else
			vput(dvp);
	}
2592 	if (vp)
2593 		vput(vp);
2594 	if (dirp)
2595 		vrele(dirp);
2596 	if (pathcp)
2597 		FREE(pathcp, M_TEMP);
2598 	return (error);
2599 }
2600 
2601 /*
2602  * nfs mkdir service
 *
 * Decodes the parent directory file handle and new name, resolves them
 * with nfs_namei(), reads the requested attributes and issues
 * VOP_NMKDIR().  EEXIST is returned when the name already resolves to
 * a vnode.
 *
 * Reply on the 'out' path: for v3, the new file handle and attributes
 * (only when error is 0) followed by wcc data for the parent; for v2,
 * the file handle followed by a v2 fattr.  The reply mbuf chain is
 * handed back through *mrq.
2603  */
2604 int
2605 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2606 	    struct thread *td, struct mbuf **mrq)
2607 {
2608 	struct sockaddr *nam = nfsd->nd_nam;
2609 	struct ucred *cred = &nfsd->nd_cr;
2610 	struct vattr va, dirfor, diraft;
2611 	struct vattr *vap = &va;
2612 	struct nfs_fattr *fp;
2613 	struct nlookupdata nd;
2614 	u_int32_t *tl;
2615 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2616 	struct vnode *dirp;
2617 	struct vnode *dvp;
2618 	struct vnode *vp;
2619 	nfsfh_t nfh;
2620 	fhandle_t *fhp;
2621 	struct nfsm_info info;
2622 
2623 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything nfsmout inspects before any macro can jump there. */
2624 	nlookup_zero(&nd);
2625 	dirp = NULL;
2626 	dvp = NULL;
2627 	vp = NULL;
2628 
2629 	info.dpos = nfsd->nd_dpos;
2630 	info.mrep = nfsd->nd_mrep;
2631 	info.mreq =  NULL;
2632 	info.md = nfsd->nd_md;
2633 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2634 
2635 	fhp = &nfh.fh_generic;
2636 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2637 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2638 
2639 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2640 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2641 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* Pre-operation attributes of the parent directory (v3 only). */
2642 	if (dirp) {
2643 		if (info.v3)
2644 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2645 	}
	/* Lookup failed: reply with wcc data only and leave. */
2646 	if (error) {
2647 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2648 				      NFSX_WCCDATA(info.v3), &error));
2649 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2650 				 diraft_ret, &diraft);
2651 		error = 0;
2652 		goto nfsmout;
2653 	}
	/* v3 sends a full sattr; v2 is read here as a single mode word. */
2654 	VATTR_NULL(vap);
2655 	if (info.v3) {
2656 		ERROROUT(nfsm_srvsattr(&info, vap));
2657 	} else {
2658 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2659 		vap->va_mode = nfstov_mode(*tl++);
2660 	}
2661 
2662 	/*
2663 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2664 	 * nd.ni_vp, if it exists, is referenced but not locked.
	 *
	 * NOTE(review): this function has no nd.ni_dvp / nd.ni_vp; the
	 * corresponding locals are dvp and vp.  The cleanup code releases
	 * vp with vput(), so the "not locked" statement may be stale --
	 * confirm against nfs_namei().
2665 	 */
2666 
2667 	vap->va_type = VDIR;
2668 	if (vp != NULL) {
2669 		error = EEXIST;
2670 		goto out;
2671 	}
2672 
2673 	/*
2674 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2675 	 * component is freed by the VOP call.  This will fill-in
2676 	 * nd.ni_vp, reference, and exclusively lock it.
	 *
	 * NOTE(review): SAVESTART and nd.ni_vp do not appear in this
	 * code; the new vnode is returned through the local vp.
2677 	 */
2678 	if (vap->va_mode == (mode_t)VNOVAL)
2679 		vap->va_mode = 0;
	/* dvp is unlocked for the VOP, then vrele()'d and cleared. */
2680 	vn_unlock(dvp);
2681 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2682 	vrele(dvp);
2683 	dvp = NULL;
2684 
	/* Build the file handle and fetch attributes of the new directory. */
2685 	if (error == 0) {
2686 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2687 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2688 		if (error == 0)
2689 			error = VOP_GETATTR(vp, vap);
2690 	}
2691 out:
2692 	if (dirp)
2693 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2694 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2695 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2696 			      NFSX_WCCDATA(info.v3),
2697 			      &error));
2698 	if (info.v3) {
2699 		if (!error) {
2700 			nfsm_srvpostop_fh(&info, fhp);
2701 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2702 		}
2703 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2704 				 diraft_ret, &diraft);
2705 	} else {
2706 		nfsm_srvfhtom(&info, fhp);
2707 		fp = nfsm_build(&info, NFSX_V2FATTR);
2708 		nfsm_srvfattr(nfsd, vap, fp);
2709 	}
2710 	error = 0;
2711 	/* fall through */
2712 
2713 nfsmout:
	/* Common exit: publish the reply and drop what is still held. */
2714 	*mrq = info.mreq;
2715 	nlookup_done(&nd);
2716 	if (dirp)
2717 		vrele(dirp);
2718 	if (dvp) {
2719 		if (dvp == vp)
2720 			vrele(dvp);
2721 		else
2722 			vput(dvp);
2723 	}
2724 	if (vp)
2725 		vput(vp);
2726 	return (error);
2727 }
2728 
2729 /*
2730  * nfs rmdir service
 *
 * Decodes the parent directory file handle and name, resolves them with
 * nfs_namei() and issues VOP_NRMDIR().  ENOTDIR is returned when the
 * target is not a directory and EBUSY when it carries the VROOT flag.
 *
 * For v3 the reply carries wcc data for the parent directory.  The
 * reply mbuf chain is handed back through *mrq.
2731  */
2732 int
2733 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2734 	    struct thread *td, struct mbuf **mrq)
2735 {
2736 	struct sockaddr *nam = nfsd->nd_nam;
2737 	struct ucred *cred = &nfsd->nd_cr;
2738 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2739 	struct vnode *dirp;
2740 	struct vnode *dvp;
2741 	struct vnode *vp;
2742 	struct vattr dirfor, diraft;
2743 	nfsfh_t nfh;
2744 	fhandle_t *fhp;
2745 	struct nlookupdata nd;
2746 	struct nfsm_info info;
2747 
2748 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	/* Clear everything nfsmout inspects before any macro can jump there. */
2749 	nlookup_zero(&nd);
2750 	dirp = NULL;
2751 	dvp = NULL;
2752 	vp = NULL;
2753 
2754 	info.mrep = nfsd->nd_mrep;
2755 	info.mreq = NULL;
2756 	info.md = nfsd->nd_md;
2757 	info.dpos = nfsd->nd_dpos;
2758 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2759 
2760 	fhp = &nfh.fh_generic;
2761 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2762 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2763 
2764 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2765 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2766 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
	/* Pre-operation attributes of the parent directory (v3 only). */
2767 	if (dirp) {
2768 		if (info.v3)
2769 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2770 	}
	/* Lookup failed: reply with wcc data only and leave. */
2771 	if (error) {
2772 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2773 				      NFSX_WCCDATA(info.v3), &error));
2774 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2775 				 diraft_ret, &diraft);
2776 		error = 0;
2777 		goto nfsmout;
2778 	}
2779 	if (vp->v_type != VDIR) {
2780 		error = ENOTDIR;
2781 		goto out;
2782 	}
2783 
2784 	/*
2785 	 * The root of a mounted filesystem cannot be deleted.
2786 	 */
2787 	if (vp->v_flag & VROOT)
2788 		error = EBUSY;
2789 out:
2790 	/*
2791 	 * Issue or abort op.  Since SAVESTART is not set, path name
2792 	 * component is freed by the VOP after either.
	 *
	 * NOTE(review): SAVESTART does not appear in this code.  What
	 * the code does: on success of the checks, unlock dvp, release
	 * vp, call VOP_NRMDIR(), then vrele() dvp.  On failure dvp and
	 * vp are left for nfsmout to release.
2793 	 */
2794 	if (!error) {
2795 		if (dvp != vp)
2796 			vn_unlock(dvp);
2797 		vput(vp);
2798 		vp = NULL;
2799 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2800 		vrele(dvp);
2801 		dvp = NULL;
2802 	}
	/*
	 * nlookup_done() is called here and again at nfsmout, so on the
	 * fall-through path it runs twice on the same structure.
	 */
2803 	nlookup_done(&nd);
2804 
2805 	if (dirp)
2806 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2807 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
	/* Note: error is only reset to 0 here on the v3 path. */
2808 	if (info.v3) {
2809 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2810 				 diraft_ret, &diraft);
2811 		error = 0;
2812 	}
2813 	/* fall through */
2814 
2815 nfsmout:
	/* Common exit: publish the reply and drop what is still held. */
2816 	*mrq = info.mreq;
2817 	if (dvp) {
2818 		if (dvp == vp)
2819 			vrele(dvp);
2820 		else
2821 			vput(dvp);
2822 	}
2823 	nlookup_done(&nd);
2824 	if (dirp)
2825 		vrele(dirp);
2826 	if (vp)
2827 		vput(vp);
2828 	return(error);
2829 }
2830 
2831 /*
2832  * nfs readdir service
2833  * - mallocs what it thinks is enough to read
2834  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2835  * - calls VOP_READDIR()
2836  * - loops around building the reply
2837  *	if the output generated exceeds count break out of loop
2838  *	The nfsm_clget macro is used here so that the reply will be packed
2839  *	tightly in mbuf clusters.
2840  * - it only knows that it has encountered eof when the VOP_READDIR()
2841  *	reads nothing
2842  * - as such one readdir rpc will return eof false although you are there
2843  *	and then the next will return eof
2844  * - it trims out records with d_fileno == 0
2845  *	this doesn't matter for Unix clients, but such records might confuse
2846  *	clients for other operating systems.
2847  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2848  *	than requested, but this may not apply to all filesystems. For
2849  *	example, client NFS does not { although it is never remote mounted
2850  *	anyhow }
2851  *     The alternate call nfsrv_readdirplus() does lookups as well.
2852  * PS: The NFS protocol spec. does not clarify what the "count" byte
2853  *	argument is a count of.. just name strings and file id's or the
2854  *	entire reply rpc or ...
2855  *	I tried just file name and id sizes and it confused the Sun client,
2856  *	so I am using the full rpc size now. The "paranoia.." comment refers
2857  *	to including the status longwords that are not a part of the dir.
2858  *	"entry" structures, but are in the rpc.
2859  */
/*
 * Scratch record declared as a local ('fl') in nfsrv_readdirplus(); that
 * function also points a fhandle_t at fl_nfh.  Every member is either an
 * nfsuint64 or an array/scalar of 32-bit words.
 * NOTE(review): the field order presumably mirrors the per-entry tail of
 * a v3 READDIRPLUS reply (cookie, post-op attributes, file handle) --
 * confirm against the code that copies it out before reordering anything.
 */
2860 struct flrep {
2861 	nfsuint64	fl_off;		/* presumably the entry's cookie/offset */
2862 	u_int32_t	fl_postopok;	/* presumably "attributes follow" flag */
2863 	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)]; /* NFSX_V3FATTR bytes */
2864 	u_int32_t	fl_fhok;	/* presumably "file handle follows" flag */
2865 	u_int32_t	fl_fhsize;	/* presumably the file handle length */
2866 	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* NFSX_V3FH bytes */
2867 };
2868 
/*
 * READDIR handler.  Decodes the directory file handle, start cookie and
 * byte count, reads directory entries with VOP_READDIR() into a
 * temporary buffer, and packs the entries that lie past the requested
 * cookie into the reply until the client's count would be exceeded.
 * The reply mbuf chain is handed back through *mrq.
 */
2869 int
2870 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2871 	      struct thread *td, struct mbuf **mrq)
2872 {
2873 	struct sockaddr *nam = nfsd->nd_nam;
2874 	struct ucred *cred = &nfsd->nd_cr;
2875 	char *bp, *be;
2876 	struct dirent *dp;
2877 	caddr_t cp;
2878 	u_int32_t *tl;
2879 	struct mbuf *mp1, *mp2;
2880 	char *cpos, *cend, *rbuf;
2881 	struct vnode *vp = NULL;
2882 	struct mount *mp = NULL;
2883 	struct vattr at;
2884 	nfsfh_t nfh;
2885 	fhandle_t *fhp;
2886 	struct uio io;
2887 	struct iovec iv;
2888 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2889 	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2890 	u_quad_t off, toff, verf;
2891 	off_t *cookies = NULL, *cookiep;
2892 	struct nfsm_info info;
2893 
	/* Seed the request-parsing state from the descriptor. */
2894 	info.mrep = nfsd->nd_mrep;
2895 	info.mreq = NULL;
2896 	info.md = nfsd->nd_md;
2897 	info.dpos = nfsd->nd_dpos;
2898 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2899 
2900 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2901 	fhp = &nfh.fh_generic;
2902 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/*
	 * v3 arguments: 64-bit cookie, 64-bit verifier, then the count
	 * (5 words).  v2 arguments: 32-bit cookie, then the count
	 * (2 words).  In both cases tl is left pointing at the count.
	 * verf is only consulted inside the disabled #if 0 block below.
	 */
2903 	if (info.v3) {
2904 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2905 		toff = fxdr_hyper(tl);
2906 		tl += 2;
2907 		verf = fxdr_hyper(tl);
2908 		tl += 2;
2909 	} else {
2910 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2911 		toff = fxdr_unsigned(u_quad_t, *tl++);
2912 		verf = 0;	/* shut up gcc */
2913 	}
2914 	off = toff;
2915 	cnt = fxdr_unsigned(int, *tl);
	/*
	 * siz is cnt rounded up to a multiple of DIRBLKSIZ; both are then
	 * clamped to NFS_SRVMAXDATA().  The unsigned casts make a
	 * negative client value compare as very large, so it is clamped.
	 */
2916 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2917 	xfer = NFS_SRVMAXDATA(nfsd);
2918 	if ((unsigned)cnt > xfer)
2919 		cnt = xfer;
2920 	if ((unsigned)siz > xfer)
2921 		siz = xfer;
2922 	fullsiz = siz;
2923 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2924 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2925 	if (!error && vp->v_type != VDIR) {
2926 		error = ENOTDIR;
2927 		vput(vp);
2928 		vp = NULL;
2929 	}
2930 	if (error) {
2931 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2932 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2933 		error = 0;
2934 		goto nfsmout;
2935 	}
2936 
2937 	/*
2938 	 * Obtain lock on vnode for this section of the code
	 *
	 * NOTE(review): no lock is taken here; vp is released with vput()
	 * on the error paths and vn_unlock()'d below, so it presumably
	 * comes back locked from nfsrv_fhtovp() -- confirm.
2939 	 */
2940 
2941 	if (info.v3) {
2942 		error = getret = VOP_GETATTR(vp, &at);
2943 #if 0
2944 		/*
2945 		 * XXX This check may be too strict for Solaris 2.5 clients.
2946 		 */
2947 		if (!error && toff && verf && verf != at.va_filerev)
2948 			error = NFSERR_BAD_COOKIE;
2949 #endif
2950 	}
	/* Access check on the directory, requesting VEXEC. */
2951 	if (!error)
2952 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2953 	if (error) {
2954 		vput(vp);
2955 		vp = NULL;
2956 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2957 				      NFSX_POSTOPATTR(info.v3), &error));
2958 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2959 		error = 0;
2960 		goto nfsmout;
2961 	}
	/* From here on vp is only referenced; it is released with vrele(). */
2962 	vn_unlock(vp);
2963 
2964 	/*
2965 	 * end section.  Allocate rbuf and continue
2966 	 */
2967 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2968 again:
	/*
	 * (Re)read a buffer of entries starting at 'off'.  Any cookie
	 * array from a previous pass is freed first.
	 */
2969 	iv.iov_base = rbuf;
2970 	iv.iov_len = fullsiz;
2971 	io.uio_iov = &iv;
2972 	io.uio_iovcnt = 1;
2973 	io.uio_offset = (off_t)off;
2974 	io.uio_resid = fullsiz;
2975 	io.uio_segflg = UIO_SYSSPACE;
2976 	io.uio_rw = UIO_READ;
2977 	io.uio_td = NULL;
2978 	eofflag = 0;
2979 	if (cookies) {
2980 		kfree((caddr_t)cookies, M_TEMP);
2981 		cookies = NULL;
2982 	}
2983 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2984 	off = (off_t)io.uio_offset;
	/* A successful read that returned no cookie array is an error. */
2985 	if (!cookies && !error)
2986 		error = NFSERR_PERM;
2987 	if (info.v3) {
2988 		getret = VOP_GETATTR(vp, &at);
2989 		if (!error)
2990 			error = getret;
2991 	}
2992 	if (error) {
2993 		vrele(vp);
2994 		vp = NULL;
2995 		kfree((caddr_t)rbuf, M_TEMP);
2996 		if (cookies)
2997 			kfree((caddr_t)cookies, M_TEMP);
2998 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2999 				      NFSX_POSTOPATTR(info.v3), &error));
3000 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3001 		error = 0;
3002 		goto nfsmout;
3003 	}
	/* Shrink siz to the number of bytes actually read. */
3004 	if (io.uio_resid) {
3005 		siz -= io.uio_resid;
3006 
3007 		/*
3008 		 * If nothing read, return eof
3009 		 * rpc reply
		 *
		 * NOTE(review): rbuf and cookies are freed only after the
		 * reply is built; if NEGKEEPOUT can jump to nfsmout here
		 * they would not be freed -- confirm the macro's behavior.
3010 		 */
3011 		if (siz == 0) {
3012 			vrele(vp);
3013 			vp = NULL;
3014 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3015 					      NFSX_POSTOPATTR(info.v3) +
3016 					      NFSX_COOKIEVERF(info.v3) +
3017 					      2 * NFSX_UNSIGNED,
3018 					      &error));
3019 			if (info.v3) {
3020 				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3021 				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3022 				txdr_hyper(at.va_filerev, tl);
3023 				tl += 2;
3024 			} else
3025 				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
			/* No entry follows; eof is true. */
3026 			*tl++ = nfs_false;
3027 			*tl = nfs_true;
3028 			FREE((caddr_t)rbuf, M_TEMP);
3029 			FREE((caddr_t)cookies, M_TEMP);
3030 			error = 0;
3031 			goto nfsmout;
3032 		}
3033 	}
3034 
3035 	/*
3036 	 * Check for degenerate cases of nothing useful read.
3037 	 * If so go try again
3038 	 */
3039 	cpos = rbuf;
3040 	cend = rbuf + siz;
3041 	dp = (struct dirent *)cpos;
3042 	cookiep = cookies;
3043 	/*
3044 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3045 	 * directory offset up to a block boundary, so it is necessary to
3046 	 * skip over the records that precede the requested offset. This
3047 	 * requires the assumption that file offset cookies monotonically
3048 	 * increase.
	 *
	 * The loop also skips records with d_ino == 0 and whiteouts
	 * (DT_WHT).
3049 	 */
3050 	while (cpos < cend && ncookies > 0 &&
3051 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3052 		 ((u_quad_t)(*cookiep)) <= toff)) {
3053 		dp = _DIRENT_NEXT(dp);
3054 		cpos = (char *)dp;
3055 		cookiep++;
3056 		ncookies--;
3057 	}
	/* Everything in this buffer was skipped: read the next buffer. */
3058 	if (cpos >= cend || ncookies == 0) {
3059 		toff = off;
3060 		siz = fullsiz;
3061 		goto again;
3062 	}
3063 
3064 	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
3065 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3066 			      NFSX_POSTOPATTR(info.v3) +
3067 			      NFSX_COOKIEVERF(info.v3) + siz,
3068 			      &error));
3069 	if (info.v3) {
3070 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3071 		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3072 		txdr_hyper(at.va_filerev, tl);
3073 	}
	/* bp/be track the write position and end of space in mp1. */
3074 	mp1 = mp2 = info.mb;
3075 	bp = info.bpos;
3076 	be = bp + M_TRAILINGSPACE(mp1);
3077 
3078 	/* Loop through the records and build reply */
3079 	while (cpos < cend && ncookies > 0) {
3080 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3081 			nlen = dp->d_namlen;
3082 			rem = nfsm_rndup(nlen) - nlen;
			/*
			 * Account for this entry: four words plus the
			 * padded name, and two more words for v3.  Stop
			 * (and clear eofflag) once the client's count
			 * would be exceeded.
			 */
3083 			len += (4 * NFSX_UNSIGNED + nlen + rem);
3084 			if (info.v3)
3085 				len += 2 * NFSX_UNSIGNED;
3086 			if (len > cnt) {
3087 				eofflag = 0;
3088 				break;
3089 			}
3090 			/*
3091 			 * Build the directory record xdr from
3092 			 * the dirent entry.
			 *
			 * nfsm_clget is invoked before each word is
			 * stored; presumably it moves on to a fresh
			 * mbuf when bp has reached be -- see the macro.
3093 			 */
3094 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3095 			*tl = nfs_true;
3096 			bp += NFSX_UNSIGNED;
			/* v3 emits the high 32 bits of d_ino first. */
3097 			if (info.v3) {
3098 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3099 				*tl = txdr_unsigned(dp->d_ino >> 32);
3100 				bp += NFSX_UNSIGNED;
3101 			}
3102 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3103 			*tl = txdr_unsigned(dp->d_ino);
3104 			bp += NFSX_UNSIGNED;
3105 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3106 			*tl = txdr_unsigned(nlen);
3107 			bp += NFSX_UNSIGNED;
3108 
3109 			/* And loop around copying the name */
3110 			xfer = nlen;
3111 			cp = dp->d_name;
3112 			while (xfer > 0) {
3113 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
				/* Copy no more than fits before be. */
3114 				if ((bp+xfer) > be)
3115 					tsiz = be-bp;
3116 				else
3117 					tsiz = xfer;
3118 				bcopy(cp, bp, tsiz);
3119 				bp += tsiz;
3120 				xfer -= tsiz;
3121 				if (xfer > 0)
3122 					cp += tsiz;
3123 			}
3124 			/* And null pad to a int32_t boundary */
3125 			for (i = 0; i < rem; i++)
3126 				*bp++ = '\0';
3127 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3128 
3129 			/* Finish off the record */
			/* Cookie: high word first on v3, then the low word. */
3130 			if (info.v3) {
3131 				*tl = txdr_unsigned(*cookiep >> 32);
3132 				bp += NFSX_UNSIGNED;
3133 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3134 			}
3135 			*tl = txdr_unsigned(*cookiep);
3136 			bp += NFSX_UNSIGNED;
3137 		}
3138 		dp = _DIRENT_NEXT(dp);
3139 		cpos = (char *)dp;
3140 		cookiep++;
3141 		ncookies--;
3142 	}
3143 	vrele(vp);
3144 	vp = NULL;
	/* Trailer: "no more entries" marker, then the eof flag. */
3145 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3146 	*tl = nfs_false;
3147 	bp += NFSX_UNSIGNED;
3148 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3149 	if (eofflag)
3150 		*tl = nfs_true;
3151 	else
3152 		*tl = nfs_false;
3153 	bp += NFSX_UNSIGNED;
	/*
	 * Fix up the length of the last mbuf written: if it is still the
	 * mbuf the reply started in, add the bytes appended since
	 * info.bpos; otherwise set m_len from bp (only when bp < be).
	 */
3154 	if (mp1 != info.mb) {
3155 		if (bp < be)
3156 			mp1->m_len = bp - mtod(mp1, caddr_t);
3157 	} else
3158 		mp1->m_len += bp - info.bpos;
3159 	FREE((caddr_t)rbuf, M_TEMP);
3160 	FREE((caddr_t)cookies, M_TEMP);
3161 
3162 nfsmout:
	/* Common exit: publish the reply and drop vp if still referenced. */
3163 	*mrq = info.mreq;
3164 	if (vp)
3165 		vrele(vp);
3166 	return(error);
3167 }
3168 
3169 int
3170 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3171 		  struct thread *td, struct mbuf **mrq)
3172 {
3173 	struct sockaddr *nam = nfsd->nd_nam;
3174 	struct ucred *cred = &nfsd->nd_cr;
3175 	char *bp, *be;
3176 	struct dirent *dp;
3177 	caddr_t cp;
3178 	u_int32_t *tl;
3179 	struct mbuf *mp1, *mp2;
3180 	char *cpos, *cend, *rbuf;
3181 	struct vnode *vp = NULL, *nvp;
3182 	struct mount *mp = NULL;
3183 	struct flrep fl;
3184 	nfsfh_t nfh;
3185 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3186 	struct uio io;
3187 	struct iovec iv;
3188 	struct vattr va, at, *vap = &va;
3189 	struct nfs_fattr *fp;
3190 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3191 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3192 	u_quad_t off, toff, verf;
3193 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3194 	struct nfsm_info info;
3195 
3196 	info.mrep = nfsd->nd_mrep;
3197 	info.mreq = NULL;
3198 	info.md = nfsd->nd_md;
3199 	info.dpos = nfsd->nd_dpos;
3200 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3201 
3202 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3203 	fhp = &nfh.fh_generic;
3204 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3205 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3206 	toff = fxdr_hyper(tl);
3207 	tl += 2;
3208 	verf = fxdr_hyper(tl);
3209 	tl += 2;
3210 	siz = fxdr_unsigned(int, *tl++);
3211 	cnt = fxdr_unsigned(int, *tl);
3212 	off = toff;
3213 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3214 	xfer = NFS_SRVMAXDATA(nfsd);
3215 	if ((unsigned)cnt > xfer)
3216 		cnt = xfer;
3217 	if ((unsigned)siz > xfer)
3218 		siz = xfer;
3219 	fullsiz = siz;
3220 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3221 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3222 	if (!error && vp->v_type != VDIR) {
3223 		error = ENOTDIR;
3224 		vput(vp);
3225 		vp = NULL;
3226 	}
3227 	if (error) {
3228 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3229 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3230 		error = 0;
3231 		goto nfsmout;
3232 	}
3233 	error = getret = VOP_GETATTR(vp, &at);
3234 #if 0
3235 	/*
3236 	 * XXX This check may be too strict for Solaris 2.5 clients.
3237 	 */
3238 	if (!error && toff && verf && verf != at.va_filerev)
3239 		error = NFSERR_BAD_COOKIE;
3240 #endif
3241 	if (!error) {
3242 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3243 	}
3244 	if (error) {
3245 		vput(vp);
3246 		vp = NULL;
3247 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3248 				      NFSX_V3POSTOPATTR, &error));
3249 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3250 		error = 0;
3251 		goto nfsmout;
3252 	}
3253 	vn_unlock(vp);
3254 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3255 again:
3256 	iv.iov_base = rbuf;
3257 	iv.iov_len = fullsiz;
3258 	io.uio_iov = &iv;
3259 	io.uio_iovcnt = 1;
3260 	io.uio_offset = (off_t)off;
3261 	io.uio_resid = fullsiz;
3262 	io.uio_segflg = UIO_SYSSPACE;
3263 	io.uio_rw = UIO_READ;
3264 	io.uio_td = NULL;
3265 	eofflag = 0;
3266 	if (cookies) {
3267 		kfree((caddr_t)cookies, M_TEMP);
3268 		cookies = NULL;
3269 	}
3270 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3271 	off = (u_quad_t)io.uio_offset;
3272 	getret = VOP_GETATTR(vp, &at);
3273 	if (!cookies && !error)
3274 		error = NFSERR_PERM;
3275 	if (!error)
3276 		error = getret;
3277 	if (error) {
3278 		vrele(vp);
3279 		vp = NULL;
3280 		if (cookies)
3281 			kfree((caddr_t)cookies, M_TEMP);
3282 		kfree((caddr_t)rbuf, M_TEMP);
3283 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3284 				      NFSX_V3POSTOPATTR, &error));
3285 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3286 		error = 0;
3287 		goto nfsmout;
3288 	}
3289 	if (io.uio_resid) {
3290 		siz -= io.uio_resid;
3291 
3292 		/*
3293 		 * If nothing read, return eof
3294 		 * rpc reply
3295 		 */
3296 		if (siz == 0) {
3297 			vrele(vp);
3298 			vp = NULL;
3299 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3300 					      NFSX_V3POSTOPATTR +
3301 					      NFSX_V3COOKIEVERF +
3302 					      2 * NFSX_UNSIGNED,
3303 					      &error));
3304 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3305 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3306 			txdr_hyper(at.va_filerev, tl);
3307 			tl += 2;
3308 			*tl++ = nfs_false;
3309 			*tl = nfs_true;
3310 			FREE((caddr_t)cookies, M_TEMP);
3311 			FREE((caddr_t)rbuf, M_TEMP);
3312 			error = 0;
3313 			goto nfsmout;
3314 		}
3315 	}
3316 
3317 	/*
3318 	 * Check for degenerate cases of nothing useful read.
3319 	 * If so go try again
3320 	 */
3321 	cpos = rbuf;
3322 	cend = rbuf + siz;
3323 	dp = (struct dirent *)cpos;
3324 	cookiep = cookies;
3325 	/*
3326 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3327 	 * directory offset up to a block boundary, so it is necessary to
3328 	 * skip over the records that precede the requested offset. This
3329 	 * requires the assumption that file offset cookies monotonically
3330 	 * increase.
3331 	 */
3332 	while (cpos < cend && ncookies > 0 &&
3333 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3334 		 ((u_quad_t)(*cookiep)) <= toff)) {
3335 		dp = _DIRENT_NEXT(dp);
3336 		cpos = (char *)dp;
3337 		cookiep++;
3338 		ncookies--;
3339 	}
3340 	if (cpos >= cend || ncookies == 0) {
3341 		toff = off;
3342 		siz = fullsiz;
3343 		goto again;
3344 	}
3345 
3346 	/*
3347 	 * Probe one of the directory entries to see if the filesystem
3348 	 * supports VGET.
3349 	 */
3350 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3351 		error = NFSERR_NOTSUPP;
3352 		vrele(vp);
3353 		vp = NULL;
3354 		kfree((caddr_t)cookies, M_TEMP);
3355 		kfree((caddr_t)rbuf, M_TEMP);
3356 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3357 				      NFSX_V3POSTOPATTR, &error));
3358 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3359 		error = 0;
3360 		goto nfsmout;
3361 	}
3362 	if (nvp) {
3363 		vput(nvp);
3364 		nvp = NULL;
3365 	}
3366 
3367 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3368 			2 * NFSX_UNSIGNED;
3369 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3370 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3371 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3372 	txdr_hyper(at.va_filerev, tl);
3373 	mp1 = mp2 = info.mb;
3374 	bp = info.bpos;
3375 	be = bp + M_TRAILINGSPACE(mp1);
3376 
3377 	/* Loop through the records and build reply */
3378 	while (cpos < cend && ncookies > 0) {
3379 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3380 			nlen = dp->d_namlen;
3381 			rem = nfsm_rndup(nlen) - nlen;
3382 
3383 			/*
3384 			 * For readdir_and_lookup get the vnode using
3385 			 * the file number.
3386 			 */
3387 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3388 				goto invalid;
3389 			bzero((caddr_t)nfhp, NFSX_V3FH);
3390 			nfhp->fh_fsid = fhp->fh_fsid;
3391 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3392 				vput(nvp);
3393 				nvp = NULL;
3394 				goto invalid;
3395 			}
3396 			if (VOP_GETATTR(nvp, vap)) {
3397 				vput(nvp);
3398 				nvp = NULL;
3399 				goto invalid;
3400 			}
3401 			vput(nvp);
3402 			nvp = NULL;
3403 
3404 			/*
3405 			 * If either the dircount or maxcount will be
3406 			 * exceeded, get out now. Both of these lengths
3407 			 * are calculated conservatively, including all
3408 			 * XDR overheads.
3409 			 */
3410 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3411 				NFSX_V3POSTOPATTR);
3412 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3413 			if (len > cnt || dirlen > fullsiz) {
3414 				eofflag = 0;
3415 				break;
3416 			}
3417 
3418 			/*
3419 			 * Build the directory record xdr from
3420 			 * the dirent entry.
3421 			 */
3422 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3423 			nfsm_srvfattr(nfsd, vap, fp);
3424 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3425 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3426 			fl.fl_postopok = nfs_true;
3427 			fl.fl_fhok = nfs_true;
3428 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3429 
3430 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3431 			*tl = nfs_true;
3432 			bp += NFSX_UNSIGNED;
3433 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3434 			*tl = txdr_unsigned(dp->d_ino >> 32);
3435 			bp += NFSX_UNSIGNED;
3436 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3437 			*tl = txdr_unsigned(dp->d_ino);
3438 			bp += NFSX_UNSIGNED;
3439 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3440 			*tl = txdr_unsigned(nlen);
3441 			bp += NFSX_UNSIGNED;
3442 
3443 			/* And loop around copying the name */
3444 			xfer = nlen;
3445 			cp = dp->d_name;
3446 			while (xfer > 0) {
3447 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3448 				if ((bp + xfer) > be)
3449 					tsiz = be - bp;
3450 				else
3451 					tsiz = xfer;
3452 				bcopy(cp, bp, tsiz);
3453 				bp += tsiz;
3454 				xfer -= tsiz;
3455 				cp += tsiz;
3456 			}
3457 			/* And null pad to a int32_t boundary */
3458 			for (i = 0; i < rem; i++)
3459 				*bp++ = '\0';
3460 
3461 			/*
3462 			 * Now copy the flrep structure out.
3463 			 */
3464 			xfer = sizeof (struct flrep);
3465 			cp = (caddr_t)&fl;
3466 			while (xfer > 0) {
3467 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3468 				if ((bp + xfer) > be)
3469 					tsiz = be - bp;
3470 				else
3471 					tsiz = xfer;
3472 				bcopy(cp, bp, tsiz);
3473 				bp += tsiz;
3474 				xfer -= tsiz;
3475 				cp += tsiz;
3476 			}
3477 		}
3478 invalid:
3479 		dp = _DIRENT_NEXT(dp);
3480 		cpos = (char *)dp;
3481 		cookiep++;
3482 		ncookies--;
3483 	}
3484 	vrele(vp);
3485 	vp = NULL;
3486 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3487 	*tl = nfs_false;
3488 	bp += NFSX_UNSIGNED;
3489 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3490 	if (eofflag)
3491 		*tl = nfs_true;
3492 	else
3493 		*tl = nfs_false;
3494 	bp += NFSX_UNSIGNED;
3495 	if (mp1 != info.mb) {
3496 		if (bp < be)
3497 			mp1->m_len = bp - mtod(mp1, caddr_t);
3498 	} else
3499 		mp1->m_len += bp - info.bpos;
3500 	FREE((caddr_t)cookies, M_TEMP);
3501 	FREE((caddr_t)rbuf, M_TEMP);
3502 nfsmout:
3503 	*mrq = info.mreq;
3504 	if (vp)
3505 		vrele(vp);
3506 	return(error);
3507 }
3508 
3509 /*
3510  * nfs commit service
3511  */
3512 int
3513 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3514 	     struct thread *td, struct mbuf **mrq)
3515 {
3516 	struct sockaddr *nam = nfsd->nd_nam;
3517 	struct ucred *cred = &nfsd->nd_cr;
3518 	struct vattr bfor, aft;
3519 	struct vnode *vp = NULL;
3520 	struct mount *mp = NULL;
3521 	nfsfh_t nfh;
3522 	fhandle_t *fhp;
3523 	u_int32_t *tl;
3524 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3525 	u_quad_t off;
3526 	struct nfsm_info info;
3527 
3528 	info.mrep = nfsd->nd_mrep;
3529 	info.mreq = NULL;
3530 	info.md = nfsd->nd_md;
3531 	info.dpos = nfsd->nd_dpos;
3532 
3533 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3534 	fhp = &nfh.fh_generic;
3535 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3536 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3537 
3538 	/*
3539 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3540 	 * count parameters, so these arguments are useless (someday maybe).
3541 	 */
3542 	off = fxdr_hyper(tl);
3543 	tl += 2;
3544 	cnt = fxdr_unsigned(int, *tl);
3545 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3546 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3547 	if (error) {
3548 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3549 				      2 * NFSX_UNSIGNED, &error));
3550 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3551 				 aft_ret, &aft);
3552 		error = 0;
3553 		goto nfsmout;
3554 	}
3555 	for_ret = VOP_GETATTR(vp, &bfor);
3556 
3557 	if (cnt > MAX_COMMIT_COUNT) {
3558 		/*
3559 		 * Give up and do the whole thing
3560 		 */
3561 		if (vp->v_object &&
3562 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3563 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3564 		}
3565 		error = VOP_FSYNC(vp, MNT_WAIT);
3566 	} else {
3567 		/*
3568 		 * Locate and synchronously write any buffers that fall
3569 		 * into the requested range.  Note:  we are assuming that
3570 		 * f_iosize is a power of 2.
3571 		 */
3572 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3573 		int iomask = iosize - 1;
3574 		off_t loffset;
3575 
3576 		/*
3577 		 * Align to iosize boundry, super-align to page boundry.
3578 		 */
3579 		if (off & iomask) {
3580 			cnt += off & iomask;
3581 			off &= ~(u_quad_t)iomask;
3582 		}
3583 		if (off & PAGE_MASK) {
3584 			cnt += off & PAGE_MASK;
3585 			off &= ~(u_quad_t)PAGE_MASK;
3586 		}
3587 		loffset = off;
3588 
3589 		if (vp->v_object &&
3590 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3591 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3592 		}
3593 
3594 		crit_enter();
3595 		while (cnt > 0) {
3596 			struct buf *bp;
3597 
3598 			/*
3599 			 * If we have a buffer and it is marked B_DELWRI we
3600 			 * have to lock and write it.  Otherwise the prior
3601 			 * write is assumed to have already been committed.
3602 			 */
3603 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3604 				if (bp->b_flags & B_DELWRI)
3605 					bp = findblk(vp, loffset, 0);
3606 				else
3607 					bp = NULL;
3608 			}
3609 			if (bp) {
3610 				if (bp->b_flags & B_DELWRI) {
3611 					bremfree(bp);
3612 					bwrite(bp);
3613 					++nfs_commit_miss;
3614 				} else {
3615 					BUF_UNLOCK(bp);
3616 				}
3617 			}
3618 			++nfs_commit_blks;
3619 			if (cnt < iosize)
3620 				break;
3621 			cnt -= iosize;
3622 			loffset += iosize;
3623 		}
3624 		crit_exit();
3625 	}
3626 
3627 	aft_ret = VOP_GETATTR(vp, &aft);
3628 	vput(vp);
3629 	vp = NULL;
3630 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3631 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3632 			      &error));
3633 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3634 			 aft_ret, &aft);
3635 	if (!error) {
3636 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3637 		if (nfsver.tv_sec == 0)
3638 			nfsver = boottime;
3639 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3640 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3641 	} else {
3642 		error = 0;
3643 	}
3644 nfsmout:
3645 	*mrq = info.mreq;
3646 	if (vp)
3647 		vput(vp);
3648 	return(error);
3649 }
3650 
3651 /*
3652  * nfs statfs service
3653  */
3654 int
3655 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3656 	     struct thread *td, struct mbuf **mrq)
3657 {
3658 	struct sockaddr *nam = nfsd->nd_nam;
3659 	struct ucred *cred = &nfsd->nd_cr;
3660 	struct statfs *sf;
3661 	struct nfs_statfs *sfp;
3662 	int error = 0, rdonly, getret = 1;
3663 	struct vnode *vp = NULL;
3664 	struct mount *mp = NULL;
3665 	struct vattr at;
3666 	nfsfh_t nfh;
3667 	fhandle_t *fhp;
3668 	struct statfs statfs;
3669 	u_quad_t tval;
3670 	struct nfsm_info info;
3671 
3672 	info.mrep = nfsd->nd_mrep;
3673 	info.mreq = NULL;
3674 	info.md = nfsd->nd_md;
3675 	info.dpos = nfsd->nd_dpos;
3676 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3677 
3678 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3679 	fhp = &nfh.fh_generic;
3680 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3681 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3682 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3683 	if (error) {
3684 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3685 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3686 		error = 0;
3687 		goto nfsmout;
3688 	}
3689 	sf = &statfs;
3690 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3691 	getret = VOP_GETATTR(vp, &at);
3692 	vput(vp);
3693 	vp = NULL;
3694 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3695 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3696 			      &error));
3697 	if (info.v3)
3698 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3699 	if (error) {
3700 		error = 0;
3701 		goto nfsmout;
3702 	}
3703 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3704 	if (info.v3) {
3705 		tval = (u_quad_t)sf->f_blocks;
3706 		tval *= (u_quad_t)sf->f_bsize;
3707 		txdr_hyper(tval, &sfp->sf_tbytes);
3708 		tval = (u_quad_t)sf->f_bfree;
3709 		tval *= (u_quad_t)sf->f_bsize;
3710 		txdr_hyper(tval, &sfp->sf_fbytes);
3711 		tval = (u_quad_t)sf->f_bavail;
3712 		tval *= (u_quad_t)sf->f_bsize;
3713 		txdr_hyper(tval, &sfp->sf_abytes);
3714 		sfp->sf_tfiles.nfsuquad[0] = 0;
3715 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3716 		sfp->sf_ffiles.nfsuquad[0] = 0;
3717 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3718 		sfp->sf_afiles.nfsuquad[0] = 0;
3719 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3720 		sfp->sf_invarsec = 0;
3721 	} else {
3722 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3723 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3724 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3725 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3726 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3727 	}
3728 nfsmout:
3729 	*mrq = info.mreq;
3730 	if (vp)
3731 		vput(vp);
3732 	return(error);
3733 }
3734 
3735 /*
3736  * nfs fsinfo service
3737  */
3738 int
3739 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3740 	     struct thread *td, struct mbuf **mrq)
3741 {
3742 	struct sockaddr *nam = nfsd->nd_nam;
3743 	struct ucred *cred = &nfsd->nd_cr;
3744 	struct nfsv3_fsinfo *sip;
3745 	int error = 0, rdonly, getret = 1, pref;
3746 	struct vnode *vp = NULL;
3747 	struct mount *mp = NULL;
3748 	struct vattr at;
3749 	nfsfh_t nfh;
3750 	fhandle_t *fhp;
3751 	u_quad_t maxfsize;
3752 	struct statfs sb;
3753 	struct nfsm_info info;
3754 
3755 	info.mrep = nfsd->nd_mrep;
3756 	info.mreq = NULL;
3757 	info.md = nfsd->nd_md;
3758 	info.dpos = nfsd->nd_dpos;
3759 
3760 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3761 	fhp = &nfh.fh_generic;
3762 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3763 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3764 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3765 	if (error) {
3766 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3767 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3768 		error = 0;
3769 		goto nfsmout;
3770 	}
3771 
3772 	/* XXX Try to make a guess on the max file size. */
3773 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3774 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3775 
3776 	getret = VOP_GETATTR(vp, &at);
3777 	vput(vp);
3778 	vp = NULL;
3779 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3780 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3781 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3782 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3783 
3784 	/*
3785 	 * XXX
3786 	 * There should be file system VFS OP(s) to get this information.
3787 	 * For now, assume ufs.
3788 	 */
3789 	if (slp->ns_so->so_type == SOCK_DGRAM)
3790 		pref = NFS_MAXDGRAMDATA;
3791 	else
3792 		pref = NFS_MAXDATA;
3793 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3794 	sip->fs_rtpref = txdr_unsigned(pref);
3795 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3796 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3797 	sip->fs_wtpref = txdr_unsigned(pref);
3798 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3799 	sip->fs_dtpref = txdr_unsigned(pref);
3800 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3801 	sip->fs_timedelta.nfsv3_sec = 0;
3802 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3803 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3804 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3805 		NFSV3FSINFO_CANSETTIME);
3806 nfsmout:
3807 	*mrq = info.mreq;
3808 	if (vp)
3809 		vput(vp);
3810 	return(error);
3811 }
3812 
3813 /*
3814  * nfs pathconf service
3815  */
3816 int
3817 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3818 	       struct thread *td, struct mbuf **mrq)
3819 {
3820 	struct sockaddr *nam = nfsd->nd_nam;
3821 	struct ucred *cred = &nfsd->nd_cr;
3822 	struct nfsv3_pathconf *pc;
3823 	int error = 0, rdonly, getret = 1;
3824 	register_t linkmax, namemax, chownres, notrunc;
3825 	struct vnode *vp = NULL;
3826 	struct mount *mp = NULL;
3827 	struct vattr at;
3828 	nfsfh_t nfh;
3829 	fhandle_t *fhp;
3830 	struct nfsm_info info;
3831 
3832 	info.mrep = nfsd->nd_mrep;
3833 	info.mreq = NULL;
3834 	info.md = nfsd->nd_md;
3835 	info.dpos = nfsd->nd_dpos;
3836 
3837 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3838 	fhp = &nfh.fh_generic;
3839 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3840 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3841 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3842 	if (error) {
3843 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3844 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3845 		error = 0;
3846 		goto nfsmout;
3847 	}
3848 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3849 	if (!error)
3850 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3851 	if (!error)
3852 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3853 	if (!error)
3854 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3855 	getret = VOP_GETATTR(vp, &at);
3856 	vput(vp);
3857 	vp = NULL;
3858 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3859 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3860 			      &error));
3861 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3862 	if (error) {
3863 		error = 0;
3864 		goto nfsmout;
3865 	}
3866 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3867 
3868 	pc->pc_linkmax = txdr_unsigned(linkmax);
3869 	pc->pc_namemax = txdr_unsigned(namemax);
3870 	pc->pc_notrunc = txdr_unsigned(notrunc);
3871 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3872 
3873 	/*
3874 	 * These should probably be supported by VOP_PATHCONF(), but
3875 	 * until msdosfs is exportable (why would you want to?), the
3876 	 * Unix defaults should be ok.
3877 	 */
3878 	pc->pc_caseinsensitive = nfs_false;
3879 	pc->pc_casepreserving = nfs_true;
3880 nfsmout:
3881 	*mrq = info.mreq;
3882 	if (vp)
3883 		vput(vp);
3884 	return(error);
3885 }
3886 
3887 /*
3888  * Null operation, used by clients to ping server
3889  */
3890 /* ARGSUSED */
3891 int
3892 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3893 	   struct thread *td, struct mbuf **mrq)
3894 {
3895 	struct nfsm_info info;
3896 	int error = NFSERR_RETVOID;
3897 
3898 	info.mrep = nfsd->nd_mrep;
3899 	info.mreq = NULL;
3900 
3901 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3902 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3903 nfsmout:
3904 	*mrq = info.mreq;
3905 	return (error);
3906 }
3907 
3908 /*
3909  * No operation, used for obsolete procedures
3910  */
3911 /* ARGSUSED */
3912 int
3913 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3914 	   struct thread *td, struct mbuf **mrq)
3915 {
3916 	struct nfsm_info info;
3917 	int error;
3918 
3919 	info.mrep = nfsd->nd_mrep;
3920 	info.mreq = NULL;
3921 
3922 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3923 	if (nfsd->nd_repstat)
3924 		error = nfsd->nd_repstat;
3925 	else
3926 		error = EPROCUNAVAIL;
3927 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3928 	error = 0;
3929 nfsmout:
3930 	*mrq = info.mreq;
3931 	return (error);
3932 }
3933 
3934 /*
3935  * Perform access checking for vnodes obtained from file handles that would
3936  * refer to files already opened by a Unix client. You cannot just use
3937  * vn_writechk() and VOP_ACCESS() for two reasons.
3938  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3939  * 2 - The owner is to be given access irrespective of mode bits for some
3940  *     operations, so that processes that chmod after opening a file don't
3941  *     break. I don't like this because it opens a security hole, but since
3942  *     the nfs server opens a security hole the size of a barn door anyhow,
3943  *     what the heck.
3944  *
3945  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3946  * will return EPERM instead of EACCESS. EPERM is always an error.
3947  */
3948 static int
3949 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3950 	     int rdonly, struct thread *td, int override)
3951 {
3952 	struct vattr vattr;
3953 	int error;
3954 
3955 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3956 	if (flags & VWRITE) {
3957 		/* Just vn_writechk() changed to check rdonly */
3958 		/*
3959 		 * Disallow write attempts on read-only file systems;
3960 		 * unless the file is a socket or a block or character
3961 		 * device resident on the file system.
3962 		 */
3963 		if (rdonly ||
3964 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3965 			switch (vp->v_type) {
3966 			case VREG:
3967 			case VDIR:
3968 			case VLNK:
3969 				return (EROFS);
3970 			default:
3971 				break;
3972 			}
3973 		}
3974 		/*
3975 		 * If there's shared text associated with
3976 		 * the inode, we can't allow writing.
3977 		 */
3978 		if (vp->v_flag & VTEXT)
3979 			return (ETXTBSY);
3980 	}
3981 	error = VOP_GETATTR(vp, &vattr);
3982 	if (error)
3983 		return (error);
3984 	error = VOP_ACCESS(vp, flags, cred);
3985 	/*
3986 	 * Allow certain operations for the owner (reads and writes
3987 	 * on files that are already open).
3988 	 */
3989 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3990 		error = 0;
3991 	return error;
3992 }
3993 #endif /* NFS_NOSERVER */
3994 
3995