xref: /dflybsd-src/sys/vfs/nfs/nfs_serv.c (revision 884717e1debcf4b08bda1d29d01b0c8a34b86a59)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  */
39 
40 /*
41  * nfs version 2 and 3 server calls to vnode ops
42  * - these routines generally have 3 phases
43  *   1 - break down and validate rpc request in mbuf list
44  *   2 - do the vnode ops for the request
45  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
46  *   3 - build the rpc reply in an mbuf list
47  *   nb:
48  *	- do not mix the phases, since the nfsm_?? macros can return failures
49  *	  on a bad rpc or similar and do not do any vrele() or vput()'s
50  *
51  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
52  *	error number iff error != 0 whereas
53  *	returning an error from the server function implies a fatal error
54  *	such as a badly constructed rpc request that should be dropped without
55  *	a reply.
56  *	For Version 3, nfsm_reply() does not return for the error case, since
57  *	most version 3 rpcs return more than the status for error cases.
58  *
59  * Other notes:
60  *	Warning: always pay careful attention to resource cleanup on return
61  *	and note that nfsm_*() macros can terminate a procedure on certain
62  *	errors.
63  */
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/proc.h>
68 #include <sys/priv.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83 
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87 
88 #include <sys/buf2.h>
89 
90 #include <sys/thread2.h>
91 
92 #include "nfsproto.h"
93 #include "rpcv2.h"
94 #include "nfs.h"
95 #include "xdr_subs.h"
96 #include "nfsm_subs.h"
97 
/*
 * Debug tracing.  The single macro argument is a complete, parenthesized
 * kprintf() argument list, e.g.
 *	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 * The macro expands to nothing unless NFSRV_DEBUG is defined.
 */
#if defined(NFSRV_DEBUG)
#define nfsdbprintf(info)	kprintf info
#else
#define nfsdbprintf(info)
#endif

#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Parameters of the nfsheur[] table consulted by nfsrv_read().
 */
#define NUM_HEURISTIC		1017	/* number of entries in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use of a newly claimed entry */
#define NHUSE_INC		16	/* added to nh_use on each read */
#define NHUSE_MAX		2048	/* ceiling for nh_use */
110 
111 static struct nfsheur {
112     struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
113     off_t nh_nextr;		/* next offset for sequential detection */
114     int nh_use;			/* use count for selection */
115     int nh_seqcount;		/* heuristic */
116 } nfsheur[NUM_HEURISTIC];
117 
118 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
119 		      NFFIFO, NFNON };
120 #ifndef NFS_NOSERVER
121 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
122 		      NFCHR, NFNON };
123 
124 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
125 int nfsrvw_procrastinate_v3 = 0;
126 
127 static struct timespec	nfsver;
128 
129 SYSCTL_DECL(_vfs_nfs);
130 
131 int nfs_async;
132 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
133     "Enable unstable and fast writes");
134 static int nfs_commit_blks;
135 static int nfs_commit_miss;
136 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
137     "Number of committed blocks");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
139     "Number of nfs blocks committed from dirty buffers");
140 
141 static int nfsrv_access (struct mount *, struct vnode *, int,
142 			struct ucred *, int, struct thread *, int);
143 static void nfsrvw_coalesce (struct nfsrv_descript *,
144 		struct nfsrv_descript *);
145 
146 /*
147  * nfs v3 access service
148  */
149 int
150 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
151 	      struct thread *td, struct mbuf **mrq)
152 {
153 	struct sockaddr *nam = nfsd->nd_nam;
154 	struct ucred *cred = &nfsd->nd_cr;
155 	struct vnode *vp = NULL;
156 	struct mount *mp = NULL;
157 	nfsfh_t nfh;
158 	fhandle_t *fhp;
159 	int error = 0, rdonly, getret;
160 	struct vattr vattr, *vap = &vattr;
161 	u_long testmode, nfsmode;
162 	struct nfsm_info info;
163 	u_int32_t *tl;
164 
165 	info.dpos = nfsd->nd_dpos;
166 	info.md = nfsd->nd_md;
167 	info.mrep = nfsd->nd_mrep;
168 	info.mreq = NULL;
169 
170 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
171 	fhp = &nfh.fh_generic;
172 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
173 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
174 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
175 	    (nfsd->nd_flag & ND_KERBAUTH), TRUE);
176 	if (error) {
177 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
178 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
179 		error = 0;
180 		goto nfsmout;
181 	}
182 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
183 	if ((nfsmode & NFSV3ACCESS_READ) &&
184 		nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
185 		nfsmode &= ~NFSV3ACCESS_READ;
186 	if (vp->v_type == VDIR)
187 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
188 			NFSV3ACCESS_DELETE);
189 	else
190 		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
191 	if ((nfsmode & testmode) &&
192 		nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
193 		nfsmode &= ~testmode;
194 	if (vp->v_type == VDIR)
195 		testmode = NFSV3ACCESS_LOOKUP;
196 	else
197 		testmode = NFSV3ACCESS_EXECUTE;
198 	if ((nfsmode & testmode) &&
199 		nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
200 		nfsmode &= ~testmode;
201 	getret = VOP_GETATTR(vp, vap);
202 	vput(vp);
203 	vp = NULL;
204 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
205 			      NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
206 	nfsm_srvpostop_attr(&info, nfsd, getret, vap);
207 	tl = nfsm_build(&info, NFSX_UNSIGNED);
208 	*tl = txdr_unsigned(nfsmode);
209 nfsmout:
210 	*mrq = info.mreq;
211 	if (vp)
212 		vput(vp);
213 	return(error);
214 }
215 
216 /*
217  * nfs getattr service
218  */
219 int
220 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
221 	      struct thread *td, struct mbuf **mrq)
222 {
223 	struct sockaddr *nam = nfsd->nd_nam;
224 	struct ucred *cred = &nfsd->nd_cr;
225 	struct nfs_fattr *fp;
226 	struct vattr va;
227 	struct vattr *vap = &va;
228 	struct vnode *vp = NULL;
229 	struct mount *mp = NULL;
230 	nfsfh_t nfh;
231 	fhandle_t *fhp;
232 	int error = 0, rdonly;
233 	struct nfsm_info info;
234 
235 	info.mrep = nfsd->nd_mrep;
236 	info.md = nfsd->nd_md;
237 	info.dpos = nfsd->nd_dpos;
238 	info.mreq = NULL;
239 
240 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
241 	fhp = &nfh.fh_generic;
242 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
243 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
244 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
245 	if (error) {
246 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
247 		error = 0;
248 		goto nfsmout;
249 	}
250 	error = VOP_GETATTR(vp, vap);
251 	vput(vp);
252 	vp = NULL;
253 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
254 			      NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
255 	if (error) {
256 		error = 0;
257 		goto nfsmout;
258 	}
259 	fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
260 	nfsm_srvfattr(nfsd, vap, fp);
261 	/* fall through */
262 
263 nfsmout:
264 	*mrq = info.mreq;
265 	if (vp)
266 		vput(vp);
267 	return(error);
268 }
269 
270 /*
271  * nfs setattr service
272  */
273 int
274 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
275 	      struct thread *td, struct mbuf **mrq)
276 {
277 	struct sockaddr *nam = nfsd->nd_nam;
278 	struct ucred *cred = &nfsd->nd_cr;
279 	struct vattr va, preat;
280 	struct vattr *vap = &va;
281 	struct nfsv2_sattr *sp;
282 	struct nfs_fattr *fp;
283 	struct vnode *vp = NULL;
284 	struct mount *mp = NULL;
285 	nfsfh_t nfh;
286 	fhandle_t *fhp;
287 	u_int32_t *tl;
288 	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
289 	int gcheck = 0;
290 	struct timespec guard;
291 	struct nfsm_info info;
292 
293 	info.mrep = nfsd->nd_mrep;
294 	info.mreq = NULL;
295 	info.md = nfsd->nd_md;
296 	info.dpos = nfsd->nd_dpos;
297 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
298 
299 	guard.tv_sec = 0;	/* fix compiler warning */
300 	guard.tv_nsec = 0;
301 
302 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
303 	fhp = &nfh.fh_generic;
304 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
305 	VATTR_NULL(vap);
306 	if (info.v3) {
307 		ERROROUT(nfsm_srvsattr(&info, vap));
308 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
309 		gcheck = fxdr_unsigned(int, *tl);
310 		if (gcheck) {
311 			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
312 			fxdr_nfsv3time(tl, &guard);
313 		}
314 	} else {
315 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
316 		/*
317 		 * Nah nah nah nah na nah
318 		 * There is a bug in the Sun client that puts 0xffff in the mode
319 		 * field of sattr when it should put in 0xffffffff. The u_short
320 		 * doesn't sign extend.
321 		 * --> check the low order 2 bytes for 0xffff
322 		 */
323 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
324 			vap->va_mode = nfstov_mode(sp->sa_mode);
325 		if (sp->sa_uid != nfs_xdrneg1)
326 			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
327 		if (sp->sa_gid != nfs_xdrneg1)
328 			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
329 		if (sp->sa_size != nfs_xdrneg1)
330 			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
331 		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
332 #ifdef notyet
333 			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
334 #else
335 			vap->va_atime.tv_sec =
336 				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
337 			vap->va_atime.tv_nsec = 0;
338 #endif
339 		}
340 		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
341 			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
342 
343 	}
344 
345 	/*
346 	 * Now that we have all the fields, lets do it.
347 	 */
348 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
349 		(nfsd->nd_flag & ND_KERBAUTH), TRUE);
350 	if (error) {
351 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
352 				      2 * NFSX_UNSIGNED, &error));
353 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
354 				 postat_ret, vap);
355 		error = 0;
356 		goto nfsmout;
357 	}
358 
359 	/*
360 	 * vp now an active resource, pay careful attention to cleanup
361 	 */
362 
363 	if (info.v3) {
364 		error = preat_ret = VOP_GETATTR(vp, &preat);
365 		if (!error && gcheck &&
366 			(preat.va_ctime.tv_sec != guard.tv_sec ||
367 			 preat.va_ctime.tv_nsec != guard.tv_nsec))
368 			error = NFSERR_NOT_SYNC;
369 		if (error) {
370 			vput(vp);
371 			vp = NULL;
372 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
373 					      NFSX_WCCDATA(info.v3), &error));
374 			nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
375 					 postat_ret, vap);
376 			error = 0;
377 			goto nfsmout;
378 		}
379 	}
380 
381 	/*
382 	 * If the size is being changed write acces is required, otherwise
383 	 * just check for a read only file system.
384 	 */
385 	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
386 		if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
387 			error = EROFS;
388 			goto out;
389 		}
390 	} else {
391 		if (vp->v_type == VDIR) {
392 			error = EISDIR;
393 			goto out;
394 		} else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
395 			    td, 0)) != 0){
396 			goto out;
397 		}
398 	}
399 	error = VOP_SETATTR(vp, vap, cred);
400 	postat_ret = VOP_GETATTR(vp, vap);
401 	if (!error)
402 		error = postat_ret;
403 out:
404 	vput(vp);
405 	vp = NULL;
406 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
407 		   NFSX_WCCORFATTR(info.v3), &error));
408 	if (info.v3) {
409 		nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
410 				 postat_ret, vap);
411 		error = 0;
412 		goto nfsmout;
413 	} else {
414 		fp = nfsm_build(&info, NFSX_V2FATTR);
415 		nfsm_srvfattr(nfsd, vap, fp);
416 	}
417 	/* fall through */
418 
419 nfsmout:
420 	*mrq = info.mreq;
421 	if (vp)
422 		vput(vp);
423 	return(error);
424 }
425 
426 /*
427  * nfs lookup rpc
428  */
429 int
430 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
431 	     struct thread *td, struct mbuf **mrq)
432 {
433 	struct sockaddr *nam = nfsd->nd_nam;
434 	struct ucred *cred = &nfsd->nd_cr;
435 	struct nfs_fattr *fp;
436 	struct nlookupdata nd;
437 	struct vnode *vp;
438 	struct vnode *dirp;
439 	struct nchandle nch;
440 	nfsfh_t nfh;
441 	fhandle_t *fhp;
442 	int error = 0, len, dirattr_ret = 1;
443 	int pubflag;
444 	struct vattr va, dirattr, *vap = &va;
445 	struct nfsm_info info;
446 
447 	info.mrep = nfsd->nd_mrep;
448 	info.mreq = NULL;
449 	info.md = nfsd->nd_md;
450 	info.dpos = nfsd->nd_dpos;
451 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
452 
453 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
454 	nlookup_zero(&nd);
455 	dirp = NULL;
456 	vp = NULL;
457 
458 	fhp = &nfh.fh_generic;
459 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
460 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
461 
462 	pubflag = nfs_ispublicfh(fhp);
463 
464 	error = nfs_namei(&nd, cred, 0, NULL, &vp,
465 		fhp, len, slp, nam, &info.md, &info.dpos,
466 		&dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
467 
468 	/*
469 	 * namei failure, only dirp to cleanup.  Clear out garbarge from
470 	 * structure in case macros jump to nfsmout.
471 	 */
472 
473 	if (error) {
474 		if (dirp) {
475 			if (info.v3)
476 				dirattr_ret = VOP_GETATTR(dirp, &dirattr);
477 			vrele(dirp);
478 			dirp = NULL;
479 		}
480 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
481 				      NFSX_POSTOPATTR(info.v3), &error));
482 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
483 		error = 0;
484 		goto nfsmout;
485 	}
486 
487 	/*
488 	 * Locate index file for public filehandle
489 	 *
490 	 * error is 0 on entry and 0 on exit from this block.
491 	 */
492 
493 	if (pubflag) {
494 		if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
495 			/*
496 			 * Setup call to lookup() to see if we can find
497 			 * the index file. Arguably, this doesn't belong
498 			 * in a kernel.. Ugh.  If an error occurs, do not
499 			 * try to install an index file and then clear the
500 			 * error.
501 			 *
502 			 * When we replace nd with ind and redirect ndp,
503 			 * maintenance of ni_startdir and ni_vp shift to
504 			 * ind and we have to clean them up in the old nd.
505 			 * However, the cnd resource continues to be maintained
506 			 * via the original nd.  Confused?  You aren't alone!
507 			 */
508 			vn_unlock(vp);
509 			cache_copy(&nd.nl_nch, &nch);
510 			nlookup_done(&nd);
511 			error = nlookup_init_raw(&nd, nfs_pub.np_index,
512 						UIO_SYSSPACE, 0, cred, &nch);
513 			cache_drop(&nch);
514 			if (error == 0)
515 				error = nlookup(&nd);
516 
517 			if (error == 0) {
518 				/*
519 				 * Found an index file. Get rid of
520 				 * the old references.  transfer vp and
521 				 * load up the new vp.  Fortunately we do
522 				 * not have to deal with dvp, that would be
523 				 * a huge mess.
524 				 */
525 				if (dirp)
526 					vrele(dirp);
527 				dirp = vp;
528 				vp = NULL;
529 				error = cache_vget(&nd.nl_nch, nd.nl_cred,
530 							LK_EXCLUSIVE, &vp);
531 				KKASSERT(error == 0);
532 			}
533 			error = 0;
534 		}
535 		/*
536 		 * If the public filehandle was used, check that this lookup
537 		 * didn't result in a filehandle outside the publicly exported
538 		 * filesystem.  We clear the poor vp here to avoid lockups due
539 		 * to NFS I/O.
540 		 */
541 
542 		if (vp->v_mount != nfs_pub.np_mount) {
543 			vput(vp);
544 			vp = NULL;
545 			error = EPERM;
546 		}
547 	}
548 
549 	if (dirp) {
550 		if (info.v3)
551 			dirattr_ret = VOP_GETATTR(dirp, &dirattr);
552 		vrele(dirp);
553 		dirp = NULL;
554 	}
555 
556 	/*
557 	 * Resources at this point:
558 	 *	ndp->ni_vp	may not be NULL
559 	 *
560 	 */
561 
562 	if (error) {
563 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
564 				      NFSX_POSTOPATTR(info.v3), &error));
565 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
566 		error = 0;
567 		goto nfsmout;
568 	}
569 
570 	/*
571 	 * Clear out some resources prior to potentially blocking.  This
572 	 * is not as critical as ni_dvp resources in other routines, but
573 	 * it helps.
574 	 */
575 	nlookup_done(&nd);
576 
577 	/*
578 	 * Get underlying attribute, then release remaining resources ( for
579 	 * the same potential blocking reason ) and reply.
580 	 */
581 	bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
582 	error = VFS_VPTOFH(vp, &fhp->fh_fid);
583 	if (!error)
584 		error = VOP_GETATTR(vp, vap);
585 
586 	vput(vp);
587 	vp = NULL;
588 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
589 			      NFSX_SRVFH(info.v3) +
590 			      NFSX_POSTOPORFATTR(info.v3) +
591 			      NFSX_POSTOPATTR(info.v3),
592 			      &error));
593 	if (error) {
594 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
595 		error = 0;
596 		goto nfsmout;
597 	}
598 	nfsm_srvfhtom(&info, fhp);
599 	if (info.v3) {
600 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
601 		nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
602 	} else {
603 		fp = nfsm_build(&info, NFSX_V2FATTR);
604 		nfsm_srvfattr(nfsd, vap, fp);
605 	}
606 
607 nfsmout:
608 	*mrq = info.mreq;
609 	if (dirp)
610 		vrele(dirp);
611 	nlookup_done(&nd);		/* may be called twice */
612 	if (vp)
613 		vput(vp);
614 	return (error);
615 }
616 
617 /*
618  * nfs readlink service
619  */
620 int
621 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
622 	       struct thread *td, struct mbuf **mrq)
623 {
624 	struct sockaddr *nam = nfsd->nd_nam;
625 	struct ucred *cred = &nfsd->nd_cr;
626 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
627 	struct iovec *ivp = iv;
628 	u_int32_t *tl;
629 	int error = 0, rdonly, i, tlen, len, getret;
630 	struct mbuf *mp1, *mp2, *mp3;
631 	struct vnode *vp = NULL;
632 	struct mount *mp = NULL;
633 	struct vattr attr;
634 	nfsfh_t nfh;
635 	fhandle_t *fhp;
636 	struct uio io, *uiop = &io;
637 	struct nfsm_info info;
638 
639 	info.mrep = nfsd->nd_mrep;
640 	info.mreq = NULL;
641 	info.md = nfsd->nd_md;
642 	info.dpos = nfsd->nd_dpos;
643 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
644 
645 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
646 #ifndef nolint
647 	mp2 = NULL;
648 #endif
649 	mp3 = NULL;
650 	fhp = &nfh.fh_generic;
651 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
652 	len = 0;
653 	i = 0;
654 	while (len < NFS_MAXPATHLEN) {
655 		mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
656 		mp1->m_len = MCLBYTES;
657 		if (len == 0)
658 			mp3 = mp2 = mp1;
659 		else {
660 			mp2->m_next = mp1;
661 			mp2 = mp1;
662 		}
663 		if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
664 			mp1->m_len = NFS_MAXPATHLEN-len;
665 			len = NFS_MAXPATHLEN;
666 		} else
667 			len += mp1->m_len;
668 		ivp->iov_base = mtod(mp1, caddr_t);
669 		ivp->iov_len = mp1->m_len;
670 		i++;
671 		ivp++;
672 	}
673 	uiop->uio_iov = iv;
674 	uiop->uio_iovcnt = i;
675 	uiop->uio_offset = 0;
676 	uiop->uio_resid = len;
677 	uiop->uio_rw = UIO_READ;
678 	uiop->uio_segflg = UIO_SYSSPACE;
679 	uiop->uio_td = NULL;
680 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
681 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
682 	if (error) {
683 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
684 				      2 * NFSX_UNSIGNED, &error));
685 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
686 		error = 0;
687 		goto nfsmout;
688 	}
689 	if (vp->v_type != VLNK) {
690 		if (info.v3)
691 			error = EINVAL;
692 		else
693 			error = ENXIO;
694 		goto out;
695 	}
696 	error = VOP_READLINK(vp, uiop, cred);
697 out:
698 	getret = VOP_GETATTR(vp, &attr);
699 	vput(vp);
700 	vp = NULL;
701 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
702 			     NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
703 			     &error));
704 	if (info.v3) {
705 		nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
706 		if (error) {
707 			error = 0;
708 			goto nfsmout;
709 		}
710 	}
711 	if (uiop->uio_resid > 0) {
712 		len -= uiop->uio_resid;
713 		tlen = nfsm_rndup(len);
714 		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
715 	}
716 	tl = nfsm_build(&info, NFSX_UNSIGNED);
717 	*tl = txdr_unsigned(len);
718 	info.mb->m_next = mp3;
719 	mp3 = NULL;
720 nfsmout:
721 	*mrq = info.mreq;
722 	if (mp3)
723 		m_freem(mp3);
724 	if (vp)
725 		vput(vp);
726 	return(error);
727 }
728 
729 /*
730  * nfs read service
731  */
732 int
733 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
734 	   struct thread *td, struct mbuf **mrq)
735 {
736 	struct nfsm_info info;
737 	struct sockaddr *nam = nfsd->nd_nam;
738 	struct ucred *cred = &nfsd->nd_cr;
739 	struct iovec *iv;
740 	struct iovec *iv2;
741 	struct mbuf *m;
742 	struct nfs_fattr *fp;
743 	u_int32_t *tl;
744 	int i;
745 	int reqlen;
746 	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
747 	struct mbuf *m2;
748 	struct vnode *vp = NULL;
749 	struct mount *mp = NULL;
750 	nfsfh_t nfh;
751 	fhandle_t *fhp;
752 	struct uio io, *uiop = &io;
753 	struct vattr va, *vap = &va;
754 	struct nfsheur *nh;
755 	off_t off;
756 	int ioflag = 0;
757 
758 	info.mrep = nfsd->nd_mrep;
759 	info.mreq = NULL;
760 	info.md = nfsd->nd_md;
761 	info.dpos = nfsd->nd_dpos;
762 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
763 
764 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
765 	fhp = &nfh.fh_generic;
766 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
767 	if (info.v3) {
768 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
769 		off = fxdr_hyper(tl);
770 	} else {
771 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
772 		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
773 	}
774 	NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
775 					    NFS_SRVMAXDATA(nfsd), &error));
776 
777 	/*
778 	 * Reference vp.  If an error occurs, vp will be invalid, but we
779 	 * have to NULL it just in case.  The macros might goto nfsmout
780 	 * as well.
781 	 */
782 
783 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
784 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
785 	if (error) {
786 		vp = NULL;
787 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
788 				      2 * NFSX_UNSIGNED, &error));
789 		nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
790 		error = 0;
791 		goto nfsmout;
792 	}
793 
794 	if (vp->v_type != VREG) {
795 		if (info.v3)
796 			error = EINVAL;
797 		else
798 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
799 	}
800 	if (!error) {
801 	    if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
802 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
803 	}
804 	getret = VOP_GETATTR(vp, vap);
805 	if (!error)
806 		error = getret;
807 	if (error) {
808 		vput(vp);
809 		vp = NULL;
810 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
811 				      NFSX_POSTOPATTR(info.v3), &error));
812 		nfsm_srvpostop_attr(&info, nfsd, getret, vap);
813 		error = 0;
814 		goto nfsmout;
815 	}
816 
817 	/*
818 	 * Calculate byte count to read
819 	 */
820 
821 	if (off >= vap->va_size)
822 		cnt = 0;
823 	else if ((off + reqlen) > vap->va_size)
824 		cnt = vap->va_size - off;
825 	else
826 		cnt = reqlen;
827 
828 	/*
829 	 * Calculate seqcount for heuristic
830 	 */
831 
832 	{
833 		int hi;
834 		int try = 32;
835 
836 		/*
837 		 * Locate best candidate
838 		 */
839 
840 		hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
841 		nh = &nfsheur[hi];
842 
843 		while (try--) {
844 			if (nfsheur[hi].nh_vp == vp) {
845 				nh = &nfsheur[hi];
846 				break;
847 			}
848 			if (nfsheur[hi].nh_use > 0)
849 				--nfsheur[hi].nh_use;
850 			hi = (hi + 1) % NUM_HEURISTIC;
851 			if (nfsheur[hi].nh_use < nh->nh_use)
852 				nh = &nfsheur[hi];
853 		}
854 
855 		if (nh->nh_vp != vp) {
856 			nh->nh_vp = vp;
857 			nh->nh_nextr = off;
858 			nh->nh_use = NHUSE_INIT;
859 			if (off == 0)
860 				nh->nh_seqcount = 4;
861 			else
862 				nh->nh_seqcount = 1;
863 		}
864 
865 		/*
866 		 * Calculate heuristic
867 		 */
868 
869 		if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
870 			if (++nh->nh_seqcount > IO_SEQMAX)
871 				nh->nh_seqcount = IO_SEQMAX;
872 		} else if (nh->nh_seqcount > 1) {
873 			nh->nh_seqcount = 1;
874 		} else {
875 			nh->nh_seqcount = 0;
876 		}
877 		nh->nh_use += NHUSE_INC;
878 		if (nh->nh_use > NHUSE_MAX)
879 			nh->nh_use = NHUSE_MAX;
880 		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
881         }
882 
883 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
884 			      NFSX_POSTOPORFATTR(info.v3) +
885 			      3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
886 			      &error));
887 	if (info.v3) {
888 		tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
889 		*tl++ = nfs_true;
890 		fp = (struct nfs_fattr *)tl;
891 		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
892 	} else {
893 		tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
894 		fp = (struct nfs_fattr *)tl;
895 		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
896 	}
897 	len = left = nfsm_rndup(cnt);
898 	if (cnt > 0) {
899 		/*
900 		 * Generate the mbuf list with the uio_iov ref. to it.
901 		 */
902 		i = 0;
903 		m = m2 = info.mb;
904 		while (left > 0) {
905 			siz = min(M_TRAILINGSPACE(m), left);
906 			if (siz > 0) {
907 				left -= siz;
908 				i++;
909 			}
910 			if (left > 0) {
911 				m = m_getcl(MB_WAIT, MT_DATA, 0);
912 				m->m_len = 0;
913 				m2->m_next = m;
914 				m2 = m;
915 			}
916 		}
917 		iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
918 		uiop->uio_iov = iv2 = iv;
919 		m = info.mb;
920 		left = len;
921 		i = 0;
922 		while (left > 0) {
923 			if (m == NULL)
924 				panic("nfsrv_read iov");
925 			siz = min(M_TRAILINGSPACE(m), left);
926 			if (siz > 0) {
927 				iv->iov_base = mtod(m, caddr_t) + m->m_len;
928 				iv->iov_len = siz;
929 				m->m_len += siz;
930 				left -= siz;
931 				iv++;
932 				i++;
933 			}
934 			m = m->m_next;
935 		}
936 		uiop->uio_iovcnt = i;
937 		uiop->uio_offset = off;
938 		uiop->uio_resid = len;
939 		uiop->uio_rw = UIO_READ;
940 		uiop->uio_segflg = UIO_SYSSPACE;
941 		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
942 		off = uiop->uio_offset;
943 		nh->nh_nextr = off;
944 		kfree((caddr_t)iv2, M_TEMP);
945 		if (error || (getret = VOP_GETATTR(vp, vap))) {
946 			if (!error)
947 				error = getret;
948 			m_freem(info.mreq);
949 			info.mreq = NULL;
950 			vput(vp);
951 			vp = NULL;
952 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953 					      NFSX_POSTOPATTR(info.v3),
954 					      &error));
955 			nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956 			error = 0;
957 			goto nfsmout;
958 		}
959 	} else {
960 		uiop->uio_resid = 0;
961 	}
962 	vput(vp);
963 	vp = NULL;
964 	nfsm_srvfattr(nfsd, vap, fp);
965 	tlen = len - uiop->uio_resid;
966 	cnt = cnt < tlen ? cnt : tlen;
967 	tlen = nfsm_rndup(cnt);
968 	if (len != tlen || tlen != cnt)
969 		nfsm_adj(info.mb, len - tlen, tlen - cnt);
970 	if (info.v3) {
971 		*tl++ = txdr_unsigned(cnt);
972 		if (len < reqlen)
973 			*tl++ = nfs_true;
974 		else
975 			*tl++ = nfs_false;
976 	}
977 	*tl = txdr_unsigned(cnt);
978 nfsmout:
979 	*mrq = info.mreq;
980 	if (vp)
981 		vput(vp);
982 	return(error);
983 }
984 
985 /*
986  * nfs write service
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990 	    struct thread *td, struct mbuf **mrq)
991 {
992 	struct sockaddr *nam = nfsd->nd_nam;
993 	struct ucred *cred = &nfsd->nd_cr;
994 	struct iovec *ivp;
995 	int i, cnt;
996 	struct mbuf *mp1;
997 	struct nfs_fattr *fp;
998 	struct iovec *iv;
999 	struct vattr va, forat;
1000 	struct vattr *vap = &va;
1001 	u_int32_t *tl;
1002 	int error = 0, rdonly, len, forat_ret = 1;
1003 	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004 	int stable = NFSV3WRITE_FILESYNC;
1005 	struct vnode *vp = NULL;
1006 	struct mount *mp = NULL;
1007 	nfsfh_t nfh;
1008 	fhandle_t *fhp;
1009 	struct uio io, *uiop = &io;
1010 	struct nfsm_info info;
1011 	off_t off;
1012 
1013 	info.mrep = nfsd->nd_mrep;
1014 	info.mreq = NULL;
1015 	info.md = nfsd->nd_md;
1016 	info.dpos = nfsd->nd_dpos;
1017 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018 
1019 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1020 	if (info.mrep == NULL) {
1021 		error = 0;
1022 		goto nfsmout;
1023 	}
1024 	fhp = &nfh.fh_generic;
1025 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1026 	if (info.v3) {
1027 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028 		off = fxdr_hyper(tl);
1029 		tl += 3;
1030 		stable = fxdr_unsigned(int, *tl++);
1031 	} else {
1032 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034 		tl += 2;
1035 		if (nfs_async)
1036 	    		stable = NFSV3WRITE_UNSTABLE;
1037 	}
1038 	retlen = len = fxdr_unsigned(int32_t, *tl);
1039 	cnt = i = 0;
1040 
1041 	/*
1042 	 * For NFS Version 2, it is not obvious what a write of zero length
1043 	 * should do, but I might as well be consistent with Version 3,
1044 	 * which is to return ok so long as there are no permission problems.
1045 	 */
1046 	if (len > 0) {
1047 	    zeroing = 1;
1048 	    mp1 = info.mrep;
1049 	    while (mp1) {
1050 		if (mp1 == info.md) {
1051 			zeroing = 0;
1052 			adjust = info.dpos - mtod(mp1, caddr_t);
1053 			mp1->m_len -= adjust;
1054 			if (mp1->m_len > 0 && adjust > 0)
1055 				mp1->m_data += adjust;
1056 		}
1057 		if (zeroing)
1058 			mp1->m_len = 0;
1059 		else if (mp1->m_len > 0) {
1060 			i += mp1->m_len;
1061 			if (i > len) {
1062 				mp1->m_len -= (i - len);
1063 				zeroing	= 1;
1064 			}
1065 			if (mp1->m_len > 0)
1066 				cnt++;
1067 		}
1068 		mp1 = mp1->m_next;
1069 	    }
1070 	}
1071 	if (len > NFS_MAXDATA || len < 0 || i < len) {
1072 		error = EIO;
1073 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074 				      2 * NFSX_UNSIGNED, &error));
1075 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076 				 aftat_ret, vap);
1077 		error = 0;
1078 		goto nfsmout;
1079 	}
1080 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082 	if (error) {
1083 		vp = NULL;
1084 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085 				      2 * NFSX_UNSIGNED, &error));
1086 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087 				 aftat_ret, vap);
1088 		error = 0;
1089 		goto nfsmout;
1090 	}
1091 	if (info.v3)
1092 		forat_ret = VOP_GETATTR(vp, &forat);
1093 	if (vp->v_type != VREG) {
1094 		if (info.v3)
1095 			error = EINVAL;
1096 		else
1097 			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098 	}
1099 	if (!error) {
1100 		error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101 	}
1102 	if (error) {
1103 		vput(vp);
1104 		vp = NULL;
1105 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106 				      NFSX_WCCDATA(info.v3), &error));
1107 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108 				 aftat_ret, vap);
1109 		error = 0;
1110 		goto nfsmout;
1111 	}
1112 
1113 	if (len > 0) {
1114 	    ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1115 	    uiop->uio_iov = iv = ivp;
1116 	    uiop->uio_iovcnt = cnt;
1117 	    mp1 = info.mrep;
1118 	    while (mp1) {
1119 		if (mp1->m_len > 0) {
1120 			ivp->iov_base = mtod(mp1, caddr_t);
1121 			ivp->iov_len = mp1->m_len;
1122 			ivp++;
1123 		}
1124 		mp1 = mp1->m_next;
1125 	    }
1126 
1127 	    /*
1128 	     * XXX
1129 	     * The IO_METASYNC flag indicates that all metadata (and not just
1130 	     * enough to ensure data integrity) must be written to stable storage
1131 	     * synchronously.
1132 	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1133 	     */
1134 	    if (stable == NFSV3WRITE_UNSTABLE)
1135 		ioflags = IO_NODELOCKED;
1136 	    else if (stable == NFSV3WRITE_DATASYNC)
1137 		ioflags = (IO_SYNC | IO_NODELOCKED);
1138 	    else
1139 		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1140 	    uiop->uio_resid = len;
1141 	    uiop->uio_rw = UIO_WRITE;
1142 	    uiop->uio_segflg = UIO_SYSSPACE;
1143 	    uiop->uio_td = NULL;
1144 	    uiop->uio_offset = off;
1145 	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1146 	    nfsstats.srvvop_writes++;
1147 	    kfree((caddr_t)iv, M_TEMP);
1148 	}
1149 	aftat_ret = VOP_GETATTR(vp, vap);
1150 	vput(vp);
1151 	vp = NULL;
1152 	if (!error)
1153 		error = aftat_ret;
1154 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1155 			      NFSX_PREOPATTR(info.v3) +
1156 			      NFSX_POSTOPORFATTR(info.v3) +
1157 			      2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1158 			      &error));
1159 	if (info.v3) {
1160 		nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1161 				 aftat_ret, vap);
1162 		if (error) {
1163 			error = 0;
1164 			goto nfsmout;
1165 		}
1166 		tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1167 		*tl++ = txdr_unsigned(retlen);
1168 		/*
1169 		 * If nfs_async is set, then pretend the write was FILESYNC.
1170 		 */
1171 		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1172 			*tl++ = txdr_unsigned(stable);
1173 		else
1174 			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1175 		/*
1176 		 * Actually, there is no need to txdr these fields,
1177 		 * but it may make the values more human readable,
1178 		 * for debugging purposes.
1179 		 */
1180 		if (nfsver.tv_sec == 0)
1181 			nfsver = boottime;
1182 		*tl++ = txdr_unsigned(nfsver.tv_sec);
1183 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1184 	} else {
1185 		fp = nfsm_build(&info, NFSX_V2FATTR);
1186 		nfsm_srvfattr(nfsd, vap, fp);
1187 	}
1188 nfsmout:
1189 	*mrq = info.mreq;
1190 	if (vp)
1191 		vput(vp);
1192 	return(error);
1193 }
1194 
1195 /*
1196  * NFS write service with write gathering support. Called when
1197  * nfsrvw_procrastinate > 0.
1198  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1199  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1200  * Jan. 1994.
1201  */
1202 int
1203 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1204 		  struct thread *td, struct mbuf **mrq)
1205 {
1206 	struct iovec *ivp;
1207 	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1208 	struct nfs_fattr *fp;
1209 	int i;
1210 	struct iovec *iov;
1211 	struct nfsrvw_delayhash *wpp;
1212 	struct ucred *cred;
1213 	struct vattr va, forat;
1214 	u_int32_t *tl;
1215 	int error = 0, rdonly, len, forat_ret = 1;
1216 	int ioflags, aftat_ret = 1, adjust, zeroing;
1217 	struct mbuf *mp1;
1218 	struct vnode *vp = NULL;
1219 	struct mount *mp = NULL;
1220 	struct uio io, *uiop = &io;
1221 	u_quad_t cur_usec;
1222 	struct nfsm_info info;
1223 
1224 	info.mreq = NULL;
1225 
1226 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1227 #ifndef nolint
1228 	i = 0;
1229 	len = 0;
1230 #endif
1231 	if (*ndp) {
1232 	    nfsd = *ndp;
1233 	    *ndp = NULL;
1234 	    info.mrep = nfsd->nd_mrep;
1235 	    info.mreq = NULL;
1236 	    info.md = nfsd->nd_md;
1237 	    info.dpos = nfsd->nd_dpos;
1238 	    info.v3 = (nfsd->nd_flag & ND_NFSV3);
1239 	    cred = &nfsd->nd_cr;
1240 	    LIST_INIT(&nfsd->nd_coalesce);
1241 	    nfsd->nd_mreq = NULL;
1242 	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1243 	    cur_usec = nfs_curusec();
1244 	    nfsd->nd_time = cur_usec +
1245 		(info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1246 
1247 	    /*
1248 	     * Now, get the write header..
1249 	     */
1250 	    NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1251 	    if (info.v3) {
1252 		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1253 		nfsd->nd_off = fxdr_hyper(tl);
1254 		tl += 3;
1255 		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1256 	    } else {
1257 		NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1258 		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1259 		tl += 2;
1260 		if (nfs_async)
1261 			nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1262 	    }
1263 	    len = fxdr_unsigned(int32_t, *tl);
1264 	    nfsd->nd_len = len;
1265 	    nfsd->nd_eoff = nfsd->nd_off + len;
1266 
1267 	    /*
1268 	     * Trim the header out of the mbuf list and trim off any trailing
1269 	     * junk so that the mbuf list has only the write data.
1270 	     */
1271 	    zeroing = 1;
1272 	    i = 0;
1273 	    mp1 = info.mrep;
1274 	    while (mp1) {
1275 		if (mp1 == info.md) {
1276 		    zeroing = 0;
1277 		    adjust = info.dpos - mtod(mp1, caddr_t);
1278 		    mp1->m_len -= adjust;
1279 		    if (mp1->m_len > 0 && adjust > 0)
1280 			mp1->m_data += adjust;
1281 		}
1282 		if (zeroing)
1283 		    mp1->m_len = 0;
1284 		else {
1285 		    i += mp1->m_len;
1286 		    if (i > len) {
1287 			mp1->m_len -= (i - len);
1288 			zeroing = 1;
1289 		    }
1290 		}
1291 		mp1 = mp1->m_next;
1292 	    }
1293 	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1294 nfsmout:
1295 		m_freem(info.mrep);
1296 		info.mrep = NULL;
1297 		error = EIO;
1298 		nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1299 		if (info.v3) {
1300 		    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1301 				     aftat_ret, &va);
1302 		}
1303 		nfsd->nd_mreq = info.mreq;
1304 		nfsd->nd_mrep = NULL;
1305 		nfsd->nd_time = 0;
1306 	    }
1307 
1308 	    /*
1309 	     * Add this entry to the hash and time queues.
1310 	     */
1311 	    owp = NULL;
1312 	    wp = slp->ns_tq.lh_first;
1313 	    while (wp && wp->nd_time < nfsd->nd_time) {
1314 		owp = wp;
1315 		wp = wp->nd_tq.le_next;
1316 	    }
1317 	    NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1318 	    if (owp) {
1319 		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1320 	    } else {
1321 		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1322 	    }
1323 	    if (nfsd->nd_mrep) {
1324 		wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1325 		owp = NULL;
1326 		wp = wpp->lh_first;
1327 		while (wp &&
1328 		    bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1329 		    owp = wp;
1330 		    wp = wp->nd_hash.le_next;
1331 		}
1332 		while (wp && wp->nd_off < nfsd->nd_off &&
1333 		    !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1334 		    owp = wp;
1335 		    wp = wp->nd_hash.le_next;
1336 		}
1337 		if (owp) {
1338 		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1339 
1340 		    /*
1341 		     * Search the hash list for overlapping entries and
1342 		     * coalesce.
1343 		     */
1344 		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1345 			wp = nfsd->nd_hash.le_next;
1346 			if (NFSW_SAMECRED(owp, nfsd))
1347 			    nfsrvw_coalesce(owp, nfsd);
1348 		    }
1349 		} else {
1350 		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1351 		}
1352 	    }
1353 	}
1354 
1355 	/*
1356 	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1357 	 * and generate the associated reply mbuf list(s).
1358 	 */
1359 loop1:
1360 	cur_usec = nfs_curusec();
1361 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1362 		owp = nfsd->nd_tq.le_next;
1363 		if (nfsd->nd_time > cur_usec)
1364 		    break;
1365 		if (nfsd->nd_mreq)
1366 		    continue;
1367 		NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1368 		LIST_REMOVE(nfsd, nd_tq);
1369 		LIST_REMOVE(nfsd, nd_hash);
1370 		info.mrep = nfsd->nd_mrep;
1371 		info.mreq = NULL;
1372 		info.v3 = (nfsd->nd_flag & ND_NFSV3);
1373 		nfsd->nd_mrep = NULL;
1374 		cred = &nfsd->nd_cr;
1375 		forat_ret = aftat_ret = 1;
1376 		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp,
1377 				     nfsd->nd_nam, &rdonly,
1378 				     (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1379 		if (!error) {
1380 		    if (info.v3)
1381 			forat_ret = VOP_GETATTR(vp, &forat);
1382 		    if (vp->v_type != VREG) {
1383 			if (info.v3)
1384 			    error = EINVAL;
1385 			else
1386 			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1387 		    }
1388 		} else {
1389 		    vp = NULL;
1390 		}
1391 		if (!error) {
1392 		    error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1393 		}
1394 
1395 		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1396 		    ioflags = IO_NODELOCKED;
1397 		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1398 		    ioflags = (IO_SYNC | IO_NODELOCKED);
1399 		else
1400 		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1401 		uiop->uio_rw = UIO_WRITE;
1402 		uiop->uio_segflg = UIO_SYSSPACE;
1403 		uiop->uio_td = NULL;
1404 		uiop->uio_offset = nfsd->nd_off;
1405 		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1406 		if (uiop->uio_resid > 0) {
1407 		    mp1 = info.mrep;
1408 		    i = 0;
1409 		    while (mp1) {
1410 			if (mp1->m_len > 0)
1411 			    i++;
1412 			mp1 = mp1->m_next;
1413 		    }
1414 		    uiop->uio_iovcnt = i;
1415 		    iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1416 		    uiop->uio_iov = ivp = iov;
1417 		    mp1 = info.mrep;
1418 		    while (mp1) {
1419 			if (mp1->m_len > 0) {
1420 			    ivp->iov_base = mtod(mp1, caddr_t);
1421 			    ivp->iov_len = mp1->m_len;
1422 			    ivp++;
1423 			}
1424 			mp1 = mp1->m_next;
1425 		    }
1426 		    if (!error) {
1427 			error = VOP_WRITE(vp, uiop, ioflags, cred);
1428 			nfsstats.srvvop_writes++;
1429 		    }
1430 		    kfree((caddr_t)iov, M_TEMP);
1431 		}
1432 		m_freem(info.mrep);
1433 		info.mrep = NULL;
1434 		if (vp) {
1435 		    aftat_ret = VOP_GETATTR(vp, &va);
1436 		    vput(vp);
1437 		    vp = NULL;
1438 		}
1439 
1440 		/*
1441 		 * Loop around generating replies for all write rpcs that have
1442 		 * now been completed.
1443 		 */
1444 		swp = nfsd;
1445 		do {
1446 		    NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1447 		    if (error) {
1448 			nfsm_writereply(&info, nfsd, slp, error,
1449 					NFSX_WCCDATA(info.v3));
1450 			if (info.v3) {
1451 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1452 					     aftat_ret, &va);
1453 			}
1454 		    } else {
1455 			nfsm_writereply(&info, nfsd, slp, error,
1456 					NFSX_PREOPATTR(info.v3) +
1457 					NFSX_POSTOPORFATTR(info.v3) +
1458 					2 * NFSX_UNSIGNED +
1459 					NFSX_WRITEVERF(info.v3));
1460 			if (info.v3) {
1461 			    nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1462 					     aftat_ret, &va);
1463 			    tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1464 			    *tl++ = txdr_unsigned(nfsd->nd_len);
1465 			    *tl++ = txdr_unsigned(swp->nd_stable);
1466 			    /*
1467 			     * Actually, there is no need to txdr these fields,
1468 			     * but it may make the values more human readable,
1469 			     * for debugging purposes.
1470 			     */
1471 			    if (nfsver.tv_sec == 0)
1472 				    nfsver = boottime;
1473 			    *tl++ = txdr_unsigned(nfsver.tv_sec);
1474 			    *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1475 			} else {
1476 			    fp = nfsm_build(&info, NFSX_V2FATTR);
1477 			    nfsm_srvfattr(nfsd, &va, fp);
1478 			}
1479 		    }
1480 		    nfsd->nd_mreq = info.mreq;
1481 		    if (nfsd->nd_mrep)
1482 			panic("nfsrv_write: nd_mrep not free");
1483 
1484 		    /*
1485 		     * Done. Put it at the head of the timer queue so that
1486 		     * the final phase can return the reply.
1487 		     */
1488 		    if (nfsd != swp) {
1489 			nfsd->nd_time = 0;
1490 			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1491 		    }
1492 		    nfsd = swp->nd_coalesce.lh_first;
1493 		    if (nfsd) {
1494 			LIST_REMOVE(nfsd, nd_tq);
1495 		    }
1496 		} while (nfsd);
1497 		swp->nd_time = 0;
1498 		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1499 		goto loop1;
1500 	}
1501 
1502 	/*
1503 	 * Search for a reply to return.
1504 	 */
1505 	for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1506 		if (nfsd->nd_mreq) {
1507 		    NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1508 		    LIST_REMOVE(nfsd, nd_tq);
1509 		    break;
1510 		}
1511 	}
1512 	if (nfsd) {
1513 		*ndp = nfsd;
1514 		*mrq = nfsd->nd_mreq;
1515 	} else {
1516 		*ndp = NULL;
1517 		*mrq = NULL;
1518 	}
1519 	return (0);
1520 }
1521 
1522 /*
1523  * Coalesce the write request nfsd into owp. To do this we must:
1524  * - remove nfsd from the queues
1525  * - merge nfsd->nd_mrep into owp->nd_mrep
1526  * - update the nd_eoff and nd_stable for owp
1527  * - put nfsd on owp's nd_coalesce list
1528  * NB: Must be called at splsoftclock().
1529  */
1530 static void
1531 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1532 {
1533         int overlap;
1534         struct mbuf *mp1;
1535 	struct nfsrv_descript *p;
1536 
1537 	NFS_DPF(WG, ("C%03x-%03x",
1538 		     nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1539         LIST_REMOVE(nfsd, nd_hash);
1540         LIST_REMOVE(nfsd, nd_tq);
1541         if (owp->nd_eoff < nfsd->nd_eoff) {
1542             overlap = owp->nd_eoff - nfsd->nd_off;
1543             if (overlap < 0)
1544                 panic("nfsrv_coalesce: bad off");
1545             if (overlap > 0)
1546                 m_adj(nfsd->nd_mrep, overlap);
1547             mp1 = owp->nd_mrep;
1548             while (mp1->m_next)
1549                 mp1 = mp1->m_next;
1550             mp1->m_next = nfsd->nd_mrep;
1551             owp->nd_eoff = nfsd->nd_eoff;
1552         } else
1553             m_freem(nfsd->nd_mrep);
1554         nfsd->nd_mrep = NULL;
1555         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1556             owp->nd_stable = NFSV3WRITE_FILESYNC;
1557         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1558             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1559             owp->nd_stable = NFSV3WRITE_DATASYNC;
1560         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1561 
1562 	/*
1563 	 * If nfsd had anything else coalesced into it, transfer them
1564 	 * to owp, otherwise their replies will never get sent.
1565 	 */
1566 	for (p = nfsd->nd_coalesce.lh_first; p;
1567 	     p = nfsd->nd_coalesce.lh_first) {
1568 	    LIST_REMOVE(p, nd_tq);
1569 	    LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1570 	}
1571 }
1572 
1573 /*
1574  * nfs create service
1575  * now does a truncate to 0 length via. setattr if it already exists
1576  */
1577 int
1578 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1579 	     struct thread *td, struct mbuf **mrq)
1580 {
1581 	struct sockaddr *nam = nfsd->nd_nam;
1582 	struct ucred *cred = &nfsd->nd_cr;
1583 	struct nfs_fattr *fp;
1584 	struct vattr va, dirfor, diraft;
1585 	struct vattr *vap = &va;
1586 	struct nfsv2_sattr *sp;
1587 	u_int32_t *tl;
1588 	struct nlookupdata nd;
1589 	int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1590 	udev_t rdev = NOUDEV;
1591 	caddr_t cp;
1592 	int how, exclusive_flag = 0;
1593 	struct vnode *dirp;
1594 	struct vnode *dvp;
1595 	struct vnode *vp;
1596 	struct mount *mp;
1597 	nfsfh_t nfh;
1598 	fhandle_t *fhp;
1599 	u_quad_t tempsize;
1600 	u_char cverf[NFSX_V3CREATEVERF];
1601 	struct nfsm_info info;
1602 
1603 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1604 	nlookup_zero(&nd);
1605 	dirp = NULL;
1606 	dvp = NULL;
1607 	vp = NULL;
1608 
1609 	info.mrep = nfsd->nd_mrep;
1610 	info.mreq = NULL;
1611 	info.md = nfsd->nd_md;
1612 	info.dpos = nfsd->nd_dpos;
1613 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
1614 
1615 	fhp = &nfh.fh_generic;
1616 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1617 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1618 
1619 	/*
1620 	 * Call namei and do initial cleanup to get a few things
1621 	 * out of the way.  If we get an initial error we cleanup
1622 	 * and return here to avoid special-casing the invalid nd
1623 	 * structure through the rest of the case.  dirp may be
1624 	 * set even if an error occurs, but the nd structure will not
1625 	 * be valid at all if an error occurs so we have to invalidate it
1626 	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1627 	 */
1628 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1629 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1630 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1631 	mp = vfs_getvfs(&fhp->fh_fsid);
1632 
1633 	if (dirp) {
1634 		if (info.v3) {
1635 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1636 		} else {
1637 			vrele(dirp);
1638 			dirp = NULL;
1639 		}
1640 	}
1641 	if (error) {
1642 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1643 				      NFSX_WCCDATA(info.v3), &error));
1644 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1645 				 diraft_ret, &diraft);
1646 		error = 0;
1647 		goto nfsmout;
1648 	}
1649 
1650 	/*
1651 	 * No error.  Continue.  State:
1652 	 *
1653 	 *	dirp 		may be valid
1654 	 *	vp		may be valid or NULL if the target does not
1655 	 *			exist.
1656 	 *	dvp		is valid
1657 	 *
1658 	 * The error state is set through the code and we may also do some
1659 	 * opportunistic releasing of vnodes to avoid holding locks through
1660 	 * NFS I/O.  The cleanup at the end is a catch-all
1661 	 */
1662 
1663 	VATTR_NULL(vap);
1664 	if (info.v3) {
1665 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1666 		how = fxdr_unsigned(int, *tl);
1667 		switch (how) {
1668 		case NFSV3CREATE_GUARDED:
1669 			if (vp) {
1670 				error = EEXIST;
1671 				break;
1672 			}
1673 			/* fall through */
1674 		case NFSV3CREATE_UNCHECKED:
1675 			ERROROUT(nfsm_srvsattr(&info, vap));
1676 			break;
1677 		case NFSV3CREATE_EXCLUSIVE:
1678 			NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1679 			bcopy(cp, cverf, NFSX_V3CREATEVERF);
1680 			exclusive_flag = 1;
1681 			break;
1682 		};
1683 		vap->va_type = VREG;
1684 	} else {
1685 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1686 		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1687 		if (vap->va_type == VNON)
1688 			vap->va_type = VREG;
1689 		vap->va_mode = nfstov_mode(sp->sa_mode);
1690 		switch (vap->va_type) {
1691 		case VREG:
1692 			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1693 			if (tsize != -1)
1694 				vap->va_size = (u_quad_t)tsize;
1695 			break;
1696 		case VCHR:
1697 		case VBLK:
1698 		case VFIFO:
1699 			rdev = fxdr_unsigned(long, sp->sa_size);
1700 			break;
1701 		default:
1702 			break;
1703 		};
1704 	}
1705 
1706 	/*
1707 	 * Iff doesn't exist, create it
1708 	 * otherwise just truncate to 0 length
1709 	 *   should I set the mode too ?
1710 	 *
1711 	 * The only possible error we can have at this point is EEXIST.
1712 	 * nd.ni_vp will also be non-NULL in that case.
1713 	 */
1714 	if (vp == NULL) {
1715 		if (vap->va_mode == (mode_t)VNOVAL)
1716 			vap->va_mode = 0;
1717 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1718 			vn_unlock(dvp);
1719 			error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1720 					    nd.nl_cred, vap);
1721 			vrele(dvp);
1722 			dvp = NULL;
1723 			if (error == 0) {
1724 				if (exclusive_flag) {
1725 					exclusive_flag = 0;
1726 					VATTR_NULL(vap);
1727 					bcopy(cverf, (caddr_t)&vap->va_atime,
1728 						NFSX_V3CREATEVERF);
1729 					error = VOP_SETATTR(vp, vap, cred);
1730 				}
1731 			}
1732 		} else if (
1733 			vap->va_type == VCHR ||
1734 			vap->va_type == VBLK ||
1735 			vap->va_type == VFIFO
1736 		) {
1737 			/*
1738 			 * Handle SysV FIFO node special cases.  All other
1739 			 * devices require super user to access.
1740 			 */
1741 			if (vap->va_type == VCHR && rdev == 0xffffffff)
1742 				vap->va_type = VFIFO;
1743                         if (vap->va_type != VFIFO &&
1744                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1745 				goto nfsmreply0;
1746                         }
1747 			vap->va_rmajor = umajor(rdev);
1748 			vap->va_rminor = uminor(rdev);
1749 
1750 			vn_unlock(dvp);
1751 			error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1752 			vrele(dvp);
1753 			dvp = NULL;
1754 			if (error)
1755 				goto nfsmreply0;
1756 #if 0
1757 			/*
1758 			 * XXX what is this junk supposed to do ?
1759 			 */
1760 
1761 			vput(vp);
1762 			vp = NULL;
1763 
1764 			/*
1765 			 * release dvp prior to lookup
1766 			 */
1767 			vput(dvp);
1768 			dvp = NULL;
1769 
1770 			/*
1771 			 * Setup for lookup.
1772 			 *
1773 			 * Even though LOCKPARENT was cleared, ni_dvp may
1774 			 * be garbage.
1775 			 */
1776 			nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1777 			nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1778 			nd.ni_cnd.cn_td = td;
1779 			nd.ni_cnd.cn_cred = cred;
1780 
1781 			error = lookup(&nd);
1782 			nd.ni_dvp = NULL;
1783 
1784 			if (error != 0) {
1785 				NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1786 						      0, &error));
1787 				/* fall through on certain errors */
1788 			}
1789 			nfsrv_object_create(nd.ni_vp);
1790 			if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1791 				error = EINVAL;
1792 				goto nfsmreply0;
1793 			}
1794 #endif
1795 		} else {
1796 			error = ENXIO;
1797 		}
1798 	} else {
1799 		if (vap->va_size != -1) {
1800 			error = nfsrv_access(mp, vp, VWRITE, cred,
1801 			    (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1802 			if (!error) {
1803 				tempsize = vap->va_size;
1804 				VATTR_NULL(vap);
1805 				vap->va_size = tempsize;
1806 				error = VOP_SETATTR(vp, vap, cred);
1807 			}
1808 		}
1809 	}
1810 
1811 	if (!error) {
1812 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1813 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1814 		if (!error)
1815 			error = VOP_GETATTR(vp, vap);
1816 	}
1817 	if (info.v3) {
1818 		if (exclusive_flag && !error &&
1819 			bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1820 			error = EEXIST;
1821 		diraft_ret = VOP_GETATTR(dirp, &diraft);
1822 		vrele(dirp);
1823 		dirp = NULL;
1824 	}
1825 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1826 			      NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1827 			      NFSX_WCCDATA(info.v3),
1828 			      &error));
1829 	if (info.v3) {
1830 		if (!error) {
1831 			nfsm_srvpostop_fh(&info, fhp);
1832 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1833 		}
1834 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1835 				 diraft_ret, &diraft);
1836 		error = 0;
1837 	} else {
1838 		nfsm_srvfhtom(&info, fhp);
1839 		fp = nfsm_build(&info, NFSX_V2FATTR);
1840 		nfsm_srvfattr(nfsd, vap, fp);
1841 	}
1842 	goto nfsmout;
1843 
1844 nfsmreply0:
1845 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1846 	error = 0;
1847 	/* fall through */
1848 
1849 nfsmout:
1850 	*mrq = info.mreq;
1851 	if (dirp)
1852 		vrele(dirp);
1853 	nlookup_done(&nd);
1854 	if (dvp) {
1855 		if (dvp == vp)
1856 			vrele(dvp);
1857 		else
1858 			vput(dvp);
1859 	}
1860 	if (vp)
1861 		vput(vp);
1862 	return (error);
1863 }
1864 
1865 /*
1866  * nfs v3 mknod service
1867  */
1868 int
1869 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1870 	    struct thread *td, struct mbuf **mrq)
1871 {
1872 	struct sockaddr *nam = nfsd->nd_nam;
1873 	struct ucred *cred = &nfsd->nd_cr;
1874 	struct vattr va, dirfor, diraft;
1875 	struct vattr *vap = &va;
1876 	u_int32_t *tl;
1877 	struct nlookupdata nd;
1878 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1879 	enum vtype vtyp;
1880 	struct vnode *dirp;
1881 	struct vnode *dvp;
1882 	struct vnode *vp;
1883 	nfsfh_t nfh;
1884 	fhandle_t *fhp;
1885 	struct nfsm_info info;
1886 
1887 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1888 	nlookup_zero(&nd);
1889 	dirp = NULL;
1890 	dvp = NULL;
1891 	vp = NULL;
1892 
1893 	info.mrep = nfsd->nd_mrep;
1894 	info.mreq = NULL;
1895 	info.md = nfsd->nd_md;
1896 	info.dpos = nfsd->nd_dpos;
1897 
1898 	fhp = &nfh.fh_generic;
1899 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1900 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1901 
1902 	/*
1903 	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1904 	 * is not valid.  However, nfsm_*() routines may still jump to
1905 	 * nfsmout.
1906 	 */
1907 
1908 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1909 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1910 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1911 	if (dirp)
1912 		dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1913 	if (error) {
1914 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1915 			   NFSX_WCCDATA(1), &error));
1916 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1917 				 diraft_ret, &diraft);
1918 		error = 0;
1919 		goto nfsmout;
1920 	}
1921 	NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1922 	vtyp = nfsv3tov_type(*tl);
1923 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1924 		error = NFSERR_BADTYPE;
1925 		goto out;
1926 	}
1927 	VATTR_NULL(vap);
1928 	ERROROUT(nfsm_srvsattr(&info, vap));
1929 	if (vtyp == VCHR || vtyp == VBLK) {
1930 		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1931 		vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1932 		vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1933 	}
1934 
1935 	/*
1936 	 * Iff doesn't exist, create it.
1937 	 */
1938 	if (vp) {
1939 		error = EEXIST;
1940 		goto out;
1941 	}
1942 	vap->va_type = vtyp;
1943 	if (vap->va_mode == (mode_t)VNOVAL)
1944 		vap->va_mode = 0;
1945 	if (vtyp == VSOCK) {
1946 		vn_unlock(dvp);
1947 		error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1948 		vrele(dvp);
1949 		dvp = NULL;
1950 	} else {
1951 		if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1952 			goto out;
1953 
1954 		vn_unlock(dvp);
1955 		error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1956 		vrele(dvp);
1957 		dvp = NULL;
1958 		if (error)
1959 			goto out;
1960 	}
1961 
1962 	/*
1963 	 * send response, cleanup, return.
1964 	 */
1965 out:
1966 	nlookup_done(&nd);
1967 	if (dvp) {
1968 		if (dvp == vp)
1969 			vrele(dvp);
1970 		else
1971 			vput(dvp);
1972 		dvp = NULL;
1973 	}
1974 	if (!error) {
1975 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1976 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
1977 		if (!error)
1978 			error = VOP_GETATTR(vp, vap);
1979 	}
1980 	if (vp) {
1981 		vput(vp);
1982 		vp = NULL;
1983 	}
1984 	diraft_ret = VOP_GETATTR(dirp, &diraft);
1985 	if (dirp) {
1986 		vrele(dirp);
1987 		dirp = NULL;
1988 	}
1989 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1990 			      NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1991 			      NFSX_WCCDATA(1), &error));
1992 	if (!error) {
1993 		nfsm_srvpostop_fh(&info, fhp);
1994 		nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1995 	}
1996 	nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1997 			 diraft_ret, &diraft);
1998 	*mrq = info.mreq;
1999 	return (0);
2000 nfsmout:
2001 	*mrq = info.mreq;
2002 	if (dirp)
2003 		vrele(dirp);
2004 	nlookup_done(&nd);
2005 	if (dvp) {
2006 		if (dvp == vp)
2007 			vrele(dvp);
2008 		else
2009 			vput(dvp);
2010 	}
2011 	if (vp)
2012 		vput(vp);
2013 	return (error);
2014 }
2015 
2016 /*
2017  * nfs remove service
2018  */
2019 int
2020 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2021 	     struct thread *td, struct mbuf **mrq)
2022 {
2023 	struct sockaddr *nam = nfsd->nd_nam;
2024 	struct ucred *cred = &nfsd->nd_cr;
2025 	struct nlookupdata nd;
2026 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2027 	struct vnode *dirp;
2028 	struct vnode *dvp;
2029 	struct vnode *vp;
2030 	struct vattr dirfor, diraft;
2031 	nfsfh_t nfh;
2032 	fhandle_t *fhp;
2033 	struct nfsm_info info;
2034 
2035 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2036 	nlookup_zero(&nd);
2037 	dirp = NULL;
2038 	dvp = NULL;
2039 	vp = NULL;
2040 
2041 	info.mrep = nfsd->nd_mrep;
2042 	info.mreq = NULL;
2043 	info.md = nfsd->nd_md;
2044 	info.dpos = nfsd->nd_dpos;
2045 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2046 
2047 	fhp = &nfh.fh_generic;
2048 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2049 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2050 
2051 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2052 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2053 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2054 	if (dirp) {
2055 		if (info.v3)
2056 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2057 	}
2058 	if (error == 0) {
2059 		if (vp->v_type == VDIR) {
2060 			error = EPERM;		/* POSIX */
2061 			goto out;
2062 		}
2063 		/*
2064 		 * The root of a mounted filesystem cannot be deleted.
2065 		 */
2066 		if (vp->v_flag & VROOT) {
2067 			error = EBUSY;
2068 			goto out;
2069 		}
2070 out:
2071 		if (!error) {
2072 			if (dvp != vp)
2073 				vn_unlock(dvp);
2074 			if (vp) {
2075 				vput(vp);
2076 				vp = NULL;
2077 			}
2078 			error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2079 			vrele(dvp);
2080 			dvp = NULL;
2081 		}
2082 	}
2083 	if (dirp && info.v3)
2084 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2085 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2086 	if (info.v3) {
2087 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2088 				 diraft_ret, &diraft);
2089 		error = 0;
2090 	}
2091 nfsmout:
2092 	*mrq = info.mreq;
2093 	nlookup_done(&nd);
2094 	if (dirp)
2095 		vrele(dirp);
2096 	if (dvp) {
2097 		if (dvp == vp)
2098 			vrele(dvp);
2099 		else
2100 			vput(dvp);
2101 	}
2102 	if (vp)
2103 		vput(vp);
2104 	return(error);
2105 }
2106 
2107 /*
2108  * nfs rename service
2109  */
2110 int
2111 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2112 	     struct thread *td, struct mbuf **mrq)
2113 {
2114 	struct sockaddr *nam = nfsd->nd_nam;
2115 	struct ucred *cred = &nfsd->nd_cr;
2116 	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2117 	int tdirfor_ret = 1, tdiraft_ret = 1;
2118 	struct nlookupdata fromnd, tond;
2119 	struct vnode *fvp, *fdirp, *fdvp;
2120 	struct vnode *tvp, *tdirp, *tdvp;
2121 	struct namecache *ncp;
2122 	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2123 	nfsfh_t fnfh, tnfh;
2124 	fhandle_t *ffhp, *tfhp;
2125 	uid_t saved_uid;
2126 	struct nfsm_info info;
2127 
2128 	info.mrep = nfsd->nd_mrep;
2129 	info.mreq = NULL;
2130 	info.md = nfsd->nd_md;
2131 	info.dpos = nfsd->nd_dpos;
2132 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2133 
2134 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2135 #ifndef nolint
2136 	fvp = NULL;
2137 #endif
2138 	ffhp = &fnfh.fh_generic;
2139 	tfhp = &tnfh.fh_generic;
2140 
2141 	/*
2142 	 * Clear fields in case a goto nfsmout occurs from a macro.
2143 	 */
2144 
2145 	nlookup_zero(&fromnd);
2146 	nlookup_zero(&tond);
2147 	fdirp = NULL;
2148 	tdirp = NULL;
2149 
2150 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2151 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2152 
2153 	/*
2154 	 * Remember our original uid so that we can reset cr_uid before
2155 	 * the second nfs_namei() call, in case it is remapped.
2156 	 */
2157 	saved_uid = cred->cr_uid;
2158 	error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2159 			  NULL, NULL,
2160 			  ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2161 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2162 	if (fdirp) {
2163 		if (info.v3)
2164 			fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2165 	}
2166 	if (error) {
2167 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2168 				      2 * NFSX_WCCDATA(info.v3), &error));
2169 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2170 				 fdiraft_ret, &fdiraft);
2171 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2172 				 tdiraft_ret, &tdiraft);
2173 		error = 0;
2174 		goto nfsmout;
2175 	}
2176 
2177 	/*
2178 	 * We have to unlock the from ncp before we can safely lookup
2179 	 * the target ncp.
2180 	 */
2181 	KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2182 	cache_unlock(&fromnd.nl_nch);
2183 	fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2184 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2185 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2186 	cred->cr_uid = saved_uid;
2187 
2188 	error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2189 			  tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2190 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2191 	if (tdirp) {
2192 		if (info.v3)
2193 			tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2194 	}
2195 	if (error)
2196 		goto out1;
2197 
2198 	/*
2199 	 * relock the source
2200 	 */
2201 	if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2202 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2203 	} else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2204 		cache_lock(&fromnd.nl_nch);
2205 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2206 	} else {
2207 		cache_unlock(&tond.nl_nch);
2208 		cache_lock(&fromnd.nl_nch);
2209 		cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210 		cache_lock(&tond.nl_nch);
2211 		cache_resolve(&tond.nl_nch, tond.nl_cred);
2212 	}
2213 	fromnd.nl_flags |= NLC_NCPISLOCKED;
2214 
2215 	fvp = fromnd.nl_nch.ncp->nc_vp;
2216 	tvp = tond.nl_nch.ncp->nc_vp;
2217 
2218 	/*
2219 	 * Set fdvp and tdvp.  We haven't done all the topology checks
2220 	 * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2221 	 * point).  If we get through the checks these will be guarenteed
2222 	 * to be non-NULL.
2223 	 *
2224 	 * Holding the children ncp's should be sufficient to prevent
2225 	 * fdvp and tdvp ripouts.
2226 	 */
2227 	if (fromnd.nl_nch.ncp->nc_parent)
2228 		fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2229 	else
2230 		fdvp = NULL;
2231 	if (tond.nl_nch.ncp->nc_parent)
2232 		tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2233 	else
2234 		tdvp = NULL;
2235 
2236 	if (tvp != NULL) {
2237 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2238 			if (info.v3)
2239 				error = EEXIST;
2240 			else
2241 				error = EISDIR;
2242 			goto out;
2243 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2244 			if (info.v3)
2245 				error = EEXIST;
2246 			else
2247 				error = ENOTDIR;
2248 			goto out;
2249 		}
2250 		if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2251 			if (info.v3)
2252 				error = EXDEV;
2253 			else
2254 				error = ENOTEMPTY;
2255 			goto out;
2256 		}
2257 	}
2258 	if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2259 		if (info.v3)
2260 			error = EXDEV;
2261 		else
2262 			error = ENOTEMPTY;
2263 		goto out;
2264 	}
2265 	if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2266 		if (info.v3)
2267 			error = EXDEV;
2268 		else
2269 			error = ENOTEMPTY;
2270 		goto out;
2271 	}
2272 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2273 		if (info.v3)
2274 			error = EINVAL;
2275 		else
2276 			error = ENOTEMPTY;
2277 	}
2278 
2279 	/*
2280 	 * You cannot rename a source into itself or a subdirectory of itself.
2281 	 * We check this by travsering the target directory upwards looking
2282 	 * for a match against the source.
2283 	 */
2284 	if (error == 0) {
2285 		for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2286 			if (fromnd.nl_nch.ncp == ncp) {
2287 				error = EINVAL;
2288 				break;
2289 			}
2290 		}
2291 	}
2292 
2293 	/*
2294 	 * If source is the same as the destination (that is the
2295 	 * same vnode with the same name in the same directory),
2296 	 * then there is nothing to do.
2297 	 */
2298 	if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2299 		error = -1;
2300 out:
2301 	if (!error) {
2302 		/*
2303 		 * The VOP_NRENAME function releases all vnode references &
2304 		 * locks prior to returning so we need to clear the pointers
2305 		 * to bypass cleanup code later on.
2306 		 */
2307 		error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2308 				    fdvp, tdvp, tond.nl_cred);
2309 	} else {
2310 		if (error == -1)
2311 			error = 0;
2312 	}
2313 	/* fall through */
2314 
2315 out1:
2316 	if (fdirp)
2317 		fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2318 	if (tdirp)
2319 		tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2320 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2321 			      2 * NFSX_WCCDATA(info.v3), &error));
2322 	if (info.v3) {
2323 		nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2324 				 fdiraft_ret, &fdiraft);
2325 		nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2326 				 tdiraft_ret, &tdiraft);
2327 	}
2328 	error = 0;
2329 	/* fall through */
2330 
2331 nfsmout:
2332 	*mrq = info.mreq;
2333 	if (tdirp)
2334 		vrele(tdirp);
2335 	nlookup_done(&tond);
2336 	if (fdirp)
2337 		vrele(fdirp);
2338 	nlookup_done(&fromnd);
2339 	return (error);
2340 }
2341 
2342 /*
2343  * nfs link service
2344  */
2345 int
2346 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2347 	   struct thread *td, struct mbuf **mrq)
2348 {
2349 	struct sockaddr *nam = nfsd->nd_nam;
2350 	struct ucred *cred = &nfsd->nd_cr;
2351 	struct nlookupdata nd;
2352 	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2353 	int getret = 1;
2354 	struct vnode *dirp;
2355 	struct vnode *dvp;
2356 	struct vnode *vp;
2357 	struct vnode *xp;
2358 	struct mount *mp;
2359 	struct mount *xmp;
2360 	struct vattr dirfor, diraft, at;
2361 	nfsfh_t nfh, dnfh;
2362 	fhandle_t *fhp, *dfhp;
2363 	struct nfsm_info info;
2364 
2365 	info.mrep = nfsd->nd_mrep;
2366 	info.mreq = NULL;
2367 	info.md = nfsd->nd_md;
2368 	info.dpos = nfsd->nd_dpos;
2369 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2370 
2371 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2372 	nlookup_zero(&nd);
2373 	dirp = dvp = vp = xp = NULL;
2374 	mp = xmp = NULL;
2375 
2376 	fhp = &nfh.fh_generic;
2377 	dfhp = &dnfh.fh_generic;
2378 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2379 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2380 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2381 
2382 	error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2383 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2384 	if (error) {
2385 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2386 				      NFSX_POSTOPATTR(info.v3) +
2387 				      NFSX_WCCDATA(info.v3),
2388 				      &error));
2389 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2390 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2391 				 diraft_ret, &diraft);
2392 		xp = NULL;
2393 		error = 0;
2394 		goto nfsmout;
2395 	}
2396 	if (xp->v_type == VDIR) {
2397 		error = EPERM;		/* POSIX */
2398 		goto out1;
2399 	}
2400 
2401 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2402 			  dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2403 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2404 	if (dirp) {
2405 		if (info.v3)
2406 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2407 	}
2408 	if (error)
2409 		goto out1;
2410 
2411 	if (vp != NULL) {
2412 		error = EEXIST;
2413 		goto out;
2414 	}
2415 	if (xp->v_mount != dvp->v_mount)
2416 		error = EXDEV;
2417 out:
2418 	if (!error) {
2419 		vn_unlock(dvp);
2420 		error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2421 		vrele(dvp);
2422 		dvp = NULL;
2423 	}
2424 	/* fall through */
2425 
2426 out1:
2427 	if (info.v3)
2428 		getret = VOP_GETATTR(xp, &at);
2429 	if (dirp)
2430 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2431 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2432 			      NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2433 			      &error));
2434 	if (info.v3) {
2435 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2436 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2437 				 diraft_ret, &diraft);
2438 		error = 0;
2439 	}
2440 	/* fall through */
2441 
2442 nfsmout:
2443 	*mrq = info.mreq;
2444 	nlookup_done(&nd);
2445 	if (dirp)
2446 		vrele(dirp);
2447 	if (xp)
2448 		vrele(xp);
2449 	if (dvp) {
2450 		if (dvp == vp)
2451 			vrele(dvp);
2452 		else
2453 			vput(dvp);
2454 	}
2455 	if (vp)
2456 		vput(vp);
2457 	return(error);
2458 }
2459 
2460 /*
2461  * nfs symbolic link service
2462  */
2463 int
2464 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2465 	      struct thread *td, struct mbuf **mrq)
2466 {
2467 	struct sockaddr *nam = nfsd->nd_nam;
2468 	struct ucred *cred = &nfsd->nd_cr;
2469 	struct vattr va, dirfor, diraft;
2470 	struct nlookupdata nd;
2471 	struct vattr *vap = &va;
2472 	struct nfsv2_sattr *sp;
2473 	char *pathcp = NULL;
2474 	struct uio io;
2475 	struct iovec iv;
2476 	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2477 	struct vnode *dirp;
2478 	struct vnode *vp;
2479 	struct vnode *dvp;
2480 	nfsfh_t nfh;
2481 	fhandle_t *fhp;
2482 	struct nfsm_info info;
2483 
2484 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2485 	nlookup_zero(&nd);
2486 	dirp = NULL;
2487 	dvp = NULL;
2488 	vp = NULL;
2489 
2490 	info.mrep = nfsd->nd_mrep;
2491 	info.mreq =  NULL;
2492 	info.md = nfsd->nd_md;
2493 	info.dpos = nfsd->nd_dpos;
2494 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2495 
2496 	fhp = &nfh.fh_generic;
2497 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2498 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2499 
2500 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2501 			fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2502 			td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2503 	if (dirp) {
2504 		if (info.v3)
2505 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2506 	}
2507 	if (error)
2508 		goto out;
2509 
2510 	VATTR_NULL(vap);
2511 	if (info.v3) {
2512 		ERROROUT(nfsm_srvsattr(&info, vap));
2513 	}
2514 	NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2515 	pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2516 	iv.iov_base = pathcp;
2517 	iv.iov_len = len2;
2518 	io.uio_resid = len2;
2519 	io.uio_offset = 0;
2520 	io.uio_iov = &iv;
2521 	io.uio_iovcnt = 1;
2522 	io.uio_segflg = UIO_SYSSPACE;
2523 	io.uio_rw = UIO_READ;
2524 	io.uio_td = NULL;
2525 	ERROROUT(nfsm_mtouio(&info, &io, len2));
2526 	if (info.v3 == 0) {
2527 		NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2528 		vap->va_mode = nfstov_mode(sp->sa_mode);
2529 	}
2530 	*(pathcp + len2) = '\0';
2531 	if (vp) {
2532 		error = EEXIST;
2533 		goto out;
2534 	}
2535 
2536 	if (vap->va_mode == (mode_t)VNOVAL)
2537 		vap->va_mode = 0;
2538 	if (dvp != vp)
2539 		vn_unlock(dvp);
2540 	error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2541 	vrele(dvp);
2542 	dvp = NULL;
2543 	if (error == 0) {
2544 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2545 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2546 		if (!error)
2547 			error = VOP_GETATTR(vp, vap);
2548 	}
2549 
2550 out:
2551 	if (dvp) {
2552 		if (dvp == vp)
2553 			vrele(dvp);
2554 		else
2555 			vput(dvp);
2556 	}
2557 	if (vp) {
2558 		vput(vp);
2559 		vp = NULL;
2560 	}
2561 	if (pathcp) {
2562 		kfree(pathcp, M_TEMP);
2563 		pathcp = NULL;
2564 	}
2565 	if (dirp) {
2566 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2567 		vrele(dirp);
2568 		dirp = NULL;
2569 	}
2570 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2571 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2572 			      NFSX_WCCDATA(info.v3),
2573 			      &error));
2574 	if (info.v3) {
2575 		if (!error) {
2576 			nfsm_srvpostop_fh(&info, fhp);
2577 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2578 		}
2579 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2580 				 diraft_ret, &diraft);
2581 	}
2582 	error = 0;
2583 	/* fall through */
2584 
2585 nfsmout:
2586 	*mrq = info.mreq;
2587 	nlookup_done(&nd);
2588 	if (vp)
2589 		vput(vp);
2590 	if (dirp)
2591 		vrele(dirp);
2592 	if (pathcp)
2593 		kfree(pathcp, M_TEMP);
2594 	return (error);
2595 }
2596 
2597 /*
2598  * nfs mkdir service
2599  */
2600 int
2601 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2602 	    struct thread *td, struct mbuf **mrq)
2603 {
2604 	struct sockaddr *nam = nfsd->nd_nam;
2605 	struct ucred *cred = &nfsd->nd_cr;
2606 	struct vattr va, dirfor, diraft;
2607 	struct vattr *vap = &va;
2608 	struct nfs_fattr *fp;
2609 	struct nlookupdata nd;
2610 	u_int32_t *tl;
2611 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2612 	struct vnode *dirp;
2613 	struct vnode *dvp;
2614 	struct vnode *vp;
2615 	nfsfh_t nfh;
2616 	fhandle_t *fhp;
2617 	struct nfsm_info info;
2618 
2619 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2620 	nlookup_zero(&nd);
2621 	dirp = NULL;
2622 	dvp = NULL;
2623 	vp = NULL;
2624 
2625 	info.dpos = nfsd->nd_dpos;
2626 	info.mrep = nfsd->nd_mrep;
2627 	info.mreq =  NULL;
2628 	info.md = nfsd->nd_md;
2629 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2630 
2631 	fhp = &nfh.fh_generic;
2632 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2633 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2634 
2635 	error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2636 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2637 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2638 	if (dirp) {
2639 		if (info.v3)
2640 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2641 	}
2642 	if (error) {
2643 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2644 				      NFSX_WCCDATA(info.v3), &error));
2645 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2646 				 diraft_ret, &diraft);
2647 		error = 0;
2648 		goto nfsmout;
2649 	}
2650 	VATTR_NULL(vap);
2651 	if (info.v3) {
2652 		ERROROUT(nfsm_srvsattr(&info, vap));
2653 	} else {
2654 		NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2655 		vap->va_mode = nfstov_mode(*tl++);
2656 	}
2657 
2658 	/*
2659 	 * At this point nd.ni_dvp is referenced and exclusively locked and
2660 	 * nd.ni_vp, if it exists, is referenced but not locked.
2661 	 */
2662 
2663 	vap->va_type = VDIR;
2664 	if (vp != NULL) {
2665 		error = EEXIST;
2666 		goto out;
2667 	}
2668 
2669 	/*
2670 	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2671 	 * component is freed by the VOP call.  This will fill-in
2672 	 * nd.ni_vp, reference, and exclusively lock it.
2673 	 */
2674 	if (vap->va_mode == (mode_t)VNOVAL)
2675 		vap->va_mode = 0;
2676 	vn_unlock(dvp);
2677 	error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2678 	vrele(dvp);
2679 	dvp = NULL;
2680 
2681 	if (error == 0) {
2682 		bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2683 		error = VFS_VPTOFH(vp, &fhp->fh_fid);
2684 		if (error == 0)
2685 			error = VOP_GETATTR(vp, vap);
2686 	}
2687 out:
2688 	if (dirp)
2689 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2690 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2691 			      NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2692 			      NFSX_WCCDATA(info.v3),
2693 			      &error));
2694 	if (info.v3) {
2695 		if (!error) {
2696 			nfsm_srvpostop_fh(&info, fhp);
2697 			nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2698 		}
2699 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2700 				 diraft_ret, &diraft);
2701 	} else {
2702 		nfsm_srvfhtom(&info, fhp);
2703 		fp = nfsm_build(&info, NFSX_V2FATTR);
2704 		nfsm_srvfattr(nfsd, vap, fp);
2705 	}
2706 	error = 0;
2707 	/* fall through */
2708 
2709 nfsmout:
2710 	*mrq = info.mreq;
2711 	nlookup_done(&nd);
2712 	if (dirp)
2713 		vrele(dirp);
2714 	if (dvp) {
2715 		if (dvp == vp)
2716 			vrele(dvp);
2717 		else
2718 			vput(dvp);
2719 	}
2720 	if (vp)
2721 		vput(vp);
2722 	return (error);
2723 }
2724 
2725 /*
2726  * nfs rmdir service
2727  */
2728 int
2729 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2730 	    struct thread *td, struct mbuf **mrq)
2731 {
2732 	struct sockaddr *nam = nfsd->nd_nam;
2733 	struct ucred *cred = &nfsd->nd_cr;
2734 	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2735 	struct vnode *dirp;
2736 	struct vnode *dvp;
2737 	struct vnode *vp;
2738 	struct vattr dirfor, diraft;
2739 	nfsfh_t nfh;
2740 	fhandle_t *fhp;
2741 	struct nlookupdata nd;
2742 	struct nfsm_info info;
2743 
2744 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2745 	nlookup_zero(&nd);
2746 	dirp = NULL;
2747 	dvp = NULL;
2748 	vp = NULL;
2749 
2750 	info.mrep = nfsd->nd_mrep;
2751 	info.mreq = NULL;
2752 	info.md = nfsd->nd_md;
2753 	info.dpos = nfsd->nd_dpos;
2754 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
2755 
2756 	fhp = &nfh.fh_generic;
2757 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2758 	NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2759 
2760 	error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2761 			  fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2762 			  td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2763 	if (dirp) {
2764 		if (info.v3)
2765 			dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2766 	}
2767 	if (error) {
2768 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2769 				      NFSX_WCCDATA(info.v3), &error));
2770 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2771 				 diraft_ret, &diraft);
2772 		error = 0;
2773 		goto nfsmout;
2774 	}
2775 	if (vp->v_type != VDIR) {
2776 		error = ENOTDIR;
2777 		goto out;
2778 	}
2779 
2780 	/*
2781 	 * The root of a mounted filesystem cannot be deleted.
2782 	 */
2783 	if (vp->v_flag & VROOT)
2784 		error = EBUSY;
2785 out:
2786 	/*
2787 	 * Issue or abort op.  Since SAVESTART is not set, path name
2788 	 * component is freed by the VOP after either.
2789 	 */
2790 	if (!error) {
2791 		if (dvp != vp)
2792 			vn_unlock(dvp);
2793 		vput(vp);
2794 		vp = NULL;
2795 		error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2796 		vrele(dvp);
2797 		dvp = NULL;
2798 	}
2799 	nlookup_done(&nd);
2800 
2801 	if (dirp)
2802 		diraft_ret = VOP_GETATTR(dirp, &diraft);
2803 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2804 	if (info.v3) {
2805 		nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2806 				 diraft_ret, &diraft);
2807 		error = 0;
2808 	}
2809 	/* fall through */
2810 
2811 nfsmout:
2812 	*mrq = info.mreq;
2813 	if (dvp) {
2814 		if (dvp == vp)
2815 			vrele(dvp);
2816 		else
2817 			vput(dvp);
2818 	}
2819 	nlookup_done(&nd);
2820 	if (dirp)
2821 		vrele(dirp);
2822 	if (vp)
2823 		vput(vp);
2824 	return(error);
2825 }
2826 
2827 /*
2828  * nfs readdir service
2829  * - mallocs what it thinks is enough to read
2830  *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2831  * - calls VOP_READDIR()
2832  * - loops around building the reply
2833  *	if the output generated exceeds count break out of loop
2834  *	The nfsm_clget macro is used here so that the reply will be packed
2835  *	tightly in mbuf clusters.
2836  * - it only knows that it has encountered eof when the VOP_READDIR()
2837  *	reads nothing
2838  * - as such one readdir rpc will return eof false although you are there
2839  *	and then the next will return eof
 * - it trims out records with d_ino == 0 (and whiteout records);
 *	these don't matter for Unix clients, but they might confuse clients
 *	of other operating systems.
2843  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2844  *	than requested, but this may not apply to all filesystems. For
2845  *	example, client NFS does not { although it is never remote mounted
2846  *	anyhow }
2847  *     The alternate call nfsrv_readdirplus() does lookups as well.
2848  * PS: The NFS protocol spec. does not clarify what the "count" byte
2849  *	argument is a count of.. just name strings and file id's or the
2850  *	entire reply rpc or ...
2851  *	I tried just file name and id sizes and it confused the Sun client,
2852  *	so I am using the full rpc size now. The "paranoia.." comment refers
2853  *	to including the status longwords that are not a part of the dir.
2854  *	"entry" structures, but are in the rpc.
2855  */
/*
 * Scratch record used by nfsrv_readdirplus() (declared there as "fl"),
 * whose fl_nfh array serves as storage for a per-entry file handle.
 * All members after fl_off are arrays/words of u_int32_t sized from the
 * NFSv3 constants.
 * NOTE(review): the field names suggest this mirrors the trailing part
 * of a v3 READDIRPLUS entry (cookie, post-op attributes, file handle);
 * confirm against the encoder in nfsrv_readdirplus().
 */
struct flrep {
	nfsuint64	fl_off;		/* presumably the entry cookie */
	u_int32_t	fl_postopok;	/* presumably "attributes follow" */
	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
	u_int32_t	fl_fhok;	/* presumably "handle follows" */
	u_int32_t	fl_fhsize;	/* presumably the handle length */
	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
};
2864 
/*
 * NFS READDIR service (v2 and v3).
 *
 * Decodes the directory file handle, the starting cookie (toff), the
 * cookie verifier (v3 only) and the byte count from the request, reads
 * directory entries with VOP_READDIR() into a temporary buffer and
 * encodes them into the reply until the next entry would push the
 * running length past the requested count.
 *
 * The reply mbuf chain is handed back through *mrq.
 */
int
nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
	      struct thread *td, struct mbuf **mrq)
{
	struct sockaddr *nam = nfsd->nd_nam;
	struct ucred *cred = &nfsd->nd_cr;
	char *bp, *be;
	struct dirent *dp;
	caddr_t cp;
	u_int32_t *tl;
	struct mbuf *mp1, *mp2;
	char *cpos, *cend, *rbuf;
	struct vnode *vp = NULL;
	struct mount *mp = NULL;
	struct vattr at;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct uio io;
	struct iovec iv;
	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
	u_quad_t off, toff, verf;
	off_t *cookies = NULL, *cookiep;
	struct nfsm_info info;

	info.mrep = nfsd->nd_mrep;
	info.mreq = NULL;
	info.md = nfsd->nd_md;
	info.dpos = nfsd->nd_dpos;
	info.v3 = (nfsd->nd_flag & ND_NFSV3);

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	fhp = &nfh.fh_generic;
	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
	/*
	 * v3 sends a 64-bit cookie, a 64-bit verifier and the count;
	 * v2 sends a 32-bit cookie and the count.  In both cases tl is
	 * left pointing at the count word.
	 */
	if (info.v3) {
		NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
		toff = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	} else {
		NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
		toff = fxdr_unsigned(u_quad_t, *tl++);
		verf = 0;	/* shut up gcc */
	}
	off = toff;
	cnt = fxdr_unsigned(int, *tl);
	/*
	 * Round the read size up to a multiple of DIRBLKSIZ, then clamp
	 * both the count and the read size to the server maximum.  The
	 * unsigned casts make a negative client value clamp as well.
	 */
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	xfer = NFS_SRVMAXDATA(nfsd);
	if ((unsigned)cnt > xfer)
		cnt = xfer;
	if ((unsigned)siz > xfer)
		siz = xfer;
	fullsiz = siz;
	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
	if (!error && vp->v_type != VDIR) {
		error = ENOTDIR;
		vput(vp);
		vp = NULL;
	}
	if (error) {
		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
		error = 0;
		goto nfsmout;
	}

	/*
	 * Obtain lock on vnode for this section of the code
	 */

	if (info.v3) {
		error = getret = VOP_GETATTR(vp, &at);
#if 0
		/*
		 * XXX This check may be too strict for Solaris 2.5 clients.
		 */
		if (!error && toff && verf && verf != at.va_filerev)
			error = NFSERR_BAD_COOKIE;
#endif
	}
	if (!error)
		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
	if (error) {
		vput(vp);
		vp = NULL;
		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
				      NFSX_POSTOPATTR(info.v3), &error));
		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
		error = 0;
		goto nfsmout;
	}
	/* From here on vp is only referenced; it is released with vrele(). */
	vn_unlock(vp);

	/*
	 * end section.  Allocate rbuf and continue
	 */
	rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
again:
	/*
	 * (Re)issue the directory read at offset 'off' for fullsiz bytes.
	 * A cookie array left over from a previous pass is freed first.
	 */
	iv.iov_base = rbuf;
	iv.iov_len = fullsiz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = fullsiz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	eofflag = 0;
	if (cookies) {
		kfree((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}
	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
	off = (off_t)io.uio_offset;
	/* A successful read that produced no cookie array is an error. */
	if (!cookies && !error)
		error = NFSERR_PERM;
	if (info.v3) {
		getret = VOP_GETATTR(vp, &at);
		if (!error)
			error = getret;
	}
	if (error) {
		vrele(vp);
		vp = NULL;
		kfree((caddr_t)rbuf, M_TEMP);
		if (cookies)
			kfree((caddr_t)cookies, M_TEMP);
		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
				      NFSX_POSTOPATTR(info.v3), &error));
		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
		error = 0;
		goto nfsmout;
	}
	if (io.uio_resid) {
		/* siz becomes the number of bytes actually read */
		siz -= io.uio_resid;

		/*
		 * If nothing read, return eof
		 * rpc reply
		 */
		if (siz == 0) {
			vrele(vp);
			vp = NULL;
			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
					      NFSX_POSTOPATTR(info.v3) +
					      NFSX_COOKIEVERF(info.v3) +
					      2 * NFSX_UNSIGNED,
					      &error));
			if (info.v3) {
				nfsm_srvpostop_attr(&info, nfsd, getret, &at);
				tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
				txdr_hyper(at.va_filerev, tl);
				tl += 2;
			} else
				tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
			/* no entry follows, then eof = true */
			*tl++ = nfs_false;
			*tl = nfs_true;
			kfree((caddr_t)rbuf, M_TEMP);
			kfree((caddr_t)cookies, M_TEMP);
			error = 0;
			goto nfsmout;
		}
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;
	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that preceed the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
		 ((u_quad_t)(*cookiep)) <= toff)) {
		dp = _DIRENT_NEXT(dp);
		cpos = (char *)dp;
		cookiep++;
		ncookies--;
	}
	/* Every record in this buffer was skipped: read the next chunk. */
	if (cpos >= cend || ncookies == 0) {
		toff = off;
		siz = fullsiz;
		goto again;
	}

	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
			      NFSX_POSTOPATTR(info.v3) +
			      NFSX_COOKIEVERF(info.v3) + siz,
			      &error));
	if (info.v3) {
		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
		tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.va_filerev, tl);
	}
	/*
	 * bp/be track the current write position and the end of the
	 * current mbuf; they, together with mp1/mp2, are handed to
	 * nfsm_clget() before every word written below.
	 */
	mp1 = mp2 = info.mb;
	bp = info.bpos;
	be = bp + M_TRAILINGSPACE(mp1);

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
			nlen = dp->d_namlen;
			rem = nfsm_rndup(nlen) - nlen;
			len += (4 * NFSX_UNSIGNED + nlen + rem);
			if (info.v3)
				len += 2 * NFSX_UNSIGNED;
			/*
			 * This entry would exceed the requested count:
			 * stop here and clear eof since entries remain.
			 */
			if (len > cnt) {
				eofflag = 0;
				break;
			}
			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			tl = nfsm_clget(&info, mp1, mp2, bp, be);
			*tl = nfs_true;
			bp += NFSX_UNSIGNED;
			/* v3 file ids are 64 bits: emit the high word first */
			if (info.v3) {
				tl = nfsm_clget(&info, mp1, mp2, bp, be);
				*tl = txdr_unsigned(dp->d_ino >> 32);
				bp += NFSX_UNSIGNED;
			}
			tl = nfsm_clget(&info, mp1, mp2, bp, be);
			*tl = txdr_unsigned(dp->d_ino);
			bp += NFSX_UNSIGNED;
			tl = nfsm_clget(&info, mp1, mp2, bp, be);
			*tl = txdr_unsigned(nlen);
			bp += NFSX_UNSIGNED;

			/* And loop around copying the name */
			xfer = nlen;
			cp = dp->d_name;
			while (xfer > 0) {
				tl = nfsm_clget(&info, mp1, mp2, bp, be);
				/* copy only what fits before 'be' */
				if ((bp+xfer) > be)
					tsiz = be-bp;
				else
					tsiz = xfer;
				bcopy(cp, bp, tsiz);
				bp += tsiz;
				xfer -= tsiz;
				if (xfer > 0)
					cp += tsiz;
			}
			/* And null pad to a int32_t boundary */
			for (i = 0; i < rem; i++)
				*bp++ = '\0';
			tl = nfsm_clget(&info, mp1, mp2, bp, be);

			/* Finish off the record */
			if (info.v3) {
				/* 64-bit cookie: high word first */
				*tl = txdr_unsigned(*cookiep >> 32);
				bp += NFSX_UNSIGNED;
				tl = nfsm_clget(&info, mp1, mp2, bp, be);
			}
			*tl = txdr_unsigned(*cookiep);
			bp += NFSX_UNSIGNED;
		}
		dp = _DIRENT_NEXT(dp);
		cpos = (char *)dp;
		cookiep++;
		ncookies--;
	}
	vrele(vp);
	vp = NULL;
	/* Terminate the list (no entry follows) and append the eof flag. */
	tl = nfsm_clget(&info, mp1, mp2, bp, be);
	*tl = nfs_false;
	bp += NFSX_UNSIGNED;
	tl = nfsm_clget(&info, mp1, mp2, bp, be);
	if (eofflag)
		*tl = nfs_true;
	else
		*tl = nfs_false;
	bp += NFSX_UNSIGNED;
	/*
	 * Fix up the length of the last mbuf written: either a later
	 * mbuf than the one the reply header ended in, or that same
	 * mbuf extended by the bytes written past info.bpos.
	 */
	if (mp1 != info.mb) {
		if (bp < be)
			mp1->m_len = bp - mtod(mp1, caddr_t);
	} else
		mp1->m_len += bp - info.bpos;
	kfree((caddr_t)rbuf, M_TEMP);
	kfree((caddr_t)cookies, M_TEMP);

nfsmout:
	*mrq = info.mreq;
	if (vp)
		vrele(vp);
	return(error);
}
3164 
3165 int
3166 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3167 		  struct thread *td, struct mbuf **mrq)
3168 {
3169 	struct sockaddr *nam = nfsd->nd_nam;
3170 	struct ucred *cred = &nfsd->nd_cr;
3171 	char *bp, *be;
3172 	struct dirent *dp;
3173 	caddr_t cp;
3174 	u_int32_t *tl;
3175 	struct mbuf *mp1, *mp2;
3176 	char *cpos, *cend, *rbuf;
3177 	struct vnode *vp = NULL, *nvp;
3178 	struct mount *mp = NULL;
3179 	struct flrep fl;
3180 	nfsfh_t nfh;
3181 	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3182 	struct uio io;
3183 	struct iovec iv;
3184 	struct vattr va, at, *vap = &va;
3185 	struct nfs_fattr *fp;
3186 	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3187 	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3188 	u_quad_t off, toff, verf;
3189 	off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3190 	struct nfsm_info info;
3191 
3192 	info.mrep = nfsd->nd_mrep;
3193 	info.mreq = NULL;
3194 	info.md = nfsd->nd_md;
3195 	info.dpos = nfsd->nd_dpos;
3196 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3197 
3198 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3199 	fhp = &nfh.fh_generic;
3200 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3201 	NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3202 	toff = fxdr_hyper(tl);
3203 	tl += 2;
3204 	verf = fxdr_hyper(tl);
3205 	tl += 2;
3206 	siz = fxdr_unsigned(int, *tl++);
3207 	cnt = fxdr_unsigned(int, *tl);
3208 	off = toff;
3209 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3210 	xfer = NFS_SRVMAXDATA(nfsd);
3211 	if ((unsigned)cnt > xfer)
3212 		cnt = xfer;
3213 	if ((unsigned)siz > xfer)
3214 		siz = xfer;
3215 	fullsiz = siz;
3216 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3217 			     &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3218 	if (!error && vp->v_type != VDIR) {
3219 		error = ENOTDIR;
3220 		vput(vp);
3221 		vp = NULL;
3222 	}
3223 	if (error) {
3224 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3225 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3226 		error = 0;
3227 		goto nfsmout;
3228 	}
3229 	error = getret = VOP_GETATTR(vp, &at);
3230 #if 0
3231 	/*
3232 	 * XXX This check may be too strict for Solaris 2.5 clients.
3233 	 */
3234 	if (!error && toff && verf && verf != at.va_filerev)
3235 		error = NFSERR_BAD_COOKIE;
3236 #endif
3237 	if (!error) {
3238 		error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3239 	}
3240 	if (error) {
3241 		vput(vp);
3242 		vp = NULL;
3243 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3244 				      NFSX_V3POSTOPATTR, &error));
3245 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3246 		error = 0;
3247 		goto nfsmout;
3248 	}
3249 	vn_unlock(vp);
3250 	rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3251 again:
3252 	iv.iov_base = rbuf;
3253 	iv.iov_len = fullsiz;
3254 	io.uio_iov = &iv;
3255 	io.uio_iovcnt = 1;
3256 	io.uio_offset = (off_t)off;
3257 	io.uio_resid = fullsiz;
3258 	io.uio_segflg = UIO_SYSSPACE;
3259 	io.uio_rw = UIO_READ;
3260 	io.uio_td = NULL;
3261 	eofflag = 0;
3262 	if (cookies) {
3263 		kfree((caddr_t)cookies, M_TEMP);
3264 		cookies = NULL;
3265 	}
3266 	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3267 	off = (u_quad_t)io.uio_offset;
3268 	getret = VOP_GETATTR(vp, &at);
3269 	if (!cookies && !error)
3270 		error = NFSERR_PERM;
3271 	if (!error)
3272 		error = getret;
3273 	if (error) {
3274 		vrele(vp);
3275 		vp = NULL;
3276 		if (cookies)
3277 			kfree((caddr_t)cookies, M_TEMP);
3278 		kfree((caddr_t)rbuf, M_TEMP);
3279 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3280 				      NFSX_V3POSTOPATTR, &error));
3281 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3282 		error = 0;
3283 		goto nfsmout;
3284 	}
3285 	if (io.uio_resid) {
3286 		siz -= io.uio_resid;
3287 
3288 		/*
3289 		 * If nothing read, return eof
3290 		 * rpc reply
3291 		 */
3292 		if (siz == 0) {
3293 			vrele(vp);
3294 			vp = NULL;
3295 			NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3296 					      NFSX_V3POSTOPATTR +
3297 					      NFSX_V3COOKIEVERF +
3298 					      2 * NFSX_UNSIGNED,
3299 					      &error));
3300 			nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3301 			tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3302 			txdr_hyper(at.va_filerev, tl);
3303 			tl += 2;
3304 			*tl++ = nfs_false;
3305 			*tl = nfs_true;
3306 			kfree((caddr_t)cookies, M_TEMP);
3307 			kfree((caddr_t)rbuf, M_TEMP);
3308 			error = 0;
3309 			goto nfsmout;
3310 		}
3311 	}
3312 
3313 	/*
3314 	 * Check for degenerate cases of nothing useful read.
3315 	 * If so go try again
3316 	 */
3317 	cpos = rbuf;
3318 	cend = rbuf + siz;
3319 	dp = (struct dirent *)cpos;
3320 	cookiep = cookies;
3321 	/*
3322 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3323 	 * directory offset up to a block boundary, so it is necessary to
3324 	 * skip over the records that precede the requested offset. This
3325 	 * requires the assumption that file offset cookies monotonically
3326 	 * increase.
3327 	 */
3328 	while (cpos < cend && ncookies > 0 &&
3329 		(dp->d_ino == 0 || dp->d_type == DT_WHT ||
3330 		 ((u_quad_t)(*cookiep)) <= toff)) {
3331 		dp = _DIRENT_NEXT(dp);
3332 		cpos = (char *)dp;
3333 		cookiep++;
3334 		ncookies--;
3335 	}
3336 	if (cpos >= cend || ncookies == 0) {
3337 		toff = off;
3338 		siz = fullsiz;
3339 		goto again;
3340 	}
3341 
3342 	/*
3343 	 * Probe one of the directory entries to see if the filesystem
3344 	 * supports VGET.
3345 	 */
3346 	if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3347 		error = NFSERR_NOTSUPP;
3348 		vrele(vp);
3349 		vp = NULL;
3350 		kfree((caddr_t)cookies, M_TEMP);
3351 		kfree((caddr_t)rbuf, M_TEMP);
3352 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3353 				      NFSX_V3POSTOPATTR, &error));
3354 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3355 		error = 0;
3356 		goto nfsmout;
3357 	}
3358 	if (nvp) {
3359 		vput(nvp);
3360 		nvp = NULL;
3361 	}
3362 
3363 	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3364 			2 * NFSX_UNSIGNED;
3365 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3366 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3367 	tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3368 	txdr_hyper(at.va_filerev, tl);
3369 	mp1 = mp2 = info.mb;
3370 	bp = info.bpos;
3371 	be = bp + M_TRAILINGSPACE(mp1);
3372 
3373 	/* Loop through the records and build reply */
3374 	while (cpos < cend && ncookies > 0) {
3375 		if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3376 			nlen = dp->d_namlen;
3377 			rem = nfsm_rndup(nlen) - nlen;
3378 
3379 			/*
3380 			 * For readdir_and_lookup get the vnode using
3381 			 * the file number.
3382 			 */
3383 			if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3384 				goto invalid;
3385 			bzero((caddr_t)nfhp, NFSX_V3FH);
3386 			nfhp->fh_fsid = fhp->fh_fsid;
3387 			if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3388 				vput(nvp);
3389 				nvp = NULL;
3390 				goto invalid;
3391 			}
3392 			if (VOP_GETATTR(nvp, vap)) {
3393 				vput(nvp);
3394 				nvp = NULL;
3395 				goto invalid;
3396 			}
3397 			vput(nvp);
3398 			nvp = NULL;
3399 
3400 			/*
3401 			 * If either the dircount or maxcount will be
3402 			 * exceeded, get out now. Both of these lengths
3403 			 * are calculated conservatively, including all
3404 			 * XDR overheads.
3405 			 */
3406 			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3407 				NFSX_V3POSTOPATTR);
3408 			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3409 			if (len > cnt || dirlen > fullsiz) {
3410 				eofflag = 0;
3411 				break;
3412 			}
3413 
3414 			/*
3415 			 * Build the directory record xdr from
3416 			 * the dirent entry.
3417 			 */
3418 			fp = (struct nfs_fattr *)&fl.fl_fattr;
3419 			nfsm_srvfattr(nfsd, vap, fp);
3420 			fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3421 			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3422 			fl.fl_postopok = nfs_true;
3423 			fl.fl_fhok = nfs_true;
3424 			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3425 
3426 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3427 			*tl = nfs_true;
3428 			bp += NFSX_UNSIGNED;
3429 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3430 			*tl = txdr_unsigned(dp->d_ino >> 32);
3431 			bp += NFSX_UNSIGNED;
3432 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3433 			*tl = txdr_unsigned(dp->d_ino);
3434 			bp += NFSX_UNSIGNED;
3435 			tl = nfsm_clget(&info, mp1, mp2, bp, be);
3436 			*tl = txdr_unsigned(nlen);
3437 			bp += NFSX_UNSIGNED;
3438 
3439 			/* And loop around copying the name */
3440 			xfer = nlen;
3441 			cp = dp->d_name;
3442 			while (xfer > 0) {
3443 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3444 				if ((bp + xfer) > be)
3445 					tsiz = be - bp;
3446 				else
3447 					tsiz = xfer;
3448 				bcopy(cp, bp, tsiz);
3449 				bp += tsiz;
3450 				xfer -= tsiz;
3451 				cp += tsiz;
3452 			}
3453 			/* And null pad to a int32_t boundary */
3454 			for (i = 0; i < rem; i++)
3455 				*bp++ = '\0';
3456 
3457 			/*
3458 			 * Now copy the flrep structure out.
3459 			 */
3460 			xfer = sizeof (struct flrep);
3461 			cp = (caddr_t)&fl;
3462 			while (xfer > 0) {
3463 				tl = nfsm_clget(&info, mp1, mp2, bp, be);
3464 				if ((bp + xfer) > be)
3465 					tsiz = be - bp;
3466 				else
3467 					tsiz = xfer;
3468 				bcopy(cp, bp, tsiz);
3469 				bp += tsiz;
3470 				xfer -= tsiz;
3471 				cp += tsiz;
3472 			}
3473 		}
3474 invalid:
3475 		dp = _DIRENT_NEXT(dp);
3476 		cpos = (char *)dp;
3477 		cookiep++;
3478 		ncookies--;
3479 	}
3480 	vrele(vp);
3481 	vp = NULL;
3482 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3483 	*tl = nfs_false;
3484 	bp += NFSX_UNSIGNED;
3485 	tl = nfsm_clget(&info, mp1, mp2, bp, be);
3486 	if (eofflag)
3487 		*tl = nfs_true;
3488 	else
3489 		*tl = nfs_false;
3490 	bp += NFSX_UNSIGNED;
3491 	if (mp1 != info.mb) {
3492 		if (bp < be)
3493 			mp1->m_len = bp - mtod(mp1, caddr_t);
3494 	} else
3495 		mp1->m_len += bp - info.bpos;
3496 	kfree((caddr_t)cookies, M_TEMP);
3497 	kfree((caddr_t)rbuf, M_TEMP);
3498 nfsmout:
3499 	*mrq = info.mreq;
3500 	if (vp)
3501 		vrele(vp);
3502 	return(error);
3503 }
3504 
3505 /*
3506  * nfs commit service
3507  */
3508 int
3509 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3510 	     struct thread *td, struct mbuf **mrq)
3511 {
3512 	struct sockaddr *nam = nfsd->nd_nam;
3513 	struct ucred *cred = &nfsd->nd_cr;
3514 	struct vattr bfor, aft;
3515 	struct vnode *vp = NULL;
3516 	struct mount *mp = NULL;
3517 	nfsfh_t nfh;
3518 	fhandle_t *fhp;
3519 	u_int32_t *tl;
3520 	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3521 	u_quad_t off;
3522 	struct nfsm_info info;
3523 
3524 	info.mrep = nfsd->nd_mrep;
3525 	info.mreq = NULL;
3526 	info.md = nfsd->nd_md;
3527 	info.dpos = nfsd->nd_dpos;
3528 
3529 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3530 	fhp = &nfh.fh_generic;
3531 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3532 	NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3533 
3534 	/*
3535 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3536 	 * count parameters, so these arguments are useless (someday maybe).
3537 	 */
3538 	off = fxdr_hyper(tl);
3539 	tl += 2;
3540 	cnt = fxdr_unsigned(int, *tl);
3541 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3542 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3543 	if (error) {
3544 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3545 				      2 * NFSX_UNSIGNED, &error));
3546 		nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3547 				 aft_ret, &aft);
3548 		error = 0;
3549 		goto nfsmout;
3550 	}
3551 	for_ret = VOP_GETATTR(vp, &bfor);
3552 
3553 	if (cnt > MAX_COMMIT_COUNT) {
3554 		/*
3555 		 * Give up and do the whole thing
3556 		 */
3557 		if (vp->v_object &&
3558 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3559 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3560 		}
3561 		error = VOP_FSYNC(vp, MNT_WAIT, 0);
3562 	} else {
3563 		/*
3564 		 * Locate and synchronously write any buffers that fall
3565 		 * into the requested range.  Note:  we are assuming that
3566 		 * f_iosize is a power of 2.
3567 		 */
3568 		int iosize = vp->v_mount->mnt_stat.f_iosize;
3569 		int iomask = iosize - 1;
3570 		off_t loffset;
3571 
3572 		/*
3573 		 * Align to iosize boundry, super-align to page boundry.
3574 		 */
3575 		if (off & iomask) {
3576 			cnt += off & iomask;
3577 			off &= ~(u_quad_t)iomask;
3578 		}
3579 		if (off & PAGE_MASK) {
3580 			cnt += off & PAGE_MASK;
3581 			off &= ~(u_quad_t)PAGE_MASK;
3582 		}
3583 		loffset = off;
3584 
3585 		if (vp->v_object &&
3586 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3587 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3588 			    (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3589 		}
3590 
3591 		crit_enter();
3592 		while (cnt > 0) {
3593 			struct buf *bp;
3594 
3595 			/*
3596 			 * If we have a buffer and it is marked B_DELWRI we
3597 			 * have to lock and write it.  Otherwise the prior
3598 			 * write is assumed to have already been committed.
3599 			 *
3600 			 * WARNING: FINDBLK_TEST buffers represent stable
3601 			 *	    storage but not necessarily stable
3602 			 *	    content.  It is ok in this case.
3603 			 */
3604 			if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3605 				if (bp->b_flags & B_DELWRI)
3606 					bp = findblk(vp, loffset, 0);
3607 				else
3608 					bp = NULL;
3609 			}
3610 			if (bp) {
3611 				if (bp->b_flags & B_DELWRI) {
3612 					bremfree(bp);
3613 					bwrite(bp);
3614 					++nfs_commit_miss;
3615 				} else {
3616 					BUF_UNLOCK(bp);
3617 				}
3618 			}
3619 			++nfs_commit_blks;
3620 			if (cnt < iosize)
3621 				break;
3622 			cnt -= iosize;
3623 			loffset += iosize;
3624 		}
3625 		crit_exit();
3626 	}
3627 
3628 	aft_ret = VOP_GETATTR(vp, &aft);
3629 	vput(vp);
3630 	vp = NULL;
3631 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3632 			      NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3633 			      &error));
3634 	nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3635 			 aft_ret, &aft);
3636 	if (!error) {
3637 		tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3638 		if (nfsver.tv_sec == 0)
3639 			nfsver = boottime;
3640 		*tl++ = txdr_unsigned(nfsver.tv_sec);
3641 		*tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3642 	} else {
3643 		error = 0;
3644 	}
3645 nfsmout:
3646 	*mrq = info.mreq;
3647 	if (vp)
3648 		vput(vp);
3649 	return(error);
3650 }
3651 
3652 /*
3653  * nfs statfs service
3654  */
3655 int
3656 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3657 	     struct thread *td, struct mbuf **mrq)
3658 {
3659 	struct sockaddr *nam = nfsd->nd_nam;
3660 	struct ucred *cred = &nfsd->nd_cr;
3661 	struct statfs *sf;
3662 	struct nfs_statfs *sfp;
3663 	int error = 0, rdonly, getret = 1;
3664 	struct vnode *vp = NULL;
3665 	struct mount *mp = NULL;
3666 	struct vattr at;
3667 	nfsfh_t nfh;
3668 	fhandle_t *fhp;
3669 	struct statfs statfs;
3670 	u_quad_t tval;
3671 	struct nfsm_info info;
3672 
3673 	info.mrep = nfsd->nd_mrep;
3674 	info.mreq = NULL;
3675 	info.md = nfsd->nd_md;
3676 	info.dpos = nfsd->nd_dpos;
3677 	info.v3 = (nfsd->nd_flag & ND_NFSV3);
3678 
3679 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3680 	fhp = &nfh.fh_generic;
3681 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3682 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3683 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3684 	if (error) {
3685 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3686 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3687 		error = 0;
3688 		goto nfsmout;
3689 	}
3690 	sf = &statfs;
3691 	error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3692 	getret = VOP_GETATTR(vp, &at);
3693 	vput(vp);
3694 	vp = NULL;
3695 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3696 			      NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3697 			      &error));
3698 	if (info.v3)
3699 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3700 	if (error) {
3701 		error = 0;
3702 		goto nfsmout;
3703 	}
3704 	sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3705 	if (info.v3) {
3706 		tval = (u_quad_t)sf->f_blocks;
3707 		tval *= (u_quad_t)sf->f_bsize;
3708 		txdr_hyper(tval, &sfp->sf_tbytes);
3709 		tval = (u_quad_t)sf->f_bfree;
3710 		tval *= (u_quad_t)sf->f_bsize;
3711 		txdr_hyper(tval, &sfp->sf_fbytes);
3712 		tval = (u_quad_t)sf->f_bavail;
3713 		tval *= (u_quad_t)sf->f_bsize;
3714 		txdr_hyper(tval, &sfp->sf_abytes);
3715 		sfp->sf_tfiles.nfsuquad[0] = 0;
3716 		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3717 		sfp->sf_ffiles.nfsuquad[0] = 0;
3718 		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3719 		sfp->sf_afiles.nfsuquad[0] = 0;
3720 		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3721 		sfp->sf_invarsec = 0;
3722 	} else {
3723 		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3724 		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3725 		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3726 		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3727 		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3728 	}
3729 nfsmout:
3730 	*mrq = info.mreq;
3731 	if (vp)
3732 		vput(vp);
3733 	return(error);
3734 }
3735 
3736 /*
3737  * nfs fsinfo service
3738  */
3739 int
3740 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3741 	     struct thread *td, struct mbuf **mrq)
3742 {
3743 	struct sockaddr *nam = nfsd->nd_nam;
3744 	struct ucred *cred = &nfsd->nd_cr;
3745 	struct nfsv3_fsinfo *sip;
3746 	int error = 0, rdonly, getret = 1, pref;
3747 	struct vnode *vp = NULL;
3748 	struct mount *mp = NULL;
3749 	struct vattr at;
3750 	nfsfh_t nfh;
3751 	fhandle_t *fhp;
3752 	u_quad_t maxfsize;
3753 	struct statfs sb;
3754 	struct nfsm_info info;
3755 
3756 	info.mrep = nfsd->nd_mrep;
3757 	info.mreq = NULL;
3758 	info.md = nfsd->nd_md;
3759 	info.dpos = nfsd->nd_dpos;
3760 
3761 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3762 	fhp = &nfh.fh_generic;
3763 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3764 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3765 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3766 	if (error) {
3767 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3768 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3769 		error = 0;
3770 		goto nfsmout;
3771 	}
3772 
3773 	/* XXX Try to make a guess on the max file size. */
3774 	VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3775 	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3776 
3777 	getret = VOP_GETATTR(vp, &at);
3778 	vput(vp);
3779 	vp = NULL;
3780 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3781 			      NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3782 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3783 	sip = nfsm_build(&info, NFSX_V3FSINFO);
3784 
3785 	/*
3786 	 * XXX
3787 	 * There should be file system VFS OP(s) to get this information.
3788 	 * For now, assume ufs.
3789 	 */
3790 	if (slp->ns_so->so_type == SOCK_DGRAM)
3791 		pref = NFS_MAXDGRAMDATA;
3792 	else
3793 		pref = NFS_MAXDATA;
3794 	sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3795 	sip->fs_rtpref = txdr_unsigned(pref);
3796 	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3797 	sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3798 	sip->fs_wtpref = txdr_unsigned(pref);
3799 	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3800 	sip->fs_dtpref = txdr_unsigned(pref);
3801 	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3802 	sip->fs_timedelta.nfsv3_sec = 0;
3803 	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3804 	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3805 		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3806 		NFSV3FSINFO_CANSETTIME);
3807 nfsmout:
3808 	*mrq = info.mreq;
3809 	if (vp)
3810 		vput(vp);
3811 	return(error);
3812 }
3813 
3814 /*
3815  * nfs pathconf service
3816  */
3817 int
3818 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3819 	       struct thread *td, struct mbuf **mrq)
3820 {
3821 	struct sockaddr *nam = nfsd->nd_nam;
3822 	struct ucred *cred = &nfsd->nd_cr;
3823 	struct nfsv3_pathconf *pc;
3824 	int error = 0, rdonly, getret = 1;
3825 	register_t linkmax, namemax, chownres, notrunc;
3826 	struct vnode *vp = NULL;
3827 	struct mount *mp = NULL;
3828 	struct vattr at;
3829 	nfsfh_t nfh;
3830 	fhandle_t *fhp;
3831 	struct nfsm_info info;
3832 
3833 	info.mrep = nfsd->nd_mrep;
3834 	info.mreq = NULL;
3835 	info.md = nfsd->nd_md;
3836 	info.dpos = nfsd->nd_dpos;
3837 
3838 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3839 	fhp = &nfh.fh_generic;
3840 	NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3841 	error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3842 		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3843 	if (error) {
3844 		NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3845 		nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3846 		error = 0;
3847 		goto nfsmout;
3848 	}
3849 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3850 	if (!error)
3851 		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3852 	if (!error)
3853 		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3854 	if (!error)
3855 		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3856 	getret = VOP_GETATTR(vp, &at);
3857 	vput(vp);
3858 	vp = NULL;
3859 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3860 			      NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3861 			      &error));
3862 	nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3863 	if (error) {
3864 		error = 0;
3865 		goto nfsmout;
3866 	}
3867 	pc = nfsm_build(&info, NFSX_V3PATHCONF);
3868 
3869 	pc->pc_linkmax = txdr_unsigned(linkmax);
3870 	pc->pc_namemax = txdr_unsigned(namemax);
3871 	pc->pc_notrunc = txdr_unsigned(notrunc);
3872 	pc->pc_chownrestricted = txdr_unsigned(chownres);
3873 
3874 	/*
3875 	 * These should probably be supported by VOP_PATHCONF(), but
3876 	 * until msdosfs is exportable (why would you want to?), the
3877 	 * Unix defaults should be ok.
3878 	 */
3879 	pc->pc_caseinsensitive = nfs_false;
3880 	pc->pc_casepreserving = nfs_true;
3881 nfsmout:
3882 	*mrq = info.mreq;
3883 	if (vp)
3884 		vput(vp);
3885 	return(error);
3886 }
3887 
3888 /*
3889  * Null operation, used by clients to ping server
3890  */
3891 /* ARGSUSED */
3892 int
3893 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3894 	   struct thread *td, struct mbuf **mrq)
3895 {
3896 	struct nfsm_info info;
3897 	int error = NFSERR_RETVOID;
3898 
3899 	info.mrep = nfsd->nd_mrep;
3900 	info.mreq = NULL;
3901 
3902 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3903 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3904 nfsmout:
3905 	*mrq = info.mreq;
3906 	return (error);
3907 }
3908 
3909 /*
3910  * No operation, used for obsolete procedures
3911  */
3912 /* ARGSUSED */
3913 int
3914 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3915 	   struct thread *td, struct mbuf **mrq)
3916 {
3917 	struct nfsm_info info;
3918 	int error;
3919 
3920 	info.mrep = nfsd->nd_mrep;
3921 	info.mreq = NULL;
3922 
3923 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3924 	if (nfsd->nd_repstat)
3925 		error = nfsd->nd_repstat;
3926 	else
3927 		error = EPROCUNAVAIL;
3928 	NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3929 	error = 0;
3930 nfsmout:
3931 	*mrq = info.mreq;
3932 	return (error);
3933 }
3934 
3935 /*
3936  * Perform access checking for vnodes obtained from file handles that would
3937  * refer to files already opened by a Unix client. You cannot just use
3938  * vn_writechk() and VOP_ACCESS() for two reasons.
3939  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3940  * 2 - The owner is to be given access irrespective of mode bits for some
3941  *     operations, so that processes that chmod after opening a file don't
3942  *     break. I don't like this because it opens a security hole, but since
3943  *     the nfs server opens a security hole the size of a barn door anyhow,
3944  *     what the heck.
3945  *
3946  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3947  * will return EPERM instead of EACCESS. EPERM is always an error.
3948  */
3949 static int
3950 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3951 	     int rdonly, struct thread *td, int override)
3952 {
3953 	struct vattr vattr;
3954 	int error;
3955 
3956 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3957 	if (flags & VWRITE) {
3958 		/* Just vn_writechk() changed to check rdonly */
3959 		/*
3960 		 * Disallow write attempts on read-only file systems;
3961 		 * unless the file is a socket or a block or character
3962 		 * device resident on the file system.
3963 		 */
3964 		if (rdonly ||
3965 		    ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3966 			switch (vp->v_type) {
3967 			case VREG:
3968 			case VDIR:
3969 			case VLNK:
3970 				return (EROFS);
3971 			default:
3972 				break;
3973 			}
3974 		}
3975 		/*
3976 		 * If there's shared text associated with
3977 		 * the inode, we can't allow writing.
3978 		 */
3979 		if (vp->v_flag & VTEXT)
3980 			return (ETXTBSY);
3981 	}
3982 	error = VOP_GETATTR(vp, &vattr);
3983 	if (error)
3984 		return (error);
3985 	error = VOP_ACCESS(vp, flags, cred);	/* XXX ruid/rgid vs uid/gid */
3986 	/*
3987 	 * Allow certain operations for the owner (reads and writes
3988 	 * on files that are already open).
3989 	 */
3990 	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3991 		error = 0;
3992 	return error;
3993 }
3994 #endif /* NFS_NOSERVER */
3995 
3996