xref: /dflybsd-src/sys/vfs/nfs/nfs_subs.c (revision 41871674d0079dec70d55eb824f39d07dc7b3310)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_subs.c  8.8 (Berkeley) 5/22/95
37  * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.37 2006/04/07 06:38:33 dillon Exp $
39  */
40 
41 /*
42  * These functions support the macros and help fiddle mbuf chains for
43  * the nfs op functions. They do things like create the rpc header and
44  * copy data between mbuf chains and uio lists.
45  */
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/buf.h>
50 #include <sys/proc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/nlookup.h>
54 #include <sys/namei.h>
55 #include <sys/mbuf.h>
56 #include <sys/socket.h>
57 #include <sys/stat.h>
58 #include <sys/malloc.h>
59 #include <sys/sysent.h>
60 #include <sys/syscall.h>
61 #include <sys/conf.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_object.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_zone.h>
67 
68 #include <sys/buf2.h>
69 
70 #include "rpcv2.h"
71 #include "nfsproto.h"
72 #include "nfs.h"
73 #include "nfsmount.h"
74 #include "nfsnode.h"
75 #include "xdr_subs.h"
76 #include "nfsm_subs.h"
77 #include "nfsrtt.h"
78 
79 #include <netinet/in.h>
80 
81 /*
82  * Data items converted to xdr at startup, since they are constant
83  * This is kinda hokey, but may save a little time doing byte swaps
84  */
85 u_int32_t nfs_xdrneg1;
86 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
87 	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
88 	rpc_auth_kerb;
89 u_int32_t nfs_prog, nfs_true, nfs_false;
90 
91 /* And other global data */
92 static u_int32_t nfs_xid = 0;
93 static enum vtype nv2tov_type[8]= {
94 	VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON,  VNON
95 };
96 enum vtype nv3tov_type[8]= {
97 	VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO
98 };
99 
100 int nfs_ticks;
101 int nfs_pbuf_freecnt = -1;	/* start out unlimited */
102 
103 struct nfs_reqq nfs_reqq;
104 struct nfssvc_sockhead nfssvc_sockhead;
105 int nfssvc_sockhead_flag;
106 struct nfsd_head nfsd_head;
107 int nfsd_head_flag;
108 struct nfs_bufq nfs_bufq;
109 struct nqfhhashhead *nqfhhashtbl;
110 u_long nqfhhash;
111 
112 static int nfs_prev_nfssvc_sy_narg;
113 static sy_call_t *nfs_prev_nfssvc_sy_call;
114 
115 #ifndef NFS_NOSERVER
116 
117 /*
118  * Mapping of old NFS Version 2 RPC numbers to generic numbers.
119  */
120 int nfsv3_procid[NFS_NPROCS] = {
121 	NFSPROC_NULL,
122 	NFSPROC_GETATTR,
123 	NFSPROC_SETATTR,
124 	NFSPROC_NOOP,
125 	NFSPROC_LOOKUP,
126 	NFSPROC_READLINK,
127 	NFSPROC_READ,
128 	NFSPROC_NOOP,
129 	NFSPROC_WRITE,
130 	NFSPROC_CREATE,
131 	NFSPROC_REMOVE,
132 	NFSPROC_RENAME,
133 	NFSPROC_LINK,
134 	NFSPROC_SYMLINK,
135 	NFSPROC_MKDIR,
136 	NFSPROC_RMDIR,
137 	NFSPROC_READDIR,
138 	NFSPROC_FSSTAT,
139 	NFSPROC_NOOP,
140 	NFSPROC_NOOP,
141 	NFSPROC_NOOP,
142 	NFSPROC_NOOP,
143 	NFSPROC_NOOP,
144 	NFSPROC_NOOP,
145 	NFSPROC_NOOP,
146 	NFSPROC_NOOP
147 };
148 
149 #endif /* NFS_NOSERVER */
150 /*
151  * and the reverse mapping from generic to Version 2 procedure numbers
152  */
153 int nfsv2_procid[NFS_NPROCS] = {
154 	NFSV2PROC_NULL,
155 	NFSV2PROC_GETATTR,
156 	NFSV2PROC_SETATTR,
157 	NFSV2PROC_LOOKUP,
158 	NFSV2PROC_NOOP,
159 	NFSV2PROC_READLINK,
160 	NFSV2PROC_READ,
161 	NFSV2PROC_WRITE,
162 	NFSV2PROC_CREATE,
163 	NFSV2PROC_MKDIR,
164 	NFSV2PROC_SYMLINK,
165 	NFSV2PROC_CREATE,
166 	NFSV2PROC_REMOVE,
167 	NFSV2PROC_RMDIR,
168 	NFSV2PROC_RENAME,
169 	NFSV2PROC_LINK,
170 	NFSV2PROC_READDIR,
171 	NFSV2PROC_NOOP,
172 	NFSV2PROC_STATFS,
173 	NFSV2PROC_NOOP,
174 	NFSV2PROC_NOOP,
175 	NFSV2PROC_NOOP,
176 	NFSV2PROC_NOOP,
177 	NFSV2PROC_NOOP,
178 	NFSV2PROC_NOOP,
179 	NFSV2PROC_NOOP,
180 };
181 
182 #ifndef NFS_NOSERVER
/*
 * Maps errno values to nfs error numbers.
 * Use NFSERR_IO as the catch all for ones not specifically defined in
 * RFC 1094.
 *
 * The table holds ELAST entries, five per row.  The first entry is
 * NFSERR_PERM, so entry N presumably corresponds to errno N + 1
 * (NOTE(review): confirm against the lookup code, which is not in view).
 * The row comments give the zero-based entry positions.
 */
static u_char nfsrv_v2errmap[ELAST] = {
  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 0-4 */
  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 5-9 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,	/* 10-14 */
  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,	/* 15-19 */
  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 20-24 */
  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,	/* 25-29 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 30-34 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 35-39 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 40-44 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 45-49 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 50-54 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 55-59 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,	/* 60-64 */
  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,	/* 65-69 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 70-74 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 75-79 */
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 80-84 */
  NFSERR_IO /* << Last is 86 */
};
208 
209 /*
210  * Maps errno values to nfs error numbers.
211  * Although it is not obvious whether or not NFS clients really care if
212  * a returned error value is in the specified list for the procedure, the
213  * safest thing to do is filter them appropriately. For Version 2, the
214  * X/Open XNFS document is the only specification that defines error values
215  * for each RPC (The RFC simply lists all possible error values for all RPCs),
216  * so I have decided to not do this for Version 2.
217  * The first entry is the default error return and the rest are the valid
218  * errors for that RPC in increasing numeric order.
219  */
220 static short nfsv3err_null[] = {
221 	0,
222 	0,
223 };
224 
225 static short nfsv3err_getattr[] = {
226 	NFSERR_IO,
227 	NFSERR_IO,
228 	NFSERR_STALE,
229 	NFSERR_BADHANDLE,
230 	NFSERR_SERVERFAULT,
231 	0,
232 };
233 
234 static short nfsv3err_setattr[] = {
235 	NFSERR_IO,
236 	NFSERR_PERM,
237 	NFSERR_IO,
238 	NFSERR_ACCES,
239 	NFSERR_INVAL,
240 	NFSERR_NOSPC,
241 	NFSERR_ROFS,
242 	NFSERR_DQUOT,
243 	NFSERR_STALE,
244 	NFSERR_BADHANDLE,
245 	NFSERR_NOT_SYNC,
246 	NFSERR_SERVERFAULT,
247 	0,
248 };
249 
250 static short nfsv3err_lookup[] = {
251 	NFSERR_IO,
252 	NFSERR_NOENT,
253 	NFSERR_IO,
254 	NFSERR_ACCES,
255 	NFSERR_NOTDIR,
256 	NFSERR_NAMETOL,
257 	NFSERR_STALE,
258 	NFSERR_BADHANDLE,
259 	NFSERR_SERVERFAULT,
260 	0,
261 };
262 
263 static short nfsv3err_access[] = {
264 	NFSERR_IO,
265 	NFSERR_IO,
266 	NFSERR_STALE,
267 	NFSERR_BADHANDLE,
268 	NFSERR_SERVERFAULT,
269 	0,
270 };
271 
272 static short nfsv3err_readlink[] = {
273 	NFSERR_IO,
274 	NFSERR_IO,
275 	NFSERR_ACCES,
276 	NFSERR_INVAL,
277 	NFSERR_STALE,
278 	NFSERR_BADHANDLE,
279 	NFSERR_NOTSUPP,
280 	NFSERR_SERVERFAULT,
281 	0,
282 };
283 
284 static short nfsv3err_read[] = {
285 	NFSERR_IO,
286 	NFSERR_IO,
287 	NFSERR_NXIO,
288 	NFSERR_ACCES,
289 	NFSERR_INVAL,
290 	NFSERR_STALE,
291 	NFSERR_BADHANDLE,
292 	NFSERR_SERVERFAULT,
293 	0,
294 };
295 
296 static short nfsv3err_write[] = {
297 	NFSERR_IO,
298 	NFSERR_IO,
299 	NFSERR_ACCES,
300 	NFSERR_INVAL,
301 	NFSERR_FBIG,
302 	NFSERR_NOSPC,
303 	NFSERR_ROFS,
304 	NFSERR_DQUOT,
305 	NFSERR_STALE,
306 	NFSERR_BADHANDLE,
307 	NFSERR_SERVERFAULT,
308 	0,
309 };
310 
311 static short nfsv3err_create[] = {
312 	NFSERR_IO,
313 	NFSERR_IO,
314 	NFSERR_ACCES,
315 	NFSERR_EXIST,
316 	NFSERR_NOTDIR,
317 	NFSERR_NOSPC,
318 	NFSERR_ROFS,
319 	NFSERR_NAMETOL,
320 	NFSERR_DQUOT,
321 	NFSERR_STALE,
322 	NFSERR_BADHANDLE,
323 	NFSERR_NOTSUPP,
324 	NFSERR_SERVERFAULT,
325 	0,
326 };
327 
328 static short nfsv3err_mkdir[] = {
329 	NFSERR_IO,
330 	NFSERR_IO,
331 	NFSERR_ACCES,
332 	NFSERR_EXIST,
333 	NFSERR_NOTDIR,
334 	NFSERR_NOSPC,
335 	NFSERR_ROFS,
336 	NFSERR_NAMETOL,
337 	NFSERR_DQUOT,
338 	NFSERR_STALE,
339 	NFSERR_BADHANDLE,
340 	NFSERR_NOTSUPP,
341 	NFSERR_SERVERFAULT,
342 	0,
343 };
344 
345 static short nfsv3err_symlink[] = {
346 	NFSERR_IO,
347 	NFSERR_IO,
348 	NFSERR_ACCES,
349 	NFSERR_EXIST,
350 	NFSERR_NOTDIR,
351 	NFSERR_NOSPC,
352 	NFSERR_ROFS,
353 	NFSERR_NAMETOL,
354 	NFSERR_DQUOT,
355 	NFSERR_STALE,
356 	NFSERR_BADHANDLE,
357 	NFSERR_NOTSUPP,
358 	NFSERR_SERVERFAULT,
359 	0,
360 };
361 
362 static short nfsv3err_mknod[] = {
363 	NFSERR_IO,
364 	NFSERR_IO,
365 	NFSERR_ACCES,
366 	NFSERR_EXIST,
367 	NFSERR_NOTDIR,
368 	NFSERR_NOSPC,
369 	NFSERR_ROFS,
370 	NFSERR_NAMETOL,
371 	NFSERR_DQUOT,
372 	NFSERR_STALE,
373 	NFSERR_BADHANDLE,
374 	NFSERR_NOTSUPP,
375 	NFSERR_SERVERFAULT,
376 	NFSERR_BADTYPE,
377 	0,
378 };
379 
380 static short nfsv3err_remove[] = {
381 	NFSERR_IO,
382 	NFSERR_NOENT,
383 	NFSERR_IO,
384 	NFSERR_ACCES,
385 	NFSERR_NOTDIR,
386 	NFSERR_ROFS,
387 	NFSERR_NAMETOL,
388 	NFSERR_STALE,
389 	NFSERR_BADHANDLE,
390 	NFSERR_SERVERFAULT,
391 	0,
392 };
393 
394 static short nfsv3err_rmdir[] = {
395 	NFSERR_IO,
396 	NFSERR_NOENT,
397 	NFSERR_IO,
398 	NFSERR_ACCES,
399 	NFSERR_EXIST,
400 	NFSERR_NOTDIR,
401 	NFSERR_INVAL,
402 	NFSERR_ROFS,
403 	NFSERR_NAMETOL,
404 	NFSERR_NOTEMPTY,
405 	NFSERR_STALE,
406 	NFSERR_BADHANDLE,
407 	NFSERR_NOTSUPP,
408 	NFSERR_SERVERFAULT,
409 	0,
410 };
411 
412 static short nfsv3err_rename[] = {
413 	NFSERR_IO,
414 	NFSERR_NOENT,
415 	NFSERR_IO,
416 	NFSERR_ACCES,
417 	NFSERR_EXIST,
418 	NFSERR_XDEV,
419 	NFSERR_NOTDIR,
420 	NFSERR_ISDIR,
421 	NFSERR_INVAL,
422 	NFSERR_NOSPC,
423 	NFSERR_ROFS,
424 	NFSERR_MLINK,
425 	NFSERR_NAMETOL,
426 	NFSERR_NOTEMPTY,
427 	NFSERR_DQUOT,
428 	NFSERR_STALE,
429 	NFSERR_BADHANDLE,
430 	NFSERR_NOTSUPP,
431 	NFSERR_SERVERFAULT,
432 	0,
433 };
434 
435 static short nfsv3err_link[] = {
436 	NFSERR_IO,
437 	NFSERR_IO,
438 	NFSERR_ACCES,
439 	NFSERR_EXIST,
440 	NFSERR_XDEV,
441 	NFSERR_NOTDIR,
442 	NFSERR_INVAL,
443 	NFSERR_NOSPC,
444 	NFSERR_ROFS,
445 	NFSERR_MLINK,
446 	NFSERR_NAMETOL,
447 	NFSERR_DQUOT,
448 	NFSERR_STALE,
449 	NFSERR_BADHANDLE,
450 	NFSERR_NOTSUPP,
451 	NFSERR_SERVERFAULT,
452 	0,
453 };
454 
455 static short nfsv3err_readdir[] = {
456 	NFSERR_IO,
457 	NFSERR_IO,
458 	NFSERR_ACCES,
459 	NFSERR_NOTDIR,
460 	NFSERR_STALE,
461 	NFSERR_BADHANDLE,
462 	NFSERR_BAD_COOKIE,
463 	NFSERR_TOOSMALL,
464 	NFSERR_SERVERFAULT,
465 	0,
466 };
467 
468 static short nfsv3err_readdirplus[] = {
469 	NFSERR_IO,
470 	NFSERR_IO,
471 	NFSERR_ACCES,
472 	NFSERR_NOTDIR,
473 	NFSERR_STALE,
474 	NFSERR_BADHANDLE,
475 	NFSERR_BAD_COOKIE,
476 	NFSERR_NOTSUPP,
477 	NFSERR_TOOSMALL,
478 	NFSERR_SERVERFAULT,
479 	0,
480 };
481 
482 static short nfsv3err_fsstat[] = {
483 	NFSERR_IO,
484 	NFSERR_IO,
485 	NFSERR_STALE,
486 	NFSERR_BADHANDLE,
487 	NFSERR_SERVERFAULT,
488 	0,
489 };
490 
491 static short nfsv3err_fsinfo[] = {
492 	NFSERR_STALE,
493 	NFSERR_STALE,
494 	NFSERR_BADHANDLE,
495 	NFSERR_SERVERFAULT,
496 	0,
497 };
498 
499 static short nfsv3err_pathconf[] = {
500 	NFSERR_STALE,
501 	NFSERR_STALE,
502 	NFSERR_BADHANDLE,
503 	NFSERR_SERVERFAULT,
504 	0,
505 };
506 
507 static short nfsv3err_commit[] = {
508 	NFSERR_IO,
509 	NFSERR_IO,
510 	NFSERR_STALE,
511 	NFSERR_BADHANDLE,
512 	NFSERR_SERVERFAULT,
513 	0,
514 };
515 
516 static short *nfsrv_v3errmap[] = {
517 	nfsv3err_null,
518 	nfsv3err_getattr,
519 	nfsv3err_setattr,
520 	nfsv3err_lookup,
521 	nfsv3err_access,
522 	nfsv3err_readlink,
523 	nfsv3err_read,
524 	nfsv3err_write,
525 	nfsv3err_create,
526 	nfsv3err_mkdir,
527 	nfsv3err_symlink,
528 	nfsv3err_mknod,
529 	nfsv3err_remove,
530 	nfsv3err_rmdir,
531 	nfsv3err_rename,
532 	nfsv3err_link,
533 	nfsv3err_readdir,
534 	nfsv3err_readdirplus,
535 	nfsv3err_fsstat,
536 	nfsv3err_fsinfo,
537 	nfsv3err_pathconf,
538 	nfsv3err_commit,
539 };
540 
541 #endif /* NFS_NOSERVER */
542 
/* Objects defined in other NFS source files. */
extern struct nfsrtt nfsrtt;		/* nfs_init() resets nfsrtt.pos */
extern struct nfsstats nfsstats;
extern nfstype nfsv2_type[9];
extern nfstype nfsv3_type[9];
extern struct nfsnodehashhead *nfsnodehashtbl;
extern u_long nfsnodehash;

/* The handler nfs_init() installs in the SYS_nfssvc system call slot. */
struct nfssvc_args;
extern int nfssvc(struct proc *, struct nfssvc_args *, int *);

/* Declares struct nfsnodehashhead, a list head of struct nfsnode. */
LIST_HEAD(nfsnodehashhead, nfsnode);
554 
555 /*
556  * This needs to return a monotonically increasing or close to monotonically
557  * increasing result, otherwise the write gathering queues won't work
558  * properly.
559  */
560 u_quad_t
561 nfs_curusec(void)
562 {
563 	struct timeval tv;
564 
565 	getmicrouptime(&tv);
566 	return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec);
567 }
568 
569 /*
570  * Create the header for an rpc request packet
571  * The hsiz is the size of the rest of the nfs request header.
572  * (just used to decide if a cluster is a good idea)
573  */
574 struct mbuf *
575 nfsm_reqh(struct vnode *vp, u_long procid, int hsiz, caddr_t *bposp)
576 {
577 	struct mbuf *mb;
578 	caddr_t bpos;
579 
580 	mb = m_getl(hsiz, MB_WAIT, MT_DATA, 0, NULL);
581 	mb->m_len = 0;
582 	bpos = mtod(mb, caddr_t);
583 
584 	/* Finally, return values */
585 	*bposp = bpos;
586 	return (mb);
587 }
588 
589 /*
590  * Build the RPC header and fill in the authorization info.
591  * The authorization string argument is only used when the credentials
592  * come from outside of the kernel.
593  * Returns the head of the mbuf list.
594  */
595 struct mbuf *
596 nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
597 	     int auth_len, char *auth_str, int verf_len, char *verf_str,
598 	     struct mbuf *mrest, int mrest_len, struct mbuf **mbp,
599 	     u_int32_t *xidp)
600 {
601 	struct mbuf *mb;
602 	u_int32_t *tl;
603 	caddr_t bpos;
604 	int i;
605 	struct mbuf *mreq, *mb2;
606 	int siz, grpsiz, authsiz, dsiz;
607 
608 	authsiz = nfsm_rndup(auth_len);
609 	dsiz = authsiz + 10 * NFSX_UNSIGNED;
610 	mb = m_getl(dsiz, MB_WAIT, MT_DATA, M_PKTHDR, NULL);
611 	if (dsiz < MINCLSIZE) {
612 		if (dsiz < MHLEN)
613 			MH_ALIGN(mb, dsiz);
614 		else
615 			MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
616 	}
617 	mb->m_len = mb->m_pkthdr.len = 0;
618 	mreq = mb;
619 	bpos = mtod(mb, caddr_t);
620 
621 	/*
622 	 * First the RPC header.
623 	 */
624 	nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
625 
626 	/* Get a pretty random xid to start with */
627 	if (!nfs_xid)
628 		nfs_xid = random();
629 	/*
630 	 * Skip zero xid if it should ever happen.
631 	 */
632 	if (++nfs_xid == 0)
633 		nfs_xid++;
634 
635 	*tl++ = *xidp = txdr_unsigned(nfs_xid);
636 	*tl++ = rpc_call;
637 	*tl++ = rpc_vers;
638 	*tl++ = txdr_unsigned(NFS_PROG);
639 	if (nmflag & NFSMNT_NFSV3)
640 		*tl++ = txdr_unsigned(NFS_VER3);
641 	else
642 		*tl++ = txdr_unsigned(NFS_VER2);
643 	if (nmflag & NFSMNT_NFSV3)
644 		*tl++ = txdr_unsigned(procid);
645 	else
646 		*tl++ = txdr_unsigned(nfsv2_procid[procid]);
647 
648 	/*
649 	 * And then the authorization cred.
650 	 */
651 	*tl++ = txdr_unsigned(auth_type);
652 	*tl = txdr_unsigned(authsiz);
653 	switch (auth_type) {
654 	case RPCAUTH_UNIX:
655 		nfsm_build(tl, u_int32_t *, auth_len);
656 		*tl++ = 0;		/* stamp ?? */
657 		*tl++ = 0;		/* NULL hostname */
658 		*tl++ = txdr_unsigned(cr->cr_uid);
659 		*tl++ = txdr_unsigned(cr->cr_groups[0]);
660 		grpsiz = (auth_len >> 2) - 5;
661 		*tl++ = txdr_unsigned(grpsiz);
662 		for (i = 1; i <= grpsiz; i++)
663 			*tl++ = txdr_unsigned(cr->cr_groups[i]);
664 		break;
665 	case RPCAUTH_KERB4:
666 		siz = auth_len;
667 		while (siz > 0) {
668 			if (M_TRAILINGSPACE(mb) == 0) {
669 				mb2 = m_getl(siz, MB_WAIT, MT_DATA, 0, NULL);
670 				mb2->m_len = 0;
671 				mb->m_next = mb2;
672 				mb = mb2;
673 				bpos = mtod(mb, caddr_t);
674 			}
675 			i = min(siz, M_TRAILINGSPACE(mb));
676 			bcopy(auth_str, bpos, i);
677 			mb->m_len += i;
678 			auth_str += i;
679 			bpos += i;
680 			siz -= i;
681 		}
682 		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
683 			for (i = 0; i < siz; i++)
684 				*bpos++ = '\0';
685 			mb->m_len += siz;
686 		}
687 		break;
688 	};
689 
690 	/*
691 	 * And the verifier...
692 	 */
693 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
694 	if (verf_str) {
695 		*tl++ = txdr_unsigned(RPCAUTH_KERB4);
696 		*tl = txdr_unsigned(verf_len);
697 		siz = verf_len;
698 		while (siz > 0) {
699 			if (M_TRAILINGSPACE(mb) == 0) {
700 				mb2 = m_getl(siz, MB_WAIT, MT_DATA, 0, NULL);
701 				mb2->m_len = 0;
702 				mb->m_next = mb2;
703 				mb = mb2;
704 				bpos = mtod(mb, caddr_t);
705 			}
706 			i = min(siz, M_TRAILINGSPACE(mb));
707 			bcopy(verf_str, bpos, i);
708 			mb->m_len += i;
709 			verf_str += i;
710 			bpos += i;
711 			siz -= i;
712 		}
713 		if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
714 			for (i = 0; i < siz; i++)
715 				*bpos++ = '\0';
716 			mb->m_len += siz;
717 		}
718 	} else {
719 		*tl++ = txdr_unsigned(RPCAUTH_NULL);
720 		*tl = 0;
721 	}
722 	mb->m_next = mrest;
723 	mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
724 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
725 	*mbp = mb;
726 	return (mreq);
727 }
728 
729 /*
730  * copies mbuf chain to the uio scatter/gather list
731  */
732 int
733 nfsm_mbuftouio(struct mbuf **mrep, struct uio *uiop, int siz, caddr_t *dpos)
734 {
735 	char *mbufcp, *uiocp;
736 	int xfer, left, len;
737 	struct mbuf *mp;
738 	long uiosiz, rem;
739 	int error = 0;
740 
741 	mp = *mrep;
742 	mbufcp = *dpos;
743 	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
744 	rem = nfsm_rndup(siz)-siz;
745 	while (siz > 0) {
746 		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
747 			return (EFBIG);
748 		left = uiop->uio_iov->iov_len;
749 		uiocp = uiop->uio_iov->iov_base;
750 		if (left > siz)
751 			left = siz;
752 		uiosiz = left;
753 		while (left > 0) {
754 			while (len == 0) {
755 				mp = mp->m_next;
756 				if (mp == NULL)
757 					return (EBADRPC);
758 				mbufcp = mtod(mp, caddr_t);
759 				len = mp->m_len;
760 			}
761 			xfer = (left > len) ? len : left;
762 #ifdef notdef
763 			/* Not Yet.. */
764 			if (uiop->uio_iov->iov_op != NULL)
765 				(*(uiop->uio_iov->iov_op))
766 				(mbufcp, uiocp, xfer);
767 			else
768 #endif
769 			if (uiop->uio_segflg == UIO_SYSSPACE)
770 				bcopy(mbufcp, uiocp, xfer);
771 			else
772 				copyout(mbufcp, uiocp, xfer);
773 			left -= xfer;
774 			len -= xfer;
775 			mbufcp += xfer;
776 			uiocp += xfer;
777 			uiop->uio_offset += xfer;
778 			uiop->uio_resid -= xfer;
779 		}
780 		if (uiop->uio_iov->iov_len <= siz) {
781 			uiop->uio_iovcnt--;
782 			uiop->uio_iov++;
783 		} else {
784 			uiop->uio_iov->iov_base += uiosiz;
785 			uiop->uio_iov->iov_len -= uiosiz;
786 		}
787 		siz -= uiosiz;
788 	}
789 	*dpos = mbufcp;
790 	*mrep = mp;
791 	if (rem > 0) {
792 		if (len < rem)
793 			error = nfs_adv(mrep, dpos, rem, len);
794 		else
795 			*dpos += rem;
796 	}
797 	return (error);
798 }
799 
800 /*
801  * copies a uio scatter/gather list to an mbuf chain.
802  * NOTE: can ony handle iovcnt == 1
803  */
804 int
805 nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos)
806 {
807 	char *uiocp;
808 	struct mbuf *mp, *mp2;
809 	int xfer, left, mlen;
810 	int uiosiz, rem;
811 	boolean_t getcluster;
812 	char *cp;
813 
814 #ifdef DIAGNOSTIC
815 	if (uiop->uio_iovcnt != 1)
816 		panic("nfsm_uiotombuf: iovcnt != 1");
817 #endif
818 
819 	if (siz >= MINCLSIZE)
820 		getcluster = TRUE;
821 	else
822 		getcluster = FALSE;
823 	rem = nfsm_rndup(siz) - siz;
824 	mp = mp2 = *mq;
825 	while (siz > 0) {
826 		left = uiop->uio_iov->iov_len;
827 		uiocp = uiop->uio_iov->iov_base;
828 		if (left > siz)
829 			left = siz;
830 		uiosiz = left;
831 		while (left > 0) {
832 			mlen = M_TRAILINGSPACE(mp);
833 			if (mlen == 0) {
834 				if (getcluster)
835 					mp = m_getcl(MB_WAIT, MT_DATA, 0);
836 				else
837 					mp = m_get(MB_WAIT, MT_DATA);
838 				mp->m_len = 0;
839 				mp2->m_next = mp;
840 				mp2 = mp;
841 				mlen = M_TRAILINGSPACE(mp);
842 			}
843 			xfer = (left > mlen) ? mlen : left;
844 #ifdef notdef
845 			/* Not Yet.. */
846 			if (uiop->uio_iov->iov_op != NULL)
847 				(*(uiop->uio_iov->iov_op))
848 				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
849 			else
850 #endif
851 			if (uiop->uio_segflg == UIO_SYSSPACE)
852 				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
853 			else
854 				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
855 			mp->m_len += xfer;
856 			left -= xfer;
857 			uiocp += xfer;
858 			uiop->uio_offset += xfer;
859 			uiop->uio_resid -= xfer;
860 		}
861 		uiop->uio_iov->iov_base += uiosiz;
862 		uiop->uio_iov->iov_len -= uiosiz;
863 		siz -= uiosiz;
864 	}
865 	if (rem > 0) {
866 		if (rem > M_TRAILINGSPACE(mp)) {
867 			MGET(mp, MB_WAIT, MT_DATA);
868 			mp->m_len = 0;
869 			mp2->m_next = mp;
870 		}
871 		cp = mtod(mp, caddr_t)+mp->m_len;
872 		for (left = 0; left < rem; left++)
873 			*cp++ = '\0';
874 		mp->m_len += rem;
875 		*bpos = cp;
876 	} else
877 		*bpos = mtod(mp, caddr_t)+mp->m_len;
878 	*mq = mp;
879 	return (0);
880 }
881 
882 /*
883  * Help break down an mbuf chain by setting the first siz bytes contiguous
884  * pointed to by returned val.
885  * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
886  * cases. (The macros use the vars. dpos and dpos2)
887  */
888 int
889 nfsm_disct(struct mbuf **mdp, caddr_t *dposp, int siz, int left, caddr_t *cp2)
890 {
891 	struct mbuf *mp, *mp2;
892 	int siz2, xfer;
893 	caddr_t p;
894 
895 	mp = *mdp;
896 	while (left == 0) {
897 		*mdp = mp = mp->m_next;
898 		if (mp == NULL)
899 			return (EBADRPC);
900 		left = mp->m_len;
901 		*dposp = mtod(mp, caddr_t);
902 	}
903 	if (left >= siz) {
904 		*cp2 = *dposp;
905 		*dposp += siz;
906 	} else if (mp->m_next == NULL) {
907 		return (EBADRPC);
908 	} else if (siz > MHLEN) {
909 		panic("nfs S too big");
910 	} else {
911 		MGET(mp2, MB_WAIT, MT_DATA);
912 		mp2->m_next = mp->m_next;
913 		mp->m_next = mp2;
914 		mp->m_len -= left;
915 		mp = mp2;
916 		*cp2 = p = mtod(mp, caddr_t);
917 		bcopy(*dposp, p, left);		/* Copy what was left */
918 		siz2 = siz-left;
919 		p += left;
920 		mp2 = mp->m_next;
921 		/* Loop around copying up the siz2 bytes */
922 		while (siz2 > 0) {
923 			if (mp2 == NULL)
924 				return (EBADRPC);
925 			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
926 			if (xfer > 0) {
927 				bcopy(mtod(mp2, caddr_t), p, xfer);
928 				NFSMADV(mp2, xfer);
929 				mp2->m_len -= xfer;
930 				p += xfer;
931 				siz2 -= xfer;
932 			}
933 			if (siz2 > 0)
934 				mp2 = mp2->m_next;
935 		}
936 		mp->m_len = siz;
937 		*mdp = mp2;
938 		*dposp = mtod(mp2, caddr_t);
939 	}
940 	return (0);
941 }
942 
943 /*
944  * Advance the position in the mbuf chain.
945  */
946 int
947 nfs_adv(struct mbuf **mdp, caddr_t *dposp, int offs, int left)
948 {
949 	struct mbuf *m;
950 	int s;
951 
952 	m = *mdp;
953 	s = left;
954 	while (s < offs) {
955 		offs -= s;
956 		m = m->m_next;
957 		if (m == NULL)
958 			return (EBADRPC);
959 		s = m->m_len;
960 	}
961 	*mdp = m;
962 	*dposp = mtod(m, caddr_t)+offs;
963 	return (0);
964 }
965 
966 /*
967  * Copy a string into mbufs for the hard cases...
968  */
969 int
970 nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz)
971 {
972 	struct mbuf *m1 = NULL, *m2;
973 	long left, xfer, len, tlen;
974 	u_int32_t *tl;
975 	int putsize;
976 
977 	putsize = 1;
978 	m2 = *mb;
979 	left = M_TRAILINGSPACE(m2);
980 	if (left > 0) {
981 		tl = ((u_int32_t *)(*bpos));
982 		*tl++ = txdr_unsigned(siz);
983 		putsize = 0;
984 		left -= NFSX_UNSIGNED;
985 		m2->m_len += NFSX_UNSIGNED;
986 		if (left > 0) {
987 			bcopy(cp, (caddr_t) tl, left);
988 			siz -= left;
989 			cp += left;
990 			m2->m_len += left;
991 			left = 0;
992 		}
993 	}
994 	/* Loop around adding mbufs */
995 	while (siz > 0) {
996 		int msize;
997 
998 		m1 = m_getl(siz, MB_WAIT, MT_DATA, 0, &msize);
999 		m1->m_len = msize;
1000 		m2->m_next = m1;
1001 		m2 = m1;
1002 		tl = mtod(m1, u_int32_t *);
1003 		tlen = 0;
1004 		if (putsize) {
1005 			*tl++ = txdr_unsigned(siz);
1006 			m1->m_len -= NFSX_UNSIGNED;
1007 			tlen = NFSX_UNSIGNED;
1008 			putsize = 0;
1009 		}
1010 		if (siz < m1->m_len) {
1011 			len = nfsm_rndup(siz);
1012 			xfer = siz;
1013 			if (xfer < len)
1014 				*(tl+(xfer>>2)) = 0;
1015 		} else {
1016 			xfer = len = m1->m_len;
1017 		}
1018 		bcopy(cp, (caddr_t) tl, xfer);
1019 		m1->m_len = len+tlen;
1020 		siz -= xfer;
1021 		cp += xfer;
1022 	}
1023 	*mb = m1;
1024 	*bpos = mtod(m1, caddr_t)+m1->m_len;
1025 	return (0);
1026 }
1027 
1028 /*
1029  * Called once to initialize data structures...
1030  */
1031 int
1032 nfs_init(struct vfsconf *vfsp)
1033 {
1034 	int i;
1035 
1036 	callout_init(&nfs_timer_handle);
1037 	nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1);
1038 
1039 	nfs_mount_type = vfsp->vfc_typenum;
1040 	nfsrtt.pos = 0;
1041 	rpc_vers = txdr_unsigned(RPC_VER2);
1042 	rpc_call = txdr_unsigned(RPC_CALL);
1043 	rpc_reply = txdr_unsigned(RPC_REPLY);
1044 	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
1045 	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
1046 	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
1047 	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
1048 	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
1049 	rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
1050 	nfs_prog = txdr_unsigned(NFS_PROG);
1051 	nfs_true = txdr_unsigned(TRUE);
1052 	nfs_false = txdr_unsigned(FALSE);
1053 	nfs_xdrneg1 = txdr_unsigned(-1);
1054 	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
1055 	if (nfs_ticks < 1)
1056 		nfs_ticks = 1;
1057 	/* Ensure async daemons disabled */
1058 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
1059 		nfs_iodwant[i] = NULL;
1060 		nfs_iodmount[i] = (struct nfsmount *)0;
1061 	}
1062 	nfs_nhinit();			/* Init the nfsnode table */
1063 #ifndef NFS_NOSERVER
1064 	nfsrv_init(0);			/* Init server data structures */
1065 	nfsrv_initcache();		/* Init the server request cache */
1066 #endif
1067 
1068 	/*
1069 	 * Initialize reply list and start timer
1070 	 */
1071 	TAILQ_INIT(&nfs_reqq);
1072 
1073 	nfs_timer(0);
1074 
1075 	nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg;
1076 	sysent[SYS_nfssvc].sy_narg = 2;
1077 	nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call;
1078 	sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc;
1079 
1080 	nfs_pbuf_freecnt = nswbuf / 2 + 1;
1081 
1082 	return (0);
1083 }
1084 
1085 int
1086 nfs_uninit(struct vfsconf *vfsp)
1087 {
1088 	callout_stop(&nfs_timer_handle);
1089 	nfs_mount_type = -1;
1090 	sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg;
1091 	sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call;
1092 	return (0);
1093 }
1094 
1095 /*
1096  * Attribute cache routines.
1097  * nfs_loadattrcache() - loads or updates the cache contents from attributes
1098  *	that are on the mbuf list
1099  * nfs_getattrcache() - returns valid attributes if found in cache, returns
1100  *	error otherwise
1101  */
1102 
1103 /*
1104  * Load the attribute cache (that lives in the nfsnode entry) with
1105  * the values on the mbuf list.  Load *vaper with the attributes.  vaper
1106  * may be NULL.
1107  *
1108  * As a side effect n_mtime, which we use to determine if the file was
1109  * modified by some other host, is set to the attribute timestamp and
1110  * NRMODIFIED is set if the two values differ.
1111  *
1112  * WARNING: the mtime loaded into vaper does not necessarily represent
1113  * n_mtime or n_attr.mtime due to NACC and NUPD.
1114  */
1115 int
1116 nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
1117 		  struct vattr *vaper, int lattr_flags)
1118 {
1119 	struct vnode *vp = *vpp;
1120 	struct vattr *vap;
1121 	struct nfs_fattr *fp;
1122 	struct nfsnode *np;
1123 	int32_t t1;
1124 	caddr_t cp2;
1125 	int error = 0;
1126 	udev_t rdev;
1127 	struct mbuf *md;
1128 	enum vtype vtyp;
1129 	u_short vmode;
1130 	struct timespec mtime;
1131 	int v3 = NFS_ISV3(vp);
1132 
1133 	md = *mdp;
1134 	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
1135 	if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0)
1136 		return (error);
1137 	fp = (struct nfs_fattr *)cp2;
1138 	if (v3) {
1139 		vtyp = nfsv3tov_type(fp->fa_type);
1140 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1141 		rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
1142 			fxdr_unsigned(int, fp->fa3_rdev.specdata2));
1143 		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
1144 	} else {
1145 		vtyp = nfsv2tov_type(fp->fa_type);
1146 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1147 		/*
1148 		 * XXX
1149 		 *
1150 		 * The duplicate information returned in fa_type and fa_mode
1151 		 * is an ambiguity in the NFS version 2 protocol.
1152 		 *
1153 		 * VREG should be taken literally as a regular file.  If a
1154 		 * server intents to return some type information differently
1155 		 * in the upper bits of the mode field (e.g. for sockets, or
1156 		 * FIFOs), NFSv2 mandates fa_type to be VNON.  Anyway, we
1157 		 * leave the examination of the mode bits even in the VREG
1158 		 * case to avoid breakage for bogus servers, but we make sure
1159 		 * that there are actually type bits set in the upper part of
1160 		 * fa_mode (and failing that, trust the va_type field).
1161 		 *
1162 		 * NFSv3 cleared the issue, and requires fa_mode to not
1163 		 * contain any type information (while also introduing sockets
1164 		 * and FIFOs for fa_type).
1165 		 */
1166 		if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
1167 			vtyp = IFTOVT(vmode);
1168 		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
1169 		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
1170 
1171 		/*
1172 		 * Really ugly NFSv2 kludge.
1173 		 */
1174 		if (vtyp == VCHR && rdev == (udev_t)0xffffffff)
1175 			vtyp = VFIFO;
1176 	}
1177 
1178 	/*
1179 	 * If v_type == VNON it is a new node, so fill in the v_type,
1180 	 * n_mtime fields. Check to see if it represents a special
1181 	 * device, and if so, check for a possible alias. Once the
1182 	 * correct vnode has been obtained, fill in the rest of the
1183 	 * information.
1184 	 */
1185 	np = VTONFS(vp);
1186 	if (vp->v_type != vtyp) {
1187 		nfs_setvtype(vp, vtyp);
1188 		if (vp->v_type == VFIFO) {
1189 			vp->v_ops = &vp->v_mount->mnt_vn_fifo_ops;
1190 		} else if (vp->v_type == VCHR || vp->v_type == VBLK) {
1191 			vp->v_ops = &vp->v_mount->mnt_vn_spec_ops;
1192 			addaliasu(vp, rdev);
1193 		} else {
1194 			vp->v_ops = &vp->v_mount->mnt_vn_use_ops;
1195 		}
1196 		np->n_mtime = mtime.tv_sec;
1197 	} else if (np->n_mtime != mtime.tv_sec) {
1198 		/*
		 * If we haven't modified the file locally and the server
		 * timestamp does not match, then the server probably
		 * modified the file.  We must flag this condition so
		 * the proper synchronization can be done.  We do not
		 * try to synchronize the state here because that
		 * could lead to an endless recursion.
1205 		 *
1206 		 * XXX loadattrcache can be set during the reply to a write,
1207 		 * before the write timestamp is properly processed.  To
1208 		 * avoid unconditionally setting the rmodified bit (which
1209 		 * has the effect of flushing the cache), we only do this
1210 		 * check if the lmodified bit is not set.
1211 		 */
1212 		np->n_mtime = mtime.tv_sec;
1213 		if ((lattr_flags & NFS_LATTR_NOMTIMECHECK) == 0)
1214 			np->n_flag |= NRMODIFIED;
1215 	}
1216 	vap = &np->n_vattr;
1217 	vap->va_type = vtyp;
1218 	vap->va_mode = (vmode & 07777);
1219 	vap->va_rdev = rdev;
1220 	vap->va_mtime = mtime;
1221 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1222 	if (v3) {
1223 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1224 		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
1225 		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
1226 		vap->va_size = fxdr_hyper(&fp->fa3_size);
1227 		vap->va_blocksize = NFS_FABLKSIZE;
1228 		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
1229 		vap->va_fileid = fxdr_unsigned(int32_t,
1230 		    fp->fa3_fileid.nfsuquad[1]);
1231 		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
1232 		fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
1233 		vap->va_flags = 0;
1234 		vap->va_filerev = 0;
1235 	} else {
1236 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1237 		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
1238 		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
1239 		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
1240 		vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
1241 		vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
1242 		    * NFS_FABLKSIZE;
1243 		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
1244 		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
1245 		vap->va_flags = 0;
1246 		vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
1247 		    fp->fa2_ctime.nfsv2_sec);
1248 		vap->va_ctime.tv_nsec = 0;
1249 		vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
1250 		vap->va_filerev = 0;
1251 	}
1252 	np->n_attrstamp = time_second;
1253 	if (vap->va_size != np->n_size) {
1254 		if (vap->va_type == VREG) {
1255 			if ((lattr_flags & NFS_LATTR_NOSHRINK) &&
1256 			    vap->va_size < np->n_size) {
1257 				/*
1258 				 * We've been told not to shrink the file;
1259 				 * zero np->n_attrstamp to indicate that
1260 				 * the attributes are stale.
1261 				 *
1262 				 * This occurs primarily due to recursive
1263 				 * NFS ops that are executed during periods
1264 				 * where we cannot safely reduce the size of
1265 				 * the file.
1266 				 *
1267 				 * Additionally, write rpcs are broken down
1268 				 * into buffers and np->n_size is
1269 				 * pre-extended.  Setting NRMODIFIED here
1270 				 * can result in n_size getting reset to a
1271 				 * lower value, which is NOT what we want.
1272 				 * XXX this needs to be cleaned up a lot
1273 				 * more.
1274 				 */
1275 				vap->va_size = np->n_size;
1276 				np->n_attrstamp = 0;
1277 				if ((np->n_flag & NLMODIFIED) == 0)
1278 					np->n_flag |= NRMODIFIED;
1279 			} else if (np->n_flag & NLMODIFIED) {
1280 				/*
1281 				 * We've modified the file: Use the larger
1282 				 * of our size, and the server's size.  At
1283 				 * this point the cache coherency is all
1284 				 * shot to hell.  To try to handle multiple
1285 				 * clients appending to the file at the same
1286 				 * time mark that the server has changed
1287 				 * the file if the server's notion of the
1288 				 * file size is larger then our notion.
1289 				 *
1290 				 * XXX this needs work.
1291 				 */
1292 				if (vap->va_size < np->n_size) {
1293 					vap->va_size = np->n_size;
1294 				} else {
1295 					np->n_size = vap->va_size;
1296 					np->n_flag |= NRMODIFIED;
1297 				}
1298 			} else {
1299 				/*
1300 				 * Someone changed the file's size on the
1301 				 * server and there are no local changes
1302 				 * to get in the way, set the size and mark
1303 				 * it.
1304 				 */
1305 				np->n_size = vap->va_size;
1306 				np->n_flag |= NRMODIFIED;
1307 			}
1308 			vnode_pager_setsize(vp, np->n_size);
1309 		} else {
1310 			np->n_size = vap->va_size;
1311 		}
1312 	}
1313 	if (vaper != NULL) {
1314 		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
1315 		if (np->n_flag & NCHG) {
1316 			if (np->n_flag & NACC)
1317 				vaper->va_atime = np->n_atim;
1318 			if (np->n_flag & NUPD)
1319 				vaper->va_mtime = np->n_mtim;
1320 		}
1321 	}
1322 	return (0);
1323 }
1324 
/*
 * Optional attribute cache debugging, compiled in only when NFS_ACDEBUG
 * is defined.  nfs_acdebug is exported read/write through SYSCTL_INT under
 * the _vfs_nfs node as "acdebug"; nfs_getattrcache() prints progressively
 * more detail as the value is raised (non-zero, >1, >2).
 */
#ifdef NFS_ACDEBUG
#include <sys/sysctl.h>
SYSCTL_DECL(_vfs_nfs);
static int nfs_acdebug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, "");
#endif
1331 
1332 /*
1333  * Check the time stamp
1334  * If the cache is valid, copy contents to *vap and return 0
1335  * otherwise return an error
1336  */
1337 int
1338 nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
1339 {
1340 	struct nfsnode *np;
1341 	struct vattr *vap;
1342 	struct nfsmount *nmp;
1343 	int timeo;
1344 
1345 	np = VTONFS(vp);
1346 	vap = &np->n_vattr;
1347 	nmp = VFSTONFS(vp->v_mount);
1348 
1349 	/*
1350 	 * Dynamic timeout based on how recently the file was modified.
1351 	 * n_mtime is always valid.
1352 	 */
1353 	timeo = (get_approximate_time_t() - np->n_mtime) / 60;
1354 
1355 #ifdef NFS_ACDEBUG
1356 	if (nfs_acdebug>1)
1357 		printf("nfs_getattrcache: initial timeo = %d\n", timeo);
1358 #endif
1359 
1360 	if (vap->va_type == VDIR) {
1361 		if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acdirmin)
1362 			timeo = nmp->nm_acdirmin;
1363 		else if (timeo > nmp->nm_acdirmax)
1364 			timeo = nmp->nm_acdirmax;
1365 	} else {
1366 		if ((np->n_flag & NLMODIFIED) || timeo < nmp->nm_acregmin)
1367 			timeo = nmp->nm_acregmin;
1368 		else if (timeo > nmp->nm_acregmax)
1369 			timeo = nmp->nm_acregmax;
1370 	}
1371 
1372 #ifdef NFS_ACDEBUG
1373 	if (nfs_acdebug > 2)
1374 		printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
1375 			nmp->nm_acregmin, nmp->nm_acregmax,
1376 			nmp->nm_acdirmin, nmp->nm_acdirmax);
1377 
1378 	if (nfs_acdebug)
1379 		printf("nfs_getattrcache: age = %d; final timeo = %d\n",
1380 			(int)(time_second - np->n_attrstamp), timeo);
1381 #endif
1382 
1383 	if (np->n_attrstamp == 0 || (time_second - np->n_attrstamp) >= timeo) {
1384 		nfsstats.attrcache_misses++;
1385 		return (ENOENT);
1386 	}
1387 	nfsstats.attrcache_hits++;
1388 
1389 	/*
1390 	 * Our attribute cache can be stale due to modifications made on
1391 	 * this host.  XXX this is a bad hack.  We need a more deterministic
1392 	 * means of finding out which np fields are valid verses attr cache
1393 	 * fields.  We really should update the vattr info on the fly when
1394 	 * making local changes.
1395 	 */
1396 	if (vap->va_size != np->n_size) {
1397 		if (vap->va_type == VREG) {
1398 			if (np->n_flag & NLMODIFIED) {
1399 				if (vap->va_size < np->n_size)
1400 					vap->va_size = np->n_size;
1401 				else
1402 					np->n_size = vap->va_size;
1403 			} else {
1404 				np->n_size = vap->va_size;
1405 			}
1406 			vnode_pager_setsize(vp, np->n_size);
1407 		} else {
1408 			np->n_size = vap->va_size;
1409 		}
1410 	}
1411 	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
1412 	if (np->n_flag & NCHG) {
1413 		if (np->n_flag & NACC)
1414 			vaper->va_atime = np->n_atim;
1415 		if (np->n_flag & NUPD)
1416 			vaper->va_mtime = np->n_mtim;
1417 	}
1418 	return (0);
1419 }
1420 
1421 #ifndef NFS_NOSERVER
1422 
1423 /*
1424  * Set up nameidata for a lookup() call and do it.
1425  *
1426  * If pubflag is set, this call is done for a lookup operation on the
1427  * public filehandle. In that case we allow crossing mountpoints and
1428  * absolute pathnames. However, the caller is expected to check that
1429  * the lookup result is within the public fs, and deny access if
1430  * it is not.
1431  *
1432  * dirp may be set whether an error is returned or not, and must be
1433  * released by the caller.
1434  *
1435  * On return nd->nl_ncp usually points to the target ncp, which may represent
1436  * a negative hit.
1437  *
1438  * NOTE: the caller must call nlookup_done(nd) unconditionally on return
1439  * to cleanup.
1440  */
1441 int
1442 nfs_namei(struct nlookupdata *nd, struct ucred *cred, int nameiop,
1443 	struct vnode **dvpp, struct vnode **vpp,
1444 	fhandle_t *fhp, int len,
1445 	struct nfssvc_sock *slp, struct sockaddr *nam, struct mbuf **mdp,
1446 	caddr_t *dposp, struct vnode **dirpp, struct thread *td,
1447 	int kerbflag, int pubflag)
1448 {
1449 	int i, rem;
1450 	int flags;
1451 	struct mbuf *md;
1452 	char *fromcp, *tocp, *cp;
1453 	char *namebuf;
1454 	struct namecache *ncp;
1455 	struct vnode *dp;
1456 	int error, rdonly;
1457 
1458 	namebuf = zalloc(namei_zone);
1459 	flags = 0;
1460 	*dirpp = NULL;
1461 
1462 	/*
1463 	 * Copy the name from the mbuf list to namebuf.
1464 	 */
1465 	fromcp = *dposp;
1466 	tocp = namebuf;
1467 	md = *mdp;
1468 	rem = mtod(md, caddr_t) + md->m_len - fromcp;
1469 	for (i = 0; i < len; i++) {
1470 		while (rem == 0) {
1471 			md = md->m_next;
1472 			if (md == NULL) {
1473 				error = EBADRPC;
1474 				goto out;
1475 			}
1476 			fromcp = mtod(md, caddr_t);
1477 			rem = md->m_len;
1478 		}
1479 		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
1480 			error = EACCES;
1481 			goto out;
1482 		}
1483 		*tocp++ = *fromcp++;
1484 		rem--;
1485 	}
1486 	*tocp = '\0';
1487 	*mdp = md;
1488 	*dposp = fromcp;
1489 	len = nfsm_rndup(len)-len;
1490 	if (len > 0) {
1491 		if (rem >= len)
1492 			*dposp += len;
1493 		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
1494 			goto out;
1495 	}
1496 
1497 	/*
1498 	 * Extract and set starting directory.  The returned dp is refd
1499 	 * but not locked.
1500 	 */
1501 	error = nfsrv_fhtovp(fhp, FALSE, &dp, cred, slp,
1502 				nam, &rdonly, kerbflag, pubflag);
1503 	if (error)
1504 		goto out;
1505 	if (dp->v_type != VDIR) {
1506 		vrele(dp);
1507 		error = ENOTDIR;
1508 		goto out;
1509 	}
1510 
1511 	/*
1512 	 * Set return directory.  Reference to dp is implicitly transfered
1513 	 * to the returned pointer.  This must be set before we potentially
1514 	 * goto out below.
1515 	 */
1516 	*dirpp = dp;
1517 
1518 	if (pubflag) {
1519 		/*
1520 		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1521 		 * and the 'native path' indicator.
1522 		 */
1523 		cp = zalloc(namei_zone);
1524 		fromcp = namebuf;
1525 		tocp = cp;
1526 		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
1527 			switch ((unsigned char)*fromcp) {
1528 			case WEBNFS_NATIVE_CHAR:
1529 				/*
1530 				 * 'Native' path for us is the same
1531 				 * as a path according to the NFS spec,
1532 				 * just skip the escape char.
1533 				 */
1534 				fromcp++;
1535 				break;
1536 			/*
1537 			 * More may be added in the future, range 0x80-0xff
1538 			 */
1539 			default:
1540 				error = EIO;
1541 				zfree(namei_zone, cp);
1542 				goto out;
1543 			}
1544 		}
1545 		/*
1546 		 * Translate the '%' escapes, URL-style.
1547 		 */
1548 		while (*fromcp != '\0') {
1549 			if (*fromcp == WEBNFS_ESC_CHAR) {
1550 				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
1551 					fromcp++;
1552 					*tocp++ = HEXSTRTOI(fromcp);
1553 					fromcp += 2;
1554 					continue;
1555 				} else {
1556 					error = ENOENT;
1557 					zfree(namei_zone, cp);
1558 					goto out;
1559 				}
1560 			} else
1561 				*tocp++ = *fromcp++;
1562 		}
1563 		*tocp = '\0';
1564 		zfree(namei_zone, namebuf);
1565 		namebuf = cp;
1566 	}
1567 
1568 	/*
1569 	 * Setup for search.  We need to get a start directory from dp.  Note
1570 	 * that dp is ref'd, but we no longer 'own' the ref (*dirpp owns it).
1571 	 */
1572 	if (pubflag == 0) {
1573 		flags |= NLC_NFS_NOSOFTLINKTRAV;
1574 		flags |= NLC_NOCROSSMOUNT;
1575 	}
1576 	if (rdonly)
1577 		flags |= NLC_NFS_RDONLY;
1578 	if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME)
1579 		flags |= NLC_CREATE;
1580 
1581 	/*
1582 	 * We need a starting ncp from the directory vnode dp.  dp must not
1583 	 * be locked.  The returned ncp will be refd but not locked.
1584 	 *
1585 	 * If no suitable ncp is found we instruct cache_fromdvp() to create
1586 	 * one.  If this fails the directory has probably been removed while
1587 	 * the target was chdir'd into it and any further lookup will fail.
1588 	 */
1589 	if ((ncp = cache_fromdvp(dp, cred, 1)) == NULL) {
1590 		error = EINVAL;
1591 		goto out;
1592 	}
1593 	nlookup_init_raw(nd, namebuf, UIO_SYSSPACE, flags, cred, ncp);
1594 	cache_drop(ncp);
1595 
1596 	/*
1597 	 * Ok, do the lookup.
1598 	 */
1599 	error = nlookup(nd);
1600 
1601 	/*
1602 	 * If no error occured return the requested dvpp and vpp.  If
1603 	 * NLC_CREATE was specified nd->nl_ncp may represent a negative
1604 	 * cache hit in which case we do not attempt to obtain the vp.
1605 	 */
1606 	if (error == 0) {
1607 		ncp = nd->nl_ncp;
1608 		if (dvpp) {
1609 			if (ncp->nc_parent &&
1610 			    ncp->nc_parent->nc_mount == ncp->nc_mount) {
1611 				error = cache_vget(ncp->nc_parent, nd->nl_cred,
1612 						LK_EXCLUSIVE, dvpp);
1613 			} else {
1614 				error = ENXIO;
1615 			}
1616 		}
1617 		if (vpp && ncp->nc_vp) {
1618 			error = cache_vget(ncp, nd->nl_cred, LK_EXCLUSIVE, vpp);
1619 		}
1620 		if (error) {
1621 			if (dvpp && *dvpp) {
1622 				vput(*dvpp);
1623 				*dvpp = NULL;
1624 			}
1625 			if (vpp && *vpp) {
1626 				vput(*vpp);
1627 				*vpp = NULL;
1628 			}
1629 		}
1630 	}
1631 
1632 	/*
1633 	 * Finish up.
1634 	 */
1635 out:
1636 	zfree(namei_zone, namebuf);
1637 	return (error);
1638 }
1639 
1640 /*
1641  * A fiddled version of m_adj() that ensures null fill to a long
1642  * boundary and only trims off the back end
1643  */
1644 void
1645 nfsm_adj(struct mbuf *mp, int len, int nul)
1646 {
1647 	struct mbuf *m;
1648 	int count, i;
1649 	char *cp;
1650 
1651 	/*
1652 	 * Trim from tail.  Scan the mbuf chain,
1653 	 * calculating its length and finding the last mbuf.
1654 	 * If the adjustment only affects this mbuf, then just
1655 	 * adjust and return.  Otherwise, rescan and truncate
1656 	 * after the remaining size.
1657 	 */
1658 	count = 0;
1659 	m = mp;
1660 	for (;;) {
1661 		count += m->m_len;
1662 		if (m->m_next == (struct mbuf *)0)
1663 			break;
1664 		m = m->m_next;
1665 	}
1666 	if (m->m_len > len) {
1667 		m->m_len -= len;
1668 		if (nul > 0) {
1669 			cp = mtod(m, caddr_t)+m->m_len-nul;
1670 			for (i = 0; i < nul; i++)
1671 				*cp++ = '\0';
1672 		}
1673 		return;
1674 	}
1675 	count -= len;
1676 	if (count < 0)
1677 		count = 0;
1678 	/*
1679 	 * Correct length for chain is "count".
1680 	 * Find the mbuf with last data, adjust its length,
1681 	 * and toss data from remaining mbufs on chain.
1682 	 */
1683 	for (m = mp; m; m = m->m_next) {
1684 		if (m->m_len >= count) {
1685 			m->m_len = count;
1686 			if (nul > 0) {
1687 				cp = mtod(m, caddr_t)+m->m_len-nul;
1688 				for (i = 0; i < nul; i++)
1689 					*cp++ = '\0';
1690 			}
1691 			break;
1692 		}
1693 		count -= m->m_len;
1694 	}
1695 	for (m = m->m_next;m;m = m->m_next)
1696 		m->m_len = 0;
1697 }
1698 
1699 /*
1700  * Make these functions instead of macros, so that the kernel text size
1701  * doesn't get too big...
1702  */
1703 void
1704 nfsm_srvwcc(struct nfsrv_descript *nfsd, int before_ret,
1705 	    struct vattr *before_vap, int after_ret, struct vattr *after_vap,
1706 	    struct mbuf **mbp, char **bposp)
1707 {
1708 	struct mbuf *mb = *mbp, *mb2;
1709 	char *bpos = *bposp;
1710 	u_int32_t *tl;
1711 
1712 	/*
1713 	 * before_ret is 0 if before_vap is valid, non-zero if it isn't.
1714 	 */
1715 	if (before_ret) {
1716 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1717 		*tl = nfs_false;
1718 	} else {
1719 		nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
1720 		*tl++ = nfs_true;
1721 		txdr_hyper(before_vap->va_size, tl);
1722 		tl += 2;
1723 		txdr_nfsv3time(&(before_vap->va_mtime), tl);
1724 		tl += 2;
1725 		txdr_nfsv3time(&(before_vap->va_ctime), tl);
1726 	}
1727 	*bposp = bpos;
1728 	*mbp = mb;
1729 	nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
1730 }
1731 
1732 void
1733 nfsm_srvpostopattr(struct nfsrv_descript *nfsd, int after_ret,
1734 		   struct vattr *after_vap, struct mbuf **mbp, char **bposp)
1735 {
1736 	struct mbuf *mb = *mbp, *mb2;
1737 	char *bpos = *bposp;
1738 	u_int32_t *tl;
1739 	struct nfs_fattr *fp;
1740 
1741 	if (after_ret) {
1742 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1743 		*tl = nfs_false;
1744 	} else {
1745 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
1746 		*tl++ = nfs_true;
1747 		fp = (struct nfs_fattr *)tl;
1748 		nfsm_srvfattr(nfsd, after_vap, fp);
1749 	}
1750 	*mbp = mb;
1751 	*bposp = bpos;
1752 }
1753 
1754 void
1755 nfsm_srvfattr(struct nfsrv_descript *nfsd, struct vattr *vap,
1756 	      struct nfs_fattr *fp)
1757 {
1758 
1759 	fp->fa_nlink = txdr_unsigned(vap->va_nlink);
1760 	fp->fa_uid = txdr_unsigned(vap->va_uid);
1761 	fp->fa_gid = txdr_unsigned(vap->va_gid);
1762 	if (nfsd->nd_flag & ND_NFSV3) {
1763 		fp->fa_type = vtonfsv3_type(vap->va_type);
1764 		fp->fa_mode = vtonfsv3_mode(vap->va_mode);
1765 		txdr_hyper(vap->va_size, &fp->fa3_size);
1766 		txdr_hyper(vap->va_bytes, &fp->fa3_used);
1767 		fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev));
1768 		fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev));
1769 		fp->fa3_fsid.nfsuquad[0] = 0;
1770 		fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
1771 		fp->fa3_fileid.nfsuquad[0] = 0;
1772 		fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid);
1773 		txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
1774 		txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
1775 		txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
1776 	} else {
1777 		fp->fa_type = vtonfsv2_type(vap->va_type);
1778 		fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1779 		fp->fa2_size = txdr_unsigned(vap->va_size);
1780 		fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize);
1781 		if (vap->va_type == VFIFO)
1782 			fp->fa2_rdev = 0xffffffff;
1783 		else
1784 			fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
1785 		fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
1786 		fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
1787 		fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
1788 		txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
1789 		txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
1790 		txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
1791 	}
1792 }
1793 
1794 /*
1795  * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
1796  * 	- look up fsid in mount list (if not found ret error)
1797  *	- get vp and export rights by calling VFS_FHTOVP()
1798  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
1799  *	- if not lockflag unlock it with VOP_UNLOCK()
1800  */
1801 int
1802 nfsrv_fhtovp(fhandle_t *fhp, int lockflag, struct vnode **vpp,
1803 	     struct ucred *cred, struct nfssvc_sock *slp, struct sockaddr *nam,
1804 	     int *rdonlyp, int kerbflag, int pubflag)
1805 {
1806 	struct thread *td = curthread; /* XXX */
1807 	struct mount *mp;
1808 	int i;
1809 	struct ucred *credanon;
1810 	int error, exflags;
1811 #ifdef MNT_EXNORESPORT		/* XXX needs mountd and /etc/exports help yet */
1812 	struct sockaddr_int *saddr;
1813 #endif
1814 
1815 	*vpp = (struct vnode *)0;
1816 
1817 	if (nfs_ispublicfh(fhp)) {
1818 		if (!pubflag || !nfs_pub.np_valid)
1819 			return (ESTALE);
1820 		fhp = &nfs_pub.np_handle;
1821 	}
1822 
1823 	mp = vfs_getvfs(&fhp->fh_fsid);
1824 	if (!mp)
1825 		return (ESTALE);
1826 	error = VFS_CHECKEXP(mp, nam, &exflags, &credanon);
1827 	if (error)
1828 		return (error);
1829 	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
1830 	if (error)
1831 		return (error);
1832 #ifdef MNT_EXNORESPORT
1833 	if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
1834 		saddr = (struct sockaddr_in *)nam;
1835 		if (saddr->sin_family == AF_INET &&
1836 		    ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
1837 			vput(*vpp);
1838 			*vpp = NULL;
1839 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1840 		}
1841 	}
1842 #endif
1843 	/*
1844 	 * Check/setup credentials.
1845 	 */
1846 	if (exflags & MNT_EXKERB) {
1847 		if (!kerbflag) {
1848 			vput(*vpp);
1849 			*vpp = NULL;
1850 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1851 		}
1852 	} else if (kerbflag) {
1853 		vput(*vpp);
1854 		*vpp = NULL;
1855 		return (NFSERR_AUTHERR | AUTH_TOOWEAK);
1856 	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
1857 		cred->cr_uid = credanon->cr_uid;
1858 		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
1859 			cred->cr_groups[i] = credanon->cr_groups[i];
1860 		cred->cr_ngroups = i;
1861 	}
1862 	if (exflags & MNT_EXRDONLY)
1863 		*rdonlyp = 1;
1864 	else
1865 		*rdonlyp = 0;
1866 
1867 	if (!lockflag)
1868 		VOP_UNLOCK(*vpp, 0, td);
1869 	return (0);
1870 }
1871 
1872 
1873 /*
1874  * WebNFS: check if a filehandle is a public filehandle. For v3, this
1875  * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
1876  * transformed this to all zeroes in both cases, so check for it.
1877  */
1878 int
1879 nfs_ispublicfh(fhandle_t *fhp)
1880 {
1881 	char *cp = (char *)fhp;
1882 	int i;
1883 
1884 	for (i = 0; i < NFSX_V3FH; i++)
1885 		if (*cp++ != 0)
1886 			return (FALSE);
1887 	return (TRUE);
1888 }
1889 
1890 #endif /* NFS_NOSERVER */
1891 /*
1892  * This function compares two net addresses by family and returns TRUE
1893  * if they are the same host.
1894  * If there is any doubt, return FALSE.
1895  * The AF_INET family is handled as a special case so that address mbufs
1896  * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1897  */
1898 int
1899 netaddr_match(int family, union nethostaddr *haddr, struct sockaddr *nam)
1900 {
1901 	struct sockaddr_in *inetaddr;
1902 
1903 	switch (family) {
1904 	case AF_INET:
1905 		inetaddr = (struct sockaddr_in *)nam;
1906 		if (inetaddr->sin_family == AF_INET &&
1907 		    inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
1908 			return (1);
1909 		break;
1910 	default:
1911 		break;
1912 	};
1913 	return (0);
1914 }
1915 
1916 static nfsuint64 nfs_nullcookie = { { 0, 0 } };
1917 /*
1918  * This function finds the directory cookie that corresponds to the
1919  * logical byte offset given.
1920  */
1921 nfsuint64 *
1922 nfs_getcookie(struct nfsnode *np, off_t off, int add)
1923 {
1924 	struct nfsdmap *dp, *dp2;
1925 	int pos;
1926 
1927 	pos = (uoff_t)off / NFS_DIRBLKSIZ;
1928 	if (pos == 0 || off < 0) {
1929 #ifdef DIAGNOSTIC
1930 		if (add)
1931 			panic("nfs getcookie add at <= 0");
1932 #endif
1933 		return (&nfs_nullcookie);
1934 	}
1935 	pos--;
1936 	dp = np->n_cookies.lh_first;
1937 	if (!dp) {
1938 		if (add) {
1939 			MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap),
1940 				M_NFSDIROFF, M_WAITOK);
1941 			dp->ndm_eocookie = 0;
1942 			LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
1943 		} else
1944 			return ((nfsuint64 *)0);
1945 	}
1946 	while (pos >= NFSNUMCOOKIES) {
1947 		pos -= NFSNUMCOOKIES;
1948 		if (dp->ndm_list.le_next) {
1949 			if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
1950 				pos >= dp->ndm_eocookie)
1951 				return ((nfsuint64 *)0);
1952 			dp = dp->ndm_list.le_next;
1953 		} else if (add) {
1954 			MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
1955 				M_NFSDIROFF, M_WAITOK);
1956 			dp2->ndm_eocookie = 0;
1957 			LIST_INSERT_AFTER(dp, dp2, ndm_list);
1958 			dp = dp2;
1959 		} else
1960 			return ((nfsuint64 *)0);
1961 	}
1962 	if (pos >= dp->ndm_eocookie) {
1963 		if (add)
1964 			dp->ndm_eocookie = pos + 1;
1965 		else
1966 			return ((nfsuint64 *)0);
1967 	}
1968 	return (&dp->ndm_cookies[pos]);
1969 }
1970 
1971 /*
1972  * Invalidate cached directory information, except for the actual directory
1973  * blocks (which are invalidated separately).
1974  * Done mainly to avoid the use of stale offset cookies.
1975  */
1976 void
1977 nfs_invaldir(struct vnode *vp)
1978 {
1979 	struct nfsnode *np = VTONFS(vp);
1980 
1981 #ifdef DIAGNOSTIC
1982 	if (vp->v_type != VDIR)
1983 		panic("nfs: invaldir not dir");
1984 #endif
1985 	np->n_direofoffset = 0;
1986 	np->n_cookieverf.nfsuquad[0] = 0;
1987 	np->n_cookieverf.nfsuquad[1] = 0;
1988 	if (np->n_cookies.lh_first)
1989 		np->n_cookies.lh_first->ndm_eocookie = 0;
1990 }
1991 
1992 /*
1993  * Set the v_type field for an NFS client's vnode and initialize for
1994  * buffer cache operations if necessary.
1995  */
1996 void
1997 nfs_setvtype(struct vnode *vp, enum vtype vtyp)
1998 {
1999 	vp->v_type = vtyp;
2000 
2001 	switch(vtyp) {
2002 	case VREG:
2003 	case VDIR:
2004 	case VLNK:
2005 		vinitvmio(vp, 0);	/* needs VMIO, size not yet known */
2006 		break;
2007 	default:
2008 		break;
2009 	}
2010 }
2011 
2012 /*
2013  * The write verifier has changed (probably due to a server reboot), so all
2014  * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
2015  * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
2016  * and B_CLUSTEROK flags.  Once done the new write verifier can be set for the
2017  * mount point.
2018  *
2019  * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
2020  * writes are not clusterable.
2021  */
2022 
2023 static int nfs_clearcommit_bp(struct buf *bp, void *data __unused);
2024 
2025 void
2026 nfs_clearcommit(struct mount *mp)
2027 {
2028 	struct vnode *vp, *nvp;
2029 	lwkt_tokref ilock;
2030 
2031 	lwkt_gettoken(&ilock, &mntvnode_token);
2032 	crit_enter();
2033 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
2034 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);	/* ZZZ */
2035 		RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
2036 			nfs_clearcommit_bp, NULL);
2037 	}
2038 	crit_exit();
2039 	lwkt_reltoken(&ilock);
2040 }
2041 
2042 static int
2043 nfs_clearcommit_bp(struct buf *bp, void *data __unused)
2044 {
2045 	if (BUF_REFCNT(bp) == 0 &&
2046 	    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2047 	     == (B_DELWRI | B_NEEDCOMMIT)) {
2048 		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
2049 	}
2050 	return(0);
2051 }
2052 
2053 #ifndef NFS_NOSERVER
2054 /*
2055  * Map errnos to NFS error numbers. For Version 3 also filter out error
2056  * numbers not specified for the associated procedure.
2057  */
2058 int
2059 nfsrv_errmap(struct nfsrv_descript *nd, int err)
2060 {
2061 	short *defaulterrp, *errp;
2062 
2063 	if (nd->nd_flag & ND_NFSV3) {
2064 	    if (nd->nd_procnum <= NFSPROC_COMMIT) {
2065 		errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
2066 		while (*++errp) {
2067 			if (*errp == err)
2068 				return (err);
2069 			else if (*errp > err)
2070 				break;
2071 		}
2072 		return ((int)*defaulterrp);
2073 	    } else
2074 		return (err & 0xffff);
2075 	}
2076 	if (err <= ELAST)
2077 		return ((int)nfsrv_v2errmap[err - 1]);
2078 	return (NFSERR_IO);
2079 }
2080 
/*
 * Sort the 'num' entries of a group list into increasing numerical
 * order, in place, using an insertion sort.  Lists of fewer than two
 * entries are left untouched.
 */
void
nfsrvw_sort(gid_t *list, int num)
{
	int cur, slot;
	gid_t key;

	for (cur = 1; cur < num; cur++) {
		key = list[cur];
		/*
		 * Shift every larger element of the sorted prefix up by
		 * one, then drop the key into the gap that opens up.
		 */
		slot = cur;
		while (slot > 0 && key < list[slot - 1]) {
			list[slot] = list[slot - 1];
			slot--;
		}
		list[slot] = key;
	}
}
2101 
2102 /*
2103  * copy credentials making sure that the result can be compared with bcmp().
2104  */
2105 void
2106 nfsrv_setcred(struct ucred *incred, struct ucred *outcred)
2107 {
2108 	int i;
2109 
2110 	bzero((caddr_t)outcred, sizeof (struct ucred));
2111 	outcred->cr_ref = 1;
2112 	outcred->cr_uid = incred->cr_uid;
2113 	outcred->cr_ngroups = incred->cr_ngroups;
2114 	for (i = 0; i < incred->cr_ngroups; i++)
2115 		outcred->cr_groups[i] = incred->cr_groups[i];
2116 	nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups);
2117 }
2118 #endif /* NFS_NOSERVER */
2119