xref: /netbsd-src/sys/nfs/nfs_subs.c (revision ce2c90c7c172d95d2402a5b3d96d8f8e6d138a21)
1 /*	$NetBSD: nfs_subs.c,v 1.174 2006/10/14 09:18:57 yamt Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)nfs_subs.c	8.8 (Berkeley) 5/22/95
35  */
36 
37 /*
38  * Copyright 2000 Wasabi Systems, Inc.
39  * All rights reserved.
40  *
41  * Written by Frank van der Linden for Wasabi Systems, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *      This product includes software developed for the NetBSD Project by
54  *      Wasabi Systems, Inc.
55  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56  *    or promote products derived from this software without specific prior
57  *    written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69  * POSSIBILITY OF SUCH DAMAGE.
70  */
71 
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.174 2006/10/14 09:18:57 yamt Exp $");
74 
75 #include "fs_nfs.h"
76 #include "opt_nfs.h"
77 #include "opt_nfsserver.h"
78 #include "opt_iso.h"
79 #include "opt_inet.h"
80 
81 /*
82  * These functions support the macros and help fiddle mbuf chains for
83  * the nfs op functions. They do things like create the rpc header and
84  * copy data between mbuf chains and uio lists.
85  */
86 #include <sys/param.h>
87 #include <sys/proc.h>
88 #include <sys/systm.h>
89 #include <sys/kernel.h>
90 #include <sys/mount.h>
91 #include <sys/vnode.h>
92 #include <sys/namei.h>
93 #include <sys/mbuf.h>
94 #include <sys/socket.h>
95 #include <sys/stat.h>
96 #include <sys/malloc.h>
97 #include <sys/filedesc.h>
98 #include <sys/time.h>
99 #include <sys/dirent.h>
100 #include <sys/once.h>
101 #include <sys/kauth.h>
102 
103 #include <uvm/uvm_extern.h>
104 
105 #include <nfs/rpcv2.h>
106 #include <nfs/nfsproto.h>
107 #include <nfs/nfsnode.h>
108 #include <nfs/nfs.h>
109 #include <nfs/xdr_subs.h>
110 #include <nfs/nfsm_subs.h>
111 #include <nfs/nfsmount.h>
112 #include <nfs/nqnfs.h>
113 #include <nfs/nfsrtt.h>
114 #include <nfs/nfs_var.h>
115 
116 #include <miscfs/specfs/specdev.h>
117 
118 #include <netinet/in.h>
119 #ifdef ISO
120 #include <netiso/iso.h>
121 #endif
122 
123 /*
124  * Data items converted to xdr at startup, since they are constant.
125  * This is kinda hokey, but may save a little time doing byte swaps.
126  */
127 u_int32_t nfs_xdrneg1;
128 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
129 	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
130 	rpc_auth_kerb;
131 u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false;
132 
133 /* And other global data */
134 const nfstype nfsv2_type[9] =
135 	{ NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON };
136 const nfstype nfsv3_type[9] =
137 	{ NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON };
138 const enum vtype nv2tov_type[8] =
139 	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON };
140 const enum vtype nv3tov_type[8] =
141 	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO };
142 int nfs_ticks;
143 int nfs_commitsize;
144 
145 MALLOC_DEFINE(M_NFSDIROFF, "NFS diroff", "NFS directory cookies");
146 
147 /* NFS client/server stats. */
148 struct nfsstats nfsstats;
149 
150 /*
151  * Mapping of old NFS Version 2 RPC numbers to generic numbers.
152  */
153 const int nfsv3_procid[NFS_NPROCS] = {
154 	NFSPROC_NULL,
155 	NFSPROC_GETATTR,
156 	NFSPROC_SETATTR,
157 	NFSPROC_NOOP,
158 	NFSPROC_LOOKUP,
159 	NFSPROC_READLINK,
160 	NFSPROC_READ,
161 	NFSPROC_NOOP,
162 	NFSPROC_WRITE,
163 	NFSPROC_CREATE,
164 	NFSPROC_REMOVE,
165 	NFSPROC_RENAME,
166 	NFSPROC_LINK,
167 	NFSPROC_SYMLINK,
168 	NFSPROC_MKDIR,
169 	NFSPROC_RMDIR,
170 	NFSPROC_READDIR,
171 	NFSPROC_FSSTAT,
172 	NFSPROC_NOOP,
173 	NFSPROC_NOOP,
174 	NFSPROC_NOOP,
175 	NFSPROC_NOOP,
176 	NFSPROC_NOOP,
177 	NFSPROC_NOOP,
178 	NFSPROC_NOOP,
179 	NFSPROC_NOOP
180 };
181 
182 /*
183  * and the reverse mapping from generic to Version 2 procedure numbers
184  */
185 const int nfsv2_procid[NFS_NPROCS] = {
186 	NFSV2PROC_NULL,
187 	NFSV2PROC_GETATTR,
188 	NFSV2PROC_SETATTR,
189 	NFSV2PROC_LOOKUP,
190 	NFSV2PROC_NOOP,
191 	NFSV2PROC_READLINK,
192 	NFSV2PROC_READ,
193 	NFSV2PROC_WRITE,
194 	NFSV2PROC_CREATE,
195 	NFSV2PROC_MKDIR,
196 	NFSV2PROC_SYMLINK,
197 	NFSV2PROC_CREATE,
198 	NFSV2PROC_REMOVE,
199 	NFSV2PROC_RMDIR,
200 	NFSV2PROC_RENAME,
201 	NFSV2PROC_LINK,
202 	NFSV2PROC_READDIR,
203 	NFSV2PROC_NOOP,
204 	NFSV2PROC_STATFS,
205 	NFSV2PROC_NOOP,
206 	NFSV2PROC_NOOP,
207 	NFSV2PROC_NOOP,
208 	NFSV2PROC_NOOP,
209 	NFSV2PROC_NOOP,
210 	NFSV2PROC_NOOP,
211 	NFSV2PROC_NOOP,
212 };
213 
214 /*
215  * Maps errno values to nfs error numbers.
216  * Use NFSERR_IO as the catch all for ones not specifically defined in
217  * RFC 1094.
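 * The table is laid out so that entry (errno - 1) holds the mapping for
 * errno: the first entry corresponds to EPERM, the second to ENOENT, etc.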
218  */
219 static const u_char nfsrv_v2errmap[ELAST] = {
220   NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
221   NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
222   NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
223   NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
224   NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
225   NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
226   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
227   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
228   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
229   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
230   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
231   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
232   NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
233   NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
234   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
235   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
236   NFSERR_IO,	NFSERR_IO,
237 };
238 
239 /*
240  * Maps errno values to nfs error numbers.
241  * Although it is not obvious whether or not NFS clients really care if
242  * a returned error value is in the specified list for the procedure, the
243  * safest thing to do is filter them appropriately. For Version 2, the
244  * X/Open XNFS document is the only specification that defines error values
245  * for each RPC (The RFC simply lists all possible error values for all RPCs),
246  * so I have decided to not do this for Version 2.
247  * The first entry is the default error return and the rest are the valid
248  * errors for that RPC in increasing numeric order.
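 * Each list below is terminated by a 0 entry.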
249  */
250 static const short nfsv3err_null[] = {
251 	0,
252 	0,
253 };
254 
255 static const short nfsv3err_getattr[] = {
256 	NFSERR_IO,
257 	NFSERR_IO,
258 	NFSERR_STALE,
259 	NFSERR_BADHANDLE,
260 	NFSERR_SERVERFAULT,
261 	0,
262 };
263 
264 static const short nfsv3err_setattr[] = {
265 	NFSERR_IO,
266 	NFSERR_PERM,
267 	NFSERR_IO,
268 	NFSERR_ACCES,
269 	NFSERR_INVAL,
270 	NFSERR_NOSPC,
271 	NFSERR_ROFS,
272 	NFSERR_DQUOT,
273 	NFSERR_STALE,
274 	NFSERR_BADHANDLE,
275 	NFSERR_NOT_SYNC,
276 	NFSERR_SERVERFAULT,
277 	0,
278 };
279 
280 static const short nfsv3err_lookup[] = {
281 	NFSERR_IO,
282 	NFSERR_NOENT,
283 	NFSERR_IO,
284 	NFSERR_ACCES,
285 	NFSERR_NOTDIR,
286 	NFSERR_NAMETOL,
287 	NFSERR_STALE,
288 	NFSERR_BADHANDLE,
289 	NFSERR_SERVERFAULT,
290 	0,
291 };
292 
293 static const short nfsv3err_access[] = {
294 	NFSERR_IO,
295 	NFSERR_IO,
296 	NFSERR_STALE,
297 	NFSERR_BADHANDLE,
298 	NFSERR_SERVERFAULT,
299 	0,
300 };
301 
302 static const short nfsv3err_readlink[] = {
303 	NFSERR_IO,
304 	NFSERR_IO,
305 	NFSERR_ACCES,
306 	NFSERR_INVAL,
307 	NFSERR_STALE,
308 	NFSERR_BADHANDLE,
309 	NFSERR_NOTSUPP,
310 	NFSERR_SERVERFAULT,
311 	0,
312 };
313 
314 static const short nfsv3err_read[] = {
315 	NFSERR_IO,
316 	NFSERR_IO,
317 	NFSERR_NXIO,
318 	NFSERR_ACCES,
319 	NFSERR_INVAL,
320 	NFSERR_STALE,
321 	NFSERR_BADHANDLE,
322 	NFSERR_SERVERFAULT,
323 	NFSERR_JUKEBOX,
324 	0,
325 };
326 
327 static const short nfsv3err_write[] = {
328 	NFSERR_IO,
329 	NFSERR_IO,
330 	NFSERR_ACCES,
331 	NFSERR_INVAL,
332 	NFSERR_FBIG,
333 	NFSERR_NOSPC,
334 	NFSERR_ROFS,
335 	NFSERR_DQUOT,
336 	NFSERR_STALE,
337 	NFSERR_BADHANDLE,
338 	NFSERR_SERVERFAULT,
339 	NFSERR_JUKEBOX,
340 	0,
341 };
342 
343 static const short nfsv3err_create[] = {
344 	NFSERR_IO,
345 	NFSERR_IO,
346 	NFSERR_ACCES,
347 	NFSERR_EXIST,
348 	NFSERR_NOTDIR,
349 	NFSERR_NOSPC,
350 	NFSERR_ROFS,
351 	NFSERR_NAMETOL,
352 	NFSERR_DQUOT,
353 	NFSERR_STALE,
354 	NFSERR_BADHANDLE,
355 	NFSERR_NOTSUPP,
356 	NFSERR_SERVERFAULT,
357 	0,
358 };
359 
360 static const short nfsv3err_mkdir[] = {
361 	NFSERR_IO,
362 	NFSERR_IO,
363 	NFSERR_ACCES,
364 	NFSERR_EXIST,
365 	NFSERR_NOTDIR,
366 	NFSERR_NOSPC,
367 	NFSERR_ROFS,
368 	NFSERR_NAMETOL,
369 	NFSERR_DQUOT,
370 	NFSERR_STALE,
371 	NFSERR_BADHANDLE,
372 	NFSERR_NOTSUPP,
373 	NFSERR_SERVERFAULT,
374 	0,
375 };
376 
377 static const short nfsv3err_symlink[] = {
378 	NFSERR_IO,
379 	NFSERR_IO,
380 	NFSERR_ACCES,
381 	NFSERR_EXIST,
382 	NFSERR_NOTDIR,
383 	NFSERR_NOSPC,
384 	NFSERR_ROFS,
385 	NFSERR_NAMETOL,
386 	NFSERR_DQUOT,
387 	NFSERR_STALE,
388 	NFSERR_BADHANDLE,
389 	NFSERR_NOTSUPP,
390 	NFSERR_SERVERFAULT,
391 	0,
392 };
393 
394 static const short nfsv3err_mknod[] = {
395 	NFSERR_IO,
396 	NFSERR_IO,
397 	NFSERR_ACCES,
398 	NFSERR_EXIST,
399 	NFSERR_NOTDIR,
400 	NFSERR_NOSPC,
401 	NFSERR_ROFS,
402 	NFSERR_NAMETOL,
403 	NFSERR_DQUOT,
404 	NFSERR_STALE,
405 	NFSERR_BADHANDLE,
406 	NFSERR_NOTSUPP,
407 	NFSERR_SERVERFAULT,
408 	NFSERR_BADTYPE,
409 	0,
410 };
411 
412 static const short nfsv3err_remove[] = {
413 	NFSERR_IO,
414 	NFSERR_NOENT,
415 	NFSERR_IO,
416 	NFSERR_ACCES,
417 	NFSERR_NOTDIR,
418 	NFSERR_ROFS,
419 	NFSERR_NAMETOL,
420 	NFSERR_STALE,
421 	NFSERR_BADHANDLE,
422 	NFSERR_SERVERFAULT,
423 	0,
424 };
425 
426 static const short nfsv3err_rmdir[] = {
427 	NFSERR_IO,
428 	NFSERR_NOENT,
429 	NFSERR_IO,
430 	NFSERR_ACCES,
431 	NFSERR_EXIST,
432 	NFSERR_NOTDIR,
433 	NFSERR_INVAL,
434 	NFSERR_ROFS,
435 	NFSERR_NAMETOL,
436 	NFSERR_NOTEMPTY,
437 	NFSERR_STALE,
438 	NFSERR_BADHANDLE,
439 	NFSERR_NOTSUPP,
440 	NFSERR_SERVERFAULT,
441 	0,
442 };
443 
444 static const short nfsv3err_rename[] = {
445 	NFSERR_IO,
446 	NFSERR_NOENT,
447 	NFSERR_IO,
448 	NFSERR_ACCES,
449 	NFSERR_EXIST,
450 	NFSERR_XDEV,
451 	NFSERR_NOTDIR,
452 	NFSERR_ISDIR,
453 	NFSERR_INVAL,
454 	NFSERR_NOSPC,
455 	NFSERR_ROFS,
456 	NFSERR_MLINK,
457 	NFSERR_NAMETOL,
458 	NFSERR_NOTEMPTY,
459 	NFSERR_DQUOT,
460 	NFSERR_STALE,
461 	NFSERR_BADHANDLE,
462 	NFSERR_NOTSUPP,
463 	NFSERR_SERVERFAULT,
464 	0,
465 };
466 
467 static const short nfsv3err_link[] = {
468 	NFSERR_IO,
469 	NFSERR_IO,
470 	NFSERR_ACCES,
471 	NFSERR_EXIST,
472 	NFSERR_XDEV,
473 	NFSERR_NOTDIR,
474 	NFSERR_INVAL,
475 	NFSERR_NOSPC,
476 	NFSERR_ROFS,
477 	NFSERR_MLINK,
478 	NFSERR_NAMETOL,
479 	NFSERR_DQUOT,
480 	NFSERR_STALE,
481 	NFSERR_BADHANDLE,
482 	NFSERR_NOTSUPP,
483 	NFSERR_SERVERFAULT,
484 	0,
485 };
486 
487 static const short nfsv3err_readdir[] = {
488 	NFSERR_IO,
489 	NFSERR_IO,
490 	NFSERR_ACCES,
491 	NFSERR_NOTDIR,
492 	NFSERR_STALE,
493 	NFSERR_BADHANDLE,
494 	NFSERR_BAD_COOKIE,
495 	NFSERR_TOOSMALL,
496 	NFSERR_SERVERFAULT,
497 	0,
498 };
499 
500 static const short nfsv3err_readdirplus[] = {
501 	NFSERR_IO,
502 	NFSERR_IO,
503 	NFSERR_ACCES,
504 	NFSERR_NOTDIR,
505 	NFSERR_STALE,
506 	NFSERR_BADHANDLE,
507 	NFSERR_BAD_COOKIE,
508 	NFSERR_NOTSUPP,
509 	NFSERR_TOOSMALL,
510 	NFSERR_SERVERFAULT,
511 	0,
512 };
513 
514 static const short nfsv3err_fsstat[] = {
515 	NFSERR_IO,
516 	NFSERR_IO,
517 	NFSERR_STALE,
518 	NFSERR_BADHANDLE,
519 	NFSERR_SERVERFAULT,
520 	0,
521 };
522 
523 static const short nfsv3err_fsinfo[] = {
524 	NFSERR_STALE,
525 	NFSERR_STALE,
526 	NFSERR_BADHANDLE,
527 	NFSERR_SERVERFAULT,
528 	0,
529 };
530 
531 static const short nfsv3err_pathconf[] = {
532 	NFSERR_STALE,
533 	NFSERR_STALE,
534 	NFSERR_BADHANDLE,
535 	NFSERR_SERVERFAULT,
536 	0,
537 };
538 
539 static const short nfsv3err_commit[] = {
540 	NFSERR_IO,
541 	NFSERR_IO,
542 	NFSERR_STALE,
543 	NFSERR_BADHANDLE,
544 	NFSERR_SERVERFAULT,
545 	0,
546 };
547 
548 static const short * const nfsrv_v3errmap[] = {
549 	nfsv3err_null,
550 	nfsv3err_getattr,
551 	nfsv3err_setattr,
552 	nfsv3err_lookup,
553 	nfsv3err_access,
554 	nfsv3err_readlink,
555 	nfsv3err_read,
556 	nfsv3err_write,
557 	nfsv3err_create,
558 	nfsv3err_mkdir,
559 	nfsv3err_symlink,
560 	nfsv3err_mknod,
561 	nfsv3err_remove,
562 	nfsv3err_rmdir,
563 	nfsv3err_rename,
564 	nfsv3err_link,
565 	nfsv3err_readdir,
566 	nfsv3err_readdirplus,
567 	nfsv3err_fsstat,
568 	nfsv3err_fsinfo,
569 	nfsv3err_pathconf,
570 	nfsv3err_commit,
571 };
572 
573 extern struct nfsrtt nfsrtt;
574 extern time_t nqnfsstarttime;
575 extern int nqsrv_clockskew;
576 extern int nqsrv_writeslack;
577 extern int nqsrv_maxlease;
578 extern const int nqnfs_piggy[NFS_NPROCS];
579 extern struct nfsnodehashhead *nfsnodehashtbl;
580 extern u_long nfsnodehash;
581 
582 u_long nfsdirhashmask;
583 
584 int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *));
585 
586 /*
587  * Create the header for an rpc request packet
588  * The hsiz is the size of the rest of the nfs request header.
589  * (just used to decide if a cluster is a good idea)
590  */
591 struct mbuf *
592 nfsm_reqh(
593     struct nfsnode *np __unused,
594     u_long procid __unused,
595     int hsiz,
596     caddr_t *bposp
597 )
598 {
599 	struct mbuf *mb;
600 	caddr_t bpos;
601 #ifndef NFS_V2_ONLY
602 	struct nfsmount *nmp;
603 	u_int32_t *tl;
604 	int nqflag;
605 #else
606 	do { if (&np) {} } while (/* CONSTCOND */ 0); /* for -Wunused */
607 	do { if (&procid) {} } while (/* CONSTCOND */ 0); /* for -Wunused */
608 #endif
609 
610 	mb = m_get(M_WAIT, MT_DATA);
611 	MCLAIM(mb, &nfs_mowner);
612 	if (hsiz >= MINCLSIZE)
613 		m_clget(mb, M_WAIT);
614 	mb->m_len = 0;
615 	bpos = mtod(mb, caddr_t);
616 
617 #ifndef NFS_V2_ONLY
618 	/*
619 	 * For NQNFS, add lease request.
620 	 */
621 	if (np) {
622 		nmp = VFSTONFS(np->n_vnode->v_mount);
623 		if (nmp->nm_flag & NFSMNT_NQNFS) {
624 			nqflag = NQNFS_NEEDLEASE(np, procid);
625 			if (nqflag) {
626 				nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED);
627 				*tl++ = txdr_unsigned(nqflag);
628 				*tl = txdr_unsigned(nmp->nm_leaseterm);
629 			} else {
630 				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
631 				*tl = 0;
632 			}
633 		}
634 	}
635 #endif
636 	/* Finally, return values */
637 	*bposp = bpos;
638 	return (mb);
639 }
640 
641 /*
642  * Build the RPC header and fill in the authorization info.
643  * The authorization string argument is only used when the credentials
644  * come from outside of the kernel.
645  * Returns the head of the mbuf list.
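 *
 * The header built here follows the ONC RPC call format (RFC 1057):
 * xid, CALL, RPC version, program, program version, procedure, then the
 * credential (flavor, length, body) and the verifier (flavor, length, body).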
646  */
647 struct mbuf *
648 nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len,
649 	verf_str, mrest, mrest_len, mbp, xidp)
650 	kauth_cred_t cr;
651 	int nmflag;
652 	int procid;
653 	int auth_type;
654 	int auth_len;
655 	char *auth_str;
656 	int verf_len;
657 	char *verf_str;
658 	struct mbuf *mrest;
659 	int mrest_len;
660 	struct mbuf **mbp;
661 	u_int32_t *xidp;
662 {
663 	struct mbuf *mb;
664 	u_int32_t *tl;
665 	caddr_t bpos;
666 	int i;
667 	struct mbuf *mreq;
668 	int siz, grpsiz, authsiz;
669 
670 	authsiz = nfsm_rndup(auth_len);
671 	mb = m_gethdr(M_WAIT, MT_DATA);
672 	MCLAIM(mb, &nfs_mowner);
673 	if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
674 		m_clget(mb, M_WAIT);
675 	} else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
676 		MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED);
677 	} else {
678 		MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
679 	}
680 	mb->m_len = 0;
681 	mreq = mb;
682 	bpos = mtod(mb, caddr_t);
683 
684 	/*
685 	 * First the RPC header.
686 	 */
687 	nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
688 
689 	*tl++ = *xidp = nfs_getxid();
690 	*tl++ = rpc_call;
691 	*tl++ = rpc_vers;
692 	if (nmflag & NFSMNT_NQNFS) {
693 		*tl++ = txdr_unsigned(NQNFS_PROG);
694 		*tl++ = txdr_unsigned(NQNFS_VER3);
695 	} else {
696 		*tl++ = txdr_unsigned(NFS_PROG);
697 		if (nmflag & NFSMNT_NFSV3)
698 			*tl++ = txdr_unsigned(NFS_VER3);
699 		else
700 			*tl++ = txdr_unsigned(NFS_VER2);
701 	}
702 	if (nmflag & NFSMNT_NFSV3)
703 		*tl++ = txdr_unsigned(procid);
704 	else
705 		*tl++ = txdr_unsigned(nfsv2_procid[procid]);
706 
707 	/*
708 	 * And then the authorization cred.
709 	 */
710 	*tl++ = txdr_unsigned(auth_type);
711 	*tl = txdr_unsigned(authsiz);
712 	switch (auth_type) {
713 	case RPCAUTH_UNIX:
714 		nfsm_build(tl, u_int32_t *, auth_len);
715 		*tl++ = 0;		/* stamp ?? */
716 		*tl++ = 0;		/* NULL hostname */
717 		*tl++ = txdr_unsigned(kauth_cred_geteuid(cr));
718 		*tl++ = txdr_unsigned(kauth_cred_getegid(cr));
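		/*
		 * The AUTH_UNIX body is 5 words (stamp, hostname length,
		 * uid, gid, group count) followed by the gids, so this
		 * recovers the group count.
		 */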
719 		grpsiz = (auth_len >> 2) - 5;
720 		*tl++ = txdr_unsigned(grpsiz);
721 		for (i = 0; i < grpsiz; i++)
722 			*tl++ = txdr_unsigned(kauth_cred_group(cr, i)); /* XXX elad review */
723 		break;
724 	case RPCAUTH_KERB4:
725 		siz = auth_len;
726 		while (siz > 0) {
727 			if (M_TRAILINGSPACE(mb) == 0) {
728 				struct mbuf *mb2;
729 				mb2 = m_get(M_WAIT, MT_DATA);
730 				MCLAIM(mb2, &nfs_mowner);
731 				if (siz >= MINCLSIZE)
732 					m_clget(mb2, M_WAIT);
733 				mb->m_next = mb2;
734 				mb = mb2;
735 				mb->m_len = 0;
736 				bpos = mtod(mb, caddr_t);
737 			}
738 			i = min(siz, M_TRAILINGSPACE(mb));
739 			memcpy(bpos, auth_str, i);
740 			mb->m_len += i;
741 			auth_str += i;
742 			bpos += i;
743 			siz -= i;
744 		}
745 		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
746 			for (i = 0; i < siz; i++)
747 				*bpos++ = '\0';
748 			mb->m_len += siz;
749 		}
750 		break;
751 	}
752 
753 	/*
754 	 * And the verifier...
755 	 */
756 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
757 	if (verf_str) {
758 		*tl++ = txdr_unsigned(RPCAUTH_KERB4);
759 		*tl = txdr_unsigned(verf_len);
760 		siz = verf_len;
761 		while (siz > 0) {
762 			if (M_TRAILINGSPACE(mb) == 0) {
763 				struct mbuf *mb2;
764 				mb2 = m_get(M_WAIT, MT_DATA);
765 				MCLAIM(mb2, &nfs_mowner);
766 				if (siz >= MINCLSIZE)
767 					m_clget(mb2, M_WAIT);
768 				mb->m_next = mb2;
769 				mb = mb2;
770 				mb->m_len = 0;
771 				bpos = mtod(mb, caddr_t);
772 			}
773 			i = min(siz, M_TRAILINGSPACE(mb));
774 			memcpy(bpos, verf_str, i);
775 			mb->m_len += i;
776 			verf_str += i;
777 			bpos += i;
778 			siz -= i;
779 		}
780 		if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
781 			for (i = 0; i < siz; i++)
782 				*bpos++ = '\0';
783 			mb->m_len += siz;
784 		}
785 	} else {
786 		*tl++ = txdr_unsigned(RPCAUTH_NULL);
787 		*tl = 0;
788 	}
789 	mb->m_next = mrest;
790 	mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
791 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
792 	*mbp = mb;
793 	return (mreq);
794 }
795 
796 /*
797  * copies mbuf chain to the uio scatter/gather list
798  */
799 int
800 nfsm_mbuftouio(mrep, uiop, siz, dpos)
801 	struct mbuf **mrep;
802 	struct uio *uiop;
803 	int siz;
804 	caddr_t *dpos;
805 {
806 	char *mbufcp, *uiocp;
807 	int xfer, left, len;
808 	struct mbuf *mp;
809 	long uiosiz, rem;
810 	int error = 0;
811 
812 	mp = *mrep;
813 	mbufcp = *dpos;
814 	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
815 	rem = nfsm_rndup(siz)-siz;
816 	while (siz > 0) {
817 		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
818 			return (EFBIG);
819 		left = uiop->uio_iov->iov_len;
820 		uiocp = uiop->uio_iov->iov_base;
821 		if (left > siz)
822 			left = siz;
823 		uiosiz = left;
824 		while (left > 0) {
825 			while (len == 0) {
826 				mp = mp->m_next;
827 				if (mp == NULL)
828 					return (EBADRPC);
829 				mbufcp = mtod(mp, caddr_t);
830 				len = mp->m_len;
831 			}
832 			xfer = (left > len) ? len : left;
833 			error = copyout_vmspace(uiop->uio_vmspace, mbufcp,
834 			    uiocp, xfer);
835 			if (error) {
836 				return error;
837 			}
838 			left -= xfer;
839 			len -= xfer;
840 			mbufcp += xfer;
841 			uiocp += xfer;
842 			uiop->uio_offset += xfer;
843 			uiop->uio_resid -= xfer;
844 		}
845 		if (uiop->uio_iov->iov_len <= siz) {
846 			uiop->uio_iovcnt--;
847 			uiop->uio_iov++;
848 		} else {
849 			uiop->uio_iov->iov_base =
850 			    (caddr_t)uiop->uio_iov->iov_base + uiosiz;
851 			uiop->uio_iov->iov_len -= uiosiz;
852 		}
853 		siz -= uiosiz;
854 	}
855 	*dpos = mbufcp;
856 	*mrep = mp;
857 	if (rem > 0) {
858 		if (len < rem)
859 			error = nfs_adv(mrep, dpos, rem, len);
860 		else
861 			*dpos += rem;
862 	}
863 	return (error);
864 }
865 
866 /*
867  * copies a uio scatter/gather list to an mbuf chain.
868  * NOTE: can only handle iovcnt == 1
869  */
870 int
871 nfsm_uiotombuf(uiop, mq, siz, bpos)
872 	struct uio *uiop;
873 	struct mbuf **mq;
874 	int siz;
875 	caddr_t *bpos;
876 {
877 	char *uiocp;
878 	struct mbuf *mp, *mp2;
879 	int xfer, left, mlen;
880 	int uiosiz, clflg, rem;
881 	char *cp;
882 	int error;
883 
884 #ifdef DIAGNOSTIC
885 	if (uiop->uio_iovcnt != 1)
886 		panic("nfsm_uiotombuf: iovcnt != 1");
887 #endif
888 
889 	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
890 		clflg = 1;
891 	else
892 		clflg = 0;
893 	rem = nfsm_rndup(siz)-siz;
894 	mp = mp2 = *mq;
895 	while (siz > 0) {
896 		left = uiop->uio_iov->iov_len;
897 		uiocp = uiop->uio_iov->iov_base;
898 		if (left > siz)
899 			left = siz;
900 		uiosiz = left;
901 		while (left > 0) {
902 			mlen = M_TRAILINGSPACE(mp);
903 			if (mlen == 0) {
904 				mp = m_get(M_WAIT, MT_DATA);
905 				MCLAIM(mp, &nfs_mowner);
906 				if (clflg)
907 					m_clget(mp, M_WAIT);
908 				mp->m_len = 0;
909 				mp2->m_next = mp;
910 				mp2 = mp;
911 				mlen = M_TRAILINGSPACE(mp);
912 			}
913 			xfer = (left > mlen) ? mlen : left;
914 			cp = mtod(mp, caddr_t) + mp->m_len;
915 			error = copyin_vmspace(uiop->uio_vmspace, uiocp, cp,
916 			    xfer);
917 			if (error) {
918 				/* XXX */
919 			}
920 			mp->m_len += xfer;
921 			left -= xfer;
922 			uiocp += xfer;
923 			uiop->uio_offset += xfer;
924 			uiop->uio_resid -= xfer;
925 		}
926 		uiop->uio_iov->iov_base = (caddr_t)uiop->uio_iov->iov_base +
927 		    uiosiz;
928 		uiop->uio_iov->iov_len -= uiosiz;
929 		siz -= uiosiz;
930 	}
931 	if (rem > 0) {
932 		if (rem > M_TRAILINGSPACE(mp)) {
933 			mp = m_get(M_WAIT, MT_DATA);
934 			MCLAIM(mp, &nfs_mowner);
935 			mp->m_len = 0;
936 			mp2->m_next = mp;
937 		}
938 		cp = mtod(mp, caddr_t) + mp->m_len;
939 		for (left = 0; left < rem; left++)
940 			*cp++ = '\0';
941 		mp->m_len += rem;
942 		*bpos = cp;
943 	} else
944 		*bpos = mtod(mp, caddr_t)+mp->m_len;
945 	*mq = mp;
946 	return (0);
947 }
948 
949 /*
950  * Get at least "siz" bytes of correctly aligned data.
951  * When called, the mbuf pointers are not necessarily correct:
952  * dposp points to what ought to be in m_data and left contains
953  * what ought to be in m_len.
954  * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
955  * cases. (The macros use the vars. dpos and dpos2)
956  */
957 int
958 nfsm_disct(mdp, dposp, siz, left, cp2)
959 	struct mbuf **mdp;
960 	caddr_t *dposp;
961 	int siz;
962 	int left;
963 	caddr_t *cp2;
964 {
965 	struct mbuf *m1, *m2;
966 	struct mbuf *havebuf = NULL;
967 	caddr_t src = *dposp;
968 	caddr_t dst;
969 	int len;
970 
971 #ifdef DEBUG
972 	if (left < 0)
973 		panic("nfsm_disct: left < 0");
974 #endif
975 	m1 = *mdp;
976 	/*
977 	 * Skip through the mbuf chain looking for an mbuf with
978 	 * some data. If the first mbuf found has enough data
979 	 * and it is correctly aligned return it.
980 	 */
981 	while (left == 0) {
982 		havebuf = m1;
983 		*mdp = m1 = m1->m_next;
984 		if (m1 == NULL)
985 			return (EBADRPC);
986 		src = mtod(m1, caddr_t);
987 		left = m1->m_len;
988 		/*
989 		 * If we start a new mbuf and it is big enough
990 		 * and correctly aligned just return it, don't
991 		 * do any pull up.
992 		 */
993 		if (left >= siz && nfsm_aligned(src)) {
994 			*cp2 = src;
995 			*dposp = src + siz;
996 			return (0);
997 		}
998 	}
999 	if (m1->m_flags & M_EXT) {
1000 		if (havebuf) {
1001 			/* If the first mbuf with data has external data
1002 			 * and there is a previous empty mbuf use it
1003 			 * to move the data into.
1004 			 */
1005 			m2 = m1;
1006 			*mdp = m1 = havebuf;
1007 			if (m1->m_flags & M_EXT) {
1008 				MEXTREMOVE(m1);
1009 			}
1010 		} else {
1011 			/*
1012 			 * If the first mbuf has external data
1013 			 * and there is no previous empty mbuf,
1014 			 * allocate a new mbuf and move the external
1015 			 * data to the new mbuf. Also make the first
1016 			 * mbuf look empty.
1017 			 */
1018 			m2 = m_get(M_WAIT, MT_DATA);
1019 			m2->m_ext = m1->m_ext;
1020 			m2->m_data = src;
1021 			m2->m_len = left;
1022 			MCLADDREFERENCE(m1, m2);
1023 			MEXTREMOVE(m1);
1024 			m2->m_next = m1->m_next;
1025 			m1->m_next = m2;
1026 		}
1027 		m1->m_len = 0;
1028 		if (m1->m_flags & M_PKTHDR)
1029 			dst = m1->m_pktdat;
1030 		else
1031 			dst = m1->m_dat;
1032 		m1->m_data = dst;
1033 	} else {
1034 		/*
1035 		 * If the first mbuf has no external data
1036 		 * move the data to the front of the mbuf.
1037 		 */
1038 		if (m1->m_flags & M_PKTHDR)
1039 			dst = m1->m_pktdat;
1040 		else
1041 			dst = m1->m_dat;
1042 		m1->m_data = dst;
1043 		if (dst != src)
1044 			memmove(dst, src, left);
1045 		dst += left;
1046 		m1->m_len = left;
1047 		m2 = m1->m_next;
1048 	}
1049 	*cp2 = m1->m_data;
1050 	*dposp = mtod(m1, caddr_t) + siz;
1051 	/*
1052 	 * Loop through mbufs pulling data up into first mbuf until
1053 	 * the first mbuf is full or there is no more data to
1054 	 * pullup.
1055 	 */
1056 	while ((len = M_TRAILINGSPACE(m1)) != 0 && m2) {
1057 		if ((len = min(len, m2->m_len)) != 0)
1058 			memcpy(dst, m2->m_data, len);
1059 		m1->m_len += len;
1060 		dst += len;
1061 		m2->m_data += len;
1062 		m2->m_len -= len;
1063 		m2 = m2->m_next;
1064 	}
1065 	if (m1->m_len < siz)
1066 		return (EBADRPC);
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Advance the position in the mbuf chain.
1072  */
1073 int
1074 nfs_adv(mdp, dposp, offs, left)
1075 	struct mbuf **mdp;
1076 	caddr_t *dposp;
1077 	int offs;
1078 	int left;
1079 {
1080 	struct mbuf *m;
1081 	int s;
1082 
1083 	m = *mdp;
1084 	s = left;
1085 	while (s < offs) {
1086 		offs -= s;
1087 		m = m->m_next;
1088 		if (m == NULL)
1089 			return (EBADRPC);
1090 		s = m->m_len;
1091 	}
1092 	*mdp = m;
1093 	*dposp = mtod(m, caddr_t)+offs;
1094 	return (0);
1095 }
1096 
1097 /*
1098  * Copy a string into mbufs for the hard cases...
1099  */
1100 int
1101 nfsm_strtmbuf(mb, bpos, cp, siz)
1102 	struct mbuf **mb;
1103 	char **bpos;
1104 	const char *cp;
1105 	long siz;
1106 {
1107 	struct mbuf *m1 = NULL, *m2;
1108 	long left, xfer, len, tlen;
1109 	u_int32_t *tl;
1110 	int putsize;
1111 
1112 	putsize = 1;
1113 	m2 = *mb;
1114 	left = M_TRAILINGSPACE(m2);
1115 	if (left > 0) {
1116 		tl = ((u_int32_t *)(*bpos));
1117 		*tl++ = txdr_unsigned(siz);
1118 		putsize = 0;
1119 		left -= NFSX_UNSIGNED;
1120 		m2->m_len += NFSX_UNSIGNED;
1121 		if (left > 0) {
1122 			memcpy((caddr_t) tl, cp, left);
1123 			siz -= left;
1124 			cp += left;
1125 			m2->m_len += left;
1126 			left = 0;
1127 		}
1128 	}
1129 	/* Loop around adding mbufs */
1130 	while (siz > 0) {
1131 		m1 = m_get(M_WAIT, MT_DATA);
1132 		MCLAIM(m1, &nfs_mowner);
1133 		if (siz > MLEN)
1134 			m_clget(m1, M_WAIT);
1135 		m1->m_len = NFSMSIZ(m1);
1136 		m2->m_next = m1;
1137 		m2 = m1;
1138 		tl = mtod(m1, u_int32_t *);
1139 		tlen = 0;
1140 		if (putsize) {
1141 			*tl++ = txdr_unsigned(siz);
1142 			m1->m_len -= NFSX_UNSIGNED;
1143 			tlen = NFSX_UNSIGNED;
1144 			putsize = 0;
1145 		}
1146 		if (siz < m1->m_len) {
1147 			len = nfsm_rndup(siz);
1148 			xfer = siz;
1149 			if (xfer < len)
1150 				*(tl+(xfer>>2)) = 0;
1151 		} else {
1152 			xfer = len = m1->m_len;
1153 		}
1154 		memcpy((caddr_t) tl, cp, xfer);
1155 		m1->m_len = len+tlen;
1156 		siz -= xfer;
1157 		cp += xfer;
1158 	}
1159 	*mb = m1;
1160 	*bpos = mtod(m1, caddr_t)+m1->m_len;
1161 	return (0);
1162 }
1163 
1164 /*
1165  * Directory caching routines. They work as follows:
1166  * - a cache is maintained per VDIR nfsnode.
1167  * - for each offset cookie that is exported to userspace, and can
1168  *   thus be thrown back at us as an offset to VOP_READDIR, store
1169  *   information in the cache.
1170  * - cached are:
1171  *   - cookie itself
1172  *   - blocknumber (essentially just a search key in the buffer cache)
1173  *   - entry number in block.
1174  *   - offset cookie of block in which this entry is stored
1175  *   - 32 bit cookie if NFSMNT_XLATECOOKIE is used.
1176  * - entries are looked up in a hash table
1177  * - also maintained is an LRU list of entries, used to determine
1178  *   which ones to delete if the cache grows too large.
1179  * - if 32 <-> 64 translation mode is requested for a filesystem,
1180  *   the cache also functions as a translation table
1181  * - in the translation case, invalidating the cache does not mean
1182  *   flushing it, but just marking entries as invalid, except for
1183  *   the <64bit cookie, 32bitcookie> pair which is still valid, to
1184  *   still be able to use the cache as a translation table.
1185  * - 32 bit cookies are uniquely created by combining the hash table
1186  *   entry value, and one generation count per hash table entry,
1187  *   incremented each time an entry is appended to the chain.
1188  * - the cache is invalidated each time a directory is modified
1189  * - sanity checks are also done; if an entry in a block turns
1190  *   out not to have a matching cookie, the cache is invalidated
1191  *   and a new block starting from the wanted offset is fetched from
1192  *   the server.
1193  * - directory entries as read from the server are extended to contain
1194  *   the 64bit and, optionally, the 32bit cookies, for sanity checking
1195  *   the cache and exporting them to userspace through the cookie
1196  *   argument to VOP_READDIR.
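 *
 * A sketch of the translated 32 bit cookie layout (the authoritative
 * construction is in nfs_enterdircache() below):
 *
 *	 31           24 23                            0
 *	+---------------+------------------------------+
 *	|  hash bucket  | per-bucket generation counter |
 *	+---------------+------------------------------+
 *
 *	cookie32 = (hashent << 24) | (gen & 0xffffff)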
1197  */
1198 
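/*
 * Simple hash for directory offset cookies: sum the bytes of the
 * 64 bit offset.
 */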
1199 u_long
1200 nfs_dirhash(off)
1201 	off_t off;
1202 {
1203 	int i;
1204 	char *cp = (char *)&off;
1205 	u_long sum = 0L;
1206 
1207 	for (i = 0 ; i < sizeof (off); i++)
1208 		sum += *cp++;
1209 
1210 	return sum;
1211 }
1212 
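/*
 * The directory cookie cache of an nfsnode is protected by its vnode's
 * v_interlock, via the macros below.
 */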
1213 #define	_NFSDC_MTX(np)		(&NFSTOV(np)->v_interlock)
1214 #define	NFSDC_LOCK(np)		simple_lock(_NFSDC_MTX(np))
1215 #define	NFSDC_UNLOCK(np)	simple_unlock(_NFSDC_MTX(np))
1216 #define	NFSDC_ASSERT_LOCKED(np) LOCK_ASSERT(simple_lock_held(_NFSDC_MTX(np)))
1217 
1218 void
1219 nfs_initdircache(vp)
1220 	struct vnode *vp;
1221 {
1222 	struct nfsnode *np = VTONFS(vp);
1223 	struct nfsdirhashhead *dircache;
1224 
1225 	dircache = hashinit(NFS_DIRHASHSIZ, HASH_LIST, M_NFSDIROFF,
1226 	    M_WAITOK, &nfsdirhashmask);
1227 
1228 	NFSDC_LOCK(np);
1229 	if (np->n_dircache == NULL) {
1230 		np->n_dircachesize = 0;
1231 		np->n_dircache = dircache;
1232 		dircache = NULL;
1233 		TAILQ_INIT(&np->n_dirchain);
1234 	}
1235 	NFSDC_UNLOCK(np);
1236 	if (dircache)
1237 		hashdone(dircache, M_NFSDIROFF);
1238 }
1239 
1240 void
1241 nfs_initdirxlatecookie(vp)
1242 	struct vnode *vp;
1243 {
1244 	struct nfsnode *np = VTONFS(vp);
1245 	unsigned *dirgens;
1246 
1247 	KASSERT(VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_XLATECOOKIE);
1248 
1249 	dirgens = malloc(NFS_DIRHASHSIZ * sizeof (unsigned), M_NFSDIROFF,
1250 	    M_WAITOK|M_ZERO);
1251 	NFSDC_LOCK(np);
1252 	if (np->n_dirgens == NULL) {
1253 		np->n_dirgens = dirgens;
1254 		dirgens = NULL;
1255 	}
1256 	NFSDC_UNLOCK(np);
1257 	if (dirgens)
1258 		free(dirgens, M_NFSDIROFF);
1259 }
1260 
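/*
 * Sentinel entry returned for offset cookie 0; it is never linked into
 * the cache and never freed.
 */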
1261 static const struct nfsdircache dzero;
1262 
1263 static void nfs_unlinkdircache __P((struct nfsnode *np, struct nfsdircache *));
1264 static void nfs_putdircache_unlocked __P((struct nfsnode *,
1265     struct nfsdircache *));
1266 
1267 static void
1268 nfs_unlinkdircache(np, ndp)
1269 	struct nfsnode *np;
1270 	struct nfsdircache *ndp;
1271 {
1272 
1273 	NFSDC_ASSERT_LOCKED(np);
1274 	KASSERT(ndp != &dzero);
1275 
1276 	if (LIST_NEXT(ndp, dc_hash) == (void *)-1)
1277 		return;
1278 
1279 	TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain);
1280 	LIST_REMOVE(ndp, dc_hash);
1281 	LIST_NEXT(ndp, dc_hash) = (void *)-1; /* mark as unlinked */
1282 
1283 	nfs_putdircache_unlocked(np, ndp);
1284 }
1285 
1286 void
1287 nfs_putdircache(np, ndp)
1288 	struct nfsnode *np;
1289 	struct nfsdircache *ndp;
1290 {
1291 	int ref;
1292 
1293 	if (ndp == &dzero)
1294 		return;
1295 
1296 	KASSERT(ndp->dc_refcnt > 0);
1297 	NFSDC_LOCK(np);
1298 	ref = --ndp->dc_refcnt;
1299 	NFSDC_UNLOCK(np);
1300 
1301 	if (ref == 0)
1302 		free(ndp, M_NFSDIROFF);
1303 }
1304 
1305 static void
1306 nfs_putdircache_unlocked(struct nfsnode *np __unused, struct nfsdircache *ndp)
1307 {
1308 	int ref;
1309 
1310 	NFSDC_ASSERT_LOCKED(np);
1311 
1312 	if (ndp == &dzero)
1313 		return;
1314 
1315 	KASSERT(ndp->dc_refcnt > 0);
1316 	ref = --ndp->dc_refcnt;
1317 	if (ref == 0)
1318 		free(ndp, M_NFSDIROFF);
1319 }
1320 
1321 struct nfsdircache *
1322 nfs_searchdircache(vp, off, do32, hashent)
1323 	struct vnode *vp;
1324 	off_t off;
1325 	int do32;
1326 	int *hashent;
1327 {
1328 	struct nfsdirhashhead *ndhp;
1329 	struct nfsdircache *ndp = NULL;
1330 	struct nfsnode *np = VTONFS(vp);
1331 	unsigned ent;
1332 
1333 	/*
1334 	 * Zero is always a valid cookie.
1335 	 */
1336 	if (off == 0)
1337 		/* XXXUNCONST */
1338 		return (struct nfsdircache *)__UNCONST(&dzero);
1339 
1340 	if (!np->n_dircache)
1341 		return NULL;
1342 
1343 	/*
1344 	 * If a 32 bit cookie is used as the search key, reconstruct the
1345 	 * hash bucket directly from it. Otherwise use the hash function.
1346 	 */
1347 	if (do32) {
1348 		ent = (u_int32_t)off >> 24;
1349 		if (ent >= NFS_DIRHASHSIZ)
1350 			return NULL;
1351 		ndhp = &np->n_dircache[ent];
1352 	} else {
1353 		ndhp = NFSDIRHASH(np, off);
1354 	}
1355 
1356 	if (hashent)
1357 		*hashent = (int)(ndhp - np->n_dircache);
1358 
1359 	NFSDC_LOCK(np);
1360 	if (do32) {
1361 		LIST_FOREACH(ndp, ndhp, dc_hash) {
1362 			if (ndp->dc_cookie32 == (u_int32_t)off) {
1363 				/*
1364 				 * An invalidated entry will become the
1365 				 * start of a new block fetched from
1366 				 * the server.
1367 				 */
1368 				if (ndp->dc_flags & NFSDC_INVALID) {
1369 					ndp->dc_blkcookie = ndp->dc_cookie;
1370 					ndp->dc_entry = 0;
1371 					ndp->dc_flags &= ~NFSDC_INVALID;
1372 				}
1373 				break;
1374 			}
1375 		}
1376 	} else {
1377 		LIST_FOREACH(ndp, ndhp, dc_hash) {
1378 			if (ndp->dc_cookie == off)
1379 				break;
1380 		}
1381 	}
1382 	if (ndp != NULL)
1383 		ndp->dc_refcnt++;
1384 	NFSDC_UNLOCK(np);
1385 	return ndp;
1386 }
1387 
1388 
1389 struct nfsdircache *
1390 nfs_enterdircache(struct vnode *vp, off_t off, off_t blkoff, int en,
1391     daddr_t blkno __unused)
1392 {
1393 	struct nfsnode *np = VTONFS(vp);
1394 	struct nfsdirhashhead *ndhp;
1395 	struct nfsdircache *ndp = NULL;
1396 	struct nfsdircache *newndp = NULL;
1397 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1398 	int hashent = 0, gen, overwrite;	/* XXX: GCC */
1399 
1400 	/*
1401 	 * XXX refuse entries for offset 0. amd(8) erroneously sets
1402 	 * cookie 0 for the '.' entry, making this necessary. This
1403 	 * isn't so bad, as 0 is a special case anyway.
1404 	 */
1405 	if (off == 0)
1406 		/* XXXUNCONST */
1407 		return (struct nfsdircache *)__UNCONST(&dzero);
1408 
1409 	if (!np->n_dircache)
1410 		/*
1411 		 * XXX would like to do this in nfs_nget but vtype
1412 		 * isn't known at that time.
1413 		 */
1414 		nfs_initdircache(vp);
1415 
1416 	if ((nmp->nm_flag & NFSMNT_XLATECOOKIE) && !np->n_dirgens)
1417 		nfs_initdirxlatecookie(vp);
1418 
1419 retry:
1420 	ndp = nfs_searchdircache(vp, off, 0, &hashent);
1421 
1422 	NFSDC_LOCK(np);
1423 	if (ndp && (ndp->dc_flags & NFSDC_INVALID) == 0) {
1424 		/*
1425 		 * Overwriting an old entry. Check if it's the same.
1426 		 * If so, just return. If not, remove the old entry.
1427 		 */
1428 		if (ndp->dc_blkcookie == blkoff && ndp->dc_entry == en)
1429 			goto done;
1430 		nfs_unlinkdircache(np, ndp);
1431 		nfs_putdircache_unlocked(np, ndp);
1432 		ndp = NULL;
1433 	}
1434 
1435 	ndhp = &np->n_dircache[hashent];
1436 
1437 	if (!ndp) {
1438 		if (newndp == NULL) {
1439 			NFSDC_UNLOCK(np);
1440 			newndp = malloc(sizeof(*ndp), M_NFSDIROFF, M_WAITOK);
1441 			newndp->dc_refcnt = 1;
1442 			LIST_NEXT(newndp, dc_hash) = (void *)-1;
1443 			goto retry;
1444 		}
1445 		ndp = newndp;
1446 		newndp = NULL;
1447 		overwrite = 0;
1448 		if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
1449 			/*
1450 			 * We're allocating a new entry, so bump the
1451 			 * generation number.
1452 			 */
1453 			KASSERT(np->n_dirgens);
1454 			gen = ++np->n_dirgens[hashent];
1455 			if (gen == 0) {
1456 				np->n_dirgens[hashent]++;
1457 				gen++;
1458 			}
1459 			ndp->dc_cookie32 = (hashent << 24) | (gen & 0xffffff);
1460 		}
1461 	} else
1462 		overwrite = 1;
1463 
1464 	ndp->dc_cookie = off;
1465 	ndp->dc_blkcookie = blkoff;
1466 	ndp->dc_entry = en;
1467 	ndp->dc_flags = 0;
1468 
1469 	if (overwrite)
1470 		goto done;
1471 
1472 	/*
1473 	 * If the maximum directory cookie cache size has been reached
1474 	 * for this node, take one off the front. The idea is that
1475 	 * directories are typically read front-to-back once, so that
1476 	 * the oldest entries can be thrown away without much performance
1477 	 * loss.
1478 	 */
1479 	if (np->n_dircachesize == NFS_MAXDIRCACHE) {
1480 		nfs_unlinkdircache(np, TAILQ_FIRST(&np->n_dirchain));
1481 	} else
1482 		np->n_dircachesize++;
1483 
1484 	KASSERT(ndp->dc_refcnt == 1);
1485 	LIST_INSERT_HEAD(ndhp, ndp, dc_hash);
1486 	TAILQ_INSERT_TAIL(&np->n_dirchain, ndp, dc_chain);
1487 	ndp->dc_refcnt++;
1488 done:
1489 	KASSERT(ndp->dc_refcnt > 0);
1490 	NFSDC_UNLOCK(np);
1491 	if (newndp)
1492 		nfs_putdircache(np, newndp);
1493 	return ndp;
1494 }
1495 
1496 void
1497 nfs_invaldircache(vp, flags)
1498 	struct vnode *vp;
1499 	int flags;
1500 {
1501 	struct nfsnode *np = VTONFS(vp);
1502 	struct nfsdircache *ndp = NULL;
1503 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1504 	const boolean_t forcefree = flags & NFS_INVALDIRCACHE_FORCE;
1505 
1506 #ifdef DIAGNOSTIC
1507 	if (vp->v_type != VDIR)
1508 		panic("nfs: invaldircache: not dir");
1509 #endif
1510 
1511 	if ((flags & NFS_INVALDIRCACHE_KEEPEOF) == 0)
1512 		np->n_flag &= ~NEOFVALID;
1513 
1514 	if (!np->n_dircache)
1515 		return;
1516 
1517 	NFSDC_LOCK(np);
1518 	if (!(nmp->nm_flag & NFSMNT_XLATECOOKIE) || forcefree) {
1519 		while ((ndp = TAILQ_FIRST(&np->n_dirchain)) != NULL) {
1520 			KASSERT(!forcefree || ndp->dc_refcnt == 1);
1521 			nfs_unlinkdircache(np, ndp);
1522 		}
1523 		np->n_dircachesize = 0;
1524 		if (forcefree && np->n_dirgens) {
1525 			FREE(np->n_dirgens, M_NFSDIROFF);
1526 			np->n_dirgens = NULL;
1527 		}
1528 	} else {
1529 		TAILQ_FOREACH(ndp, &np->n_dirchain, dc_chain)
1530 			ndp->dc_flags |= NFSDC_INVALID;
1531 	}
1532 
1533 	NFSDC_UNLOCK(np);
1534 }
1535 
1536 /*
1537  * Called once before VFS init to initialize shared and
1538  * server-specific data structures.
1539  */
1540 static int
1541 nfs_init0(void)
1542 {
1543 	nfsrtt.pos = 0;
1544 	rpc_vers = txdr_unsigned(RPC_VER2);
1545 	rpc_call = txdr_unsigned(RPC_CALL);
1546 	rpc_reply = txdr_unsigned(RPC_REPLY);
1547 	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
1548 	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
1549 	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
1550 	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
1551 	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
1552 	rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
1553 	nfs_prog = txdr_unsigned(NFS_PROG);
1554 	nqnfs_prog = txdr_unsigned(NQNFS_PROG);
1555 	nfs_true = txdr_unsigned(TRUE);
1556 	nfs_false = txdr_unsigned(FALSE);
1557 	nfs_xdrneg1 = txdr_unsigned(-1);
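	/* Convert NFS_TICKINTVL (in msec) to clock ticks, rounding to nearest. */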
1558 	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
1559 	if (nfs_ticks < 1)
1560 		nfs_ticks = 1;
1561 #ifdef NFSSERVER
1562 	nfsrv_init(0);			/* Init server data structures */
1563 	nfsrv_initcache();		/* Init the server request cache */
1564 #endif /* NFSSERVER */
1565 
1566 #if defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY))
1567 	nfsdreq_init();
1568 #endif /* defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) */
1569 
1570 #if defined(NFSSERVER) || !defined(NFS_V2_ONLY)
1571 	/*
1572 	 * Initialize the nqnfs data structures.
1573 	 */
1574 	if (nqnfsstarttime == 0) {
1575 		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
1576 			+ nqsrv_clockskew + nqsrv_writeslack;
1577 		NQLOADNOVRAM(nqnfsstarttime);
1578 		CIRCLEQ_INIT(&nqtimerhead);
1579 		nqfhhashtbl = hashinit(NQLCHSZ, HASH_LIST, M_NQLEASE,
1580 		    M_WAITOK, &nqfhhash);
1581 	}
1582 #endif
1583 
1584 	exithook_establish(nfs_exit, NULL);
1585 
1586 	/*
1587 	 * Initialize reply list and start timer
1588 	 */
1589 	TAILQ_INIT(&nfs_reqq);
1590 	nfs_timer(NULL);
1591 	MOWNER_ATTACH(&nfs_mowner);
1592 
1593 #ifdef NFS
1594 	/* Initialize the kqueue structures */
1595 	nfs_kqinit();
1596 	/* Initialize the iod structures */
1597 	nfs_iodinit();
1598 #endif
1599 	return 0;
1600 }
1601 
1602 void
1603 nfs_init(void)
1604 {
1605 	static ONCE_DECL(nfs_init_once);
1606 
1607 	RUN_ONCE(&nfs_init_once, nfs_init0);
1608 }
1609 
1610 #ifdef NFS
1611 /*
1612  * Called once at VFS init to initialize client-specific data structures.
1613  */
1614 void
1615 nfs_vfs_init()
1616 {
1617 	/* Initialize NFS server / client shared data. */
1618 	nfs_init();
1619 
1620 	nfs_nhinit();			/* Init the nfsnode table */
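	/* Roughly 1/16 of physical memory, expressed in bytes. */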
1621 	nfs_commitsize = uvmexp.npages << (PAGE_SHIFT - 4);
1622 }
1623 
1624 void
1625 nfs_vfs_reinit()
1626 {
1627 	nfs_nhreinit();
1628 }
1629 
1630 void
1631 nfs_vfs_done()
1632 {
1633 	nfs_nhdone();
1634 }
1635 
1636 /*
1637  * Attribute cache routines.
1638  * nfs_loadattrcache() - loads or updates the cache contents from attributes
1639  *	that are on the mbuf list
1640  * nfs_getattrcache() - returns valid attributes if found in cache, returns
1641  *	error otherwise
1642  */
1643 
1644 /*
1645  * Load the attribute cache (that lives in the nfsnode entry) with
1646  * the values on the mbuf list and
1647  * iff vaper is not NULL,
1648  *    copy the attributes to *vaper.
1649  */
1650 int
1651 nfsm_loadattrcache(vpp, mdp, dposp, vaper, flags)
1652 	struct vnode **vpp;
1653 	struct mbuf **mdp;
1654 	caddr_t *dposp;
1655 	struct vattr *vaper;
1656 	int flags;
1657 {
1658 	int32_t t1;
1659 	caddr_t cp2;
1660 	int error = 0;
1661 	struct mbuf *md;
1662 	int v3 = NFS_ISV3(*vpp);
1663 
1664 	md = *mdp;
1665 	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
1666 	error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
1667 	if (error)
1668 		return (error);
1669 	return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags);
1670 }
1671 
1672 int
1673 nfs_loadattrcache(vpp, fp, vaper, flags)
1674 	struct vnode **vpp;
1675 	struct nfs_fattr *fp;
1676 	struct vattr *vaper;
1677 	int flags;
1678 {
1679 	struct vnode *vp = *vpp;
1680 	struct vattr *vap;
1681 	int v3 = NFS_ISV3(vp);
1682 	enum vtype vtyp;
1683 	u_short vmode;
1684 	struct timespec mtime;
1685 	struct timespec ctime;
1686 	struct vnode *nvp;
1687 	int32_t rdev;
1688 	struct nfsnode *np;
1689 	extern int (**spec_nfsv2nodeop_p) __P((void *));
1690 	uid_t uid;
1691 	gid_t gid;
1692 
1693 	if (v3) {
1694 		vtyp = nfsv3tov_type(fp->fa_type);
1695 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1696 		rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1),
1697 			fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2));
1698 		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
1699 		fxdr_nfsv3time(&fp->fa3_ctime, &ctime);
1700 	} else {
1701 		vtyp = nfsv2tov_type(fp->fa_type);
1702 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1703 		if (vtyp == VNON || vtyp == VREG)
1704 			vtyp = IFTOVT(vmode);
1705 		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
1706 		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
1707 		ctime.tv_sec = fxdr_unsigned(u_int32_t,
1708 		    fp->fa2_ctime.nfsv2_sec);
1709 		ctime.tv_nsec = 0;
1710 
1711 		/*
1712 		 * Really ugly NFSv2 kludge.
1713 		 */
1714 		if (vtyp == VCHR && rdev == 0xffffffff)
1715 			vtyp = VFIFO;
1716 	}
1717 
1718 	vmode &= ALLPERMS;
1719 
1720 	/*
1721 	 * If v_type == VNON it is a new node, so fill in the v_type,
1722 	 * n_mtime fields. Check to see if it represents a special
1723 	 * device, and if so, check for a possible alias. Once the
1724 	 * correct vnode has been obtained, fill in the rest of the
1725 	 * information.
1726 	 */
1727 	np = VTONFS(vp);
1728 	if (vp->v_type == VNON) {
1729 		vp->v_type = vtyp;
1730 		if (vp->v_type == VFIFO) {
1731 			extern int (**fifo_nfsv2nodeop_p) __P((void *));
1732 			vp->v_op = fifo_nfsv2nodeop_p;
1733 		} else if (vp->v_type == VREG) {
1734 			lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0);
1735 		} else if (vp->v_type == VCHR || vp->v_type == VBLK) {
1736 			vp->v_op = spec_nfsv2nodeop_p;
1737 			nvp = checkalias(vp, (dev_t)rdev, vp->v_mount);
1738 			if (nvp) {
1739 				/*
1740 				 * Discard unneeded vnode, but save its nfsnode.
1741 				 * Since the nfsnode does not have a lock, its
1742 				 * vnode lock has to be carried over.
1743 				 */
1744 				/*
1745 				 * XXX is the old node sure to be locked here?
1746 				 */
1747 				KASSERT(lockstatus(&vp->v_lock) ==
1748 				    LK_EXCLUSIVE);
1749 				nvp->v_data = vp->v_data;
1750 				vp->v_data = NULL;
1751 				VOP_UNLOCK(vp, 0);
1752 				vp->v_op = spec_vnodeop_p;
1753 				vrele(vp);
1754 				vgone(vp);
1755 				lockmgr(&nvp->v_lock, LK_EXCLUSIVE,
1756 				    &nvp->v_interlock);
1757 				/*
1758 				 * Reinitialize aliased node.
1759 				 */
1760 				np->n_vnode = nvp;
1761 				*vpp = vp = nvp;
1762 			}
1763 		}
1764 		np->n_mtime = mtime;
1765 	}
1766 	uid = fxdr_unsigned(uid_t, fp->fa_uid);
1767 	gid = fxdr_unsigned(gid_t, fp->fa_gid);
1768 	vap = np->n_vattr;
1769 
1770 	/*
1771 	 * Invalidate access cache if uid, gid, mode or ctime changed.
1772 	 */
1773 	if (np->n_accstamp != -1 &&
1774 	    (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode
1775 	    || timespeccmp(&ctime, &vap->va_ctime, !=)))
1776 		np->n_accstamp = -1;
1777 
1778 	vap->va_type = vtyp;
1779 	vap->va_mode = vmode;
1780 	vap->va_rdev = (dev_t)rdev;
1781 	vap->va_mtime = mtime;
1782 	vap->va_ctime = ctime;
1783 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
1784 	switch (vtyp) {
1785 	case VDIR:
1786 		vap->va_blocksize = NFS_DIRFRAGSIZ;
1787 		break;
1788 	case VBLK:
1789 		vap->va_blocksize = BLKDEV_IOSIZE;
1790 		break;
1791 	case VCHR:
1792 		vap->va_blocksize = MAXBSIZE;
1793 		break;
1794 	default:
1795 		vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize :
1796 		    fxdr_unsigned(int32_t, fp->fa2_blocksize);
1797 		break;
1798 	}
1799 	if (v3) {
1800 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1801 		vap->va_uid = uid;
1802 		vap->va_gid = gid;
1803 		vap->va_size = fxdr_hyper(&fp->fa3_size);
1804 		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
1805 		vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
1806 		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
1807 		vap->va_flags = 0;
1808 		vap->va_filerev = 0;
1809 	} else {
1810 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1811 		vap->va_uid = uid;
1812 		vap->va_gid = gid;
1813 		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
1814 		vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
1815 		    * NFS_FABLKSIZE;
1816 		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
1817 		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
1818 		vap->va_flags = 0;
1819 		vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
1820 		vap->va_filerev = 0;
1821 	}
1822 	if (vap->va_size != np->n_size) {
1823 		if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
1824 			vap->va_size = np->n_size;
1825 		} else {
1826 			np->n_size = vap->va_size;
1827 			if (vap->va_type == VREG) {
1828 				/*
1829 				 * We can't free pages if NAC_NOTRUNC because
1830 				 * the pages may be owned by ourselves.
1831 				 */
1832 				if (flags & NAC_NOTRUNC) {
1833 					np->n_flag |= NTRUNCDELAYED;
1834 				} else {
1835 					genfs_node_wrlock(vp);
1836 					simple_lock(&vp->v_interlock);
1837 					(void)VOP_PUTPAGES(vp, 0,
1838 					    0, PGO_SYNCIO | PGO_CLEANIT |
1839 					    PGO_FREE | PGO_ALLPAGES);
1840 					uvm_vnp_setsize(vp, np->n_size);
1841 					genfs_node_unlock(vp);
1842 				}
1843 			}
1844 		}
1845 	}
1846 	np->n_attrstamp = time_second;
1847 	if (vaper != NULL) {
1848 		memcpy((caddr_t)vaper, (caddr_t)vap, sizeof(*vap));
1849 		if (np->n_flag & NCHG) {
1850 			if (np->n_flag & NACC)
1851 				vaper->va_atime = np->n_atim;
1852 			if (np->n_flag & NUPD)
1853 				vaper->va_mtime = np->n_mtim;
1854 		}
1855 	}
1856 	return (0);
1857 }
1858 
1859 /*
1860  * Check the time stamp.
1861  * If the cache is valid, copy the contents to *vaper and return 0;
1862  * otherwise return an error.
1863  */
1864 int
1865 nfs_getattrcache(vp, vaper)
1866 	struct vnode *vp;
1867 	struct vattr *vaper;
1868 {
1869 	struct nfsnode *np = VTONFS(vp);
1870 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1871 	struct vattr *vap;
1872 
1873 	if (np->n_attrstamp == 0 ||
1874 	    (time_second - np->n_attrstamp) >= NFS_ATTRTIMEO(nmp, np)) {
1875 		nfsstats.attrcache_misses++;
1876 		return (ENOENT);
1877 	}
1878 	nfsstats.attrcache_hits++;
1879 	vap = np->n_vattr;
1880 	if (vap->va_size != np->n_size) {
1881 		if (vap->va_type == VREG) {
1882 			if (np->n_flag & NMODIFIED) {
1883 				if (vap->va_size < np->n_size)
1884 					vap->va_size = np->n_size;
1885 				else
1886 					np->n_size = vap->va_size;
1887 			} else
1888 				np->n_size = vap->va_size;
1889 			genfs_node_wrlock(vp);
1890 			uvm_vnp_setsize(vp, np->n_size);
1891 			genfs_node_unlock(vp);
1892 		} else
1893 			np->n_size = vap->va_size;
1894 	}
1895 	memcpy((caddr_t)vaper, (caddr_t)vap, sizeof(struct vattr));
1896 	if (np->n_flag & NCHG) {
1897 		if (np->n_flag & NACC)
1898 			vaper->va_atime = np->n_atim;
1899 		if (np->n_flag & NUPD)
1900 			vaper->va_mtime = np->n_mtim;
1901 	}
1902 	return (0);
1903 }
1904 
1905 void
1906 nfs_delayedtruncate(vp)
1907 	struct vnode *vp;
1908 {
1909 	struct nfsnode *np = VTONFS(vp);
1910 
1911 	if (np->n_flag & NTRUNCDELAYED) {
1912 		np->n_flag &= ~NTRUNCDELAYED;
1913 		genfs_node_wrlock(vp);
1914 		simple_lock(&vp->v_interlock);
1915 		(void)VOP_PUTPAGES(vp, 0,
1916 		    0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES);
1917 		uvm_vnp_setsize(vp, np->n_size);
1918 		genfs_node_unlock(vp);
1919 	}
1920 }
1921 
1922 #define	NFS_WCCKLUDGE_TIMEOUT	(24 * 60 * 60)	/* 1 day */
1923 #define	NFS_WCCKLUDGE(nmp, now) \
1924 	(((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \
1925 	((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0)
1926 
1927 /*
1928  * nfs_check_wccdata: check inaccurate wcc_data
1929  *
1930  * => return non-zero if we shouldn't trust the wcc_data.
1931  * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
1932  */
1933 
1934 int
1935 nfs_check_wccdata(
1936     struct nfsnode *np __unused,
1937     const struct timespec *ctime __unused,
1938     struct timespec *mtime __unused,
1939     boolean_t docheck __unused
1940 )
1941 {
1942 	int error = 0;
1943 
1944 #if !defined(NFS_V2_ONLY)
1945 
1946 	if (docheck) {
1947 		struct vnode *vp = NFSTOV(np);
1948 		struct nfsmount *nmp;
1949 		long now = time_second;
1950 #if defined(DEBUG)
1951 		const char *reason = NULL; /* XXX: gcc */
1952 #endif
1953 
1954 		if (timespeccmp(&np->n_vattr->va_mtime, mtime, <=)) {
1955 #if defined(DEBUG)
1956 			reason = "mtime";
1957 #endif
1958 			error = EINVAL;
1959 		}
1960 
1961 		if (vp->v_type == VDIR &&
1962 		    timespeccmp(&np->n_vattr->va_ctime, ctime, <=)) {
1963 #if defined(DEBUG)
1964 			reason = "ctime";
1965 #endif
1966 			error = EINVAL;
1967 		}
1968 
1969 		nmp = VFSTONFS(vp->v_mount);
1970 		if (error) {
1971 
1972 			/*
1973 			 * Despite the fact that we've updated the file,
1974 			 * its timestamps were not updated as we
1975 			 * expected.
1976 			 * This means that the server has incompatible
1977 			 * timestamp semantics or (more likely)
1978 			 * the server time is not precise enough to
1979 			 * track each modification.
1980 			 * In that case, we disable wcc processing.
1981 			 *
1982 			 * Yes, strictly speaking, we should disable all
1983 			 * caching.  It's a compromise.
1984 			 */
1985 
1986 			simple_lock(&nmp->nm_slock);
1987 #if defined(DEBUG)
1988 			if (!NFS_WCCKLUDGE(nmp, now)) {
1989 				printf("%s: inaccurate wcc data (%s) detected,"
1990 				    " disabling wcc\n",
1991 				    vp->v_mount->mnt_stat.f_mntfromname,
1992 				    reason);
1993 			}
1994 #endif
1995 			nmp->nm_iflag |= NFSMNT_WCCKLUDGE;
1996 			nmp->nm_wcckludgetime = now;
1997 			simple_unlock(&nmp->nm_slock);
1998 		} else if (NFS_WCCKLUDGE(nmp, now)) {
1999 			error = EPERM; /* XXX */
2000 		} else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
2001 			simple_lock(&nmp->nm_slock);
2002 			if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
2003 #if defined(DEBUG)
2004 				printf("%s: re-enabling wcc\n",
2005 				    vp->v_mount->mnt_stat.f_mntfromname);
2006 #endif
2007 				nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE;
2008 			}
2009 			simple_unlock(&nmp->nm_slock);
2010 		}
2011 	}
2012 
2013 #else
2014 	do { if (&np) {} } while (/* CONSTCOND */ 0); /* for -Wunused */
2015 	do { if (&ctime) {} } while (/* CONSTCOND */ 0);
2016 	do { if (&mtime) {} } while (/* CONSTCOND */ 0);
2017 	do { if (&docheck) {} } while (/* CONSTCOND */ 0);
2018 #endif /* !defined(NFS_V2_ONLY) */
2019 
2020 	return error;
2021 }
2022 
2023 /*
2024  * Heuristic to see if the server XDR encodes directory cookies or not.
2025  * It is not supposed to, but a lot of servers may do this. Also, since
2026  * most/all servers will implement V2 as well, it is expected that they
2027  * may return just 32 bits worth of cookie information, so we need to
2028  * find out in which 32 bits this information is available. We do this
2029  * to avoid trouble with emulated binaries that can't handle 64 bit
2030  * directory offsets.
2031  */
2032 
2033 void
2034 nfs_cookieheuristic(vp, flagp, l, cred)
2035 	struct vnode *vp;
2036 	int *flagp;
2037 	struct lwp *l;
2038 	kauth_cred_t cred;
2039 {
2040 	struct uio auio;
2041 	struct iovec aiov;
2042 	caddr_t tbuf, cp;
2043 	struct dirent *dp;
2044 	off_t *cookies = NULL, *cop;
2045 	int error, eof, nc, len;
2046 
2047 	MALLOC(tbuf, caddr_t, NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
2048 
2049 	aiov.iov_base = tbuf;
2050 	aiov.iov_len = NFS_DIRFRAGSIZ;
2051 	auio.uio_iov = &aiov;
2052 	auio.uio_iovcnt = 1;
2053 	auio.uio_rw = UIO_READ;
2054 	auio.uio_resid = NFS_DIRFRAGSIZ;
2055 	auio.uio_offset = 0;
2056 	UIO_SETUP_SYSSPACE(&auio);
2057 
2058 	error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc);
2059 
2060 	len = NFS_DIRFRAGSIZ - auio.uio_resid;
2061 	if (error || len == 0) {
2062 		FREE(tbuf, M_TEMP);
2063 		if (cookies)
2064 			free(cookies, M_TEMP);
2065 		return;
2066 	}
2067 
2068 	/*
2069 	 * Find the first valid entry and look at its offset cookie.
2070 	 */
2071 
2072 	cp = tbuf;
2073 	for (cop = cookies; len > 0; len -= dp->d_reclen) {
2074 		dp = (struct dirent *)cp;
2075 		if (dp->d_fileno != 0 && len >= dp->d_reclen) {
2076 			if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
2077 				*flagp |= NFSMNT_SWAPCOOKIE;
2078 				nfs_invaldircache(vp, 0);
2079 				nfs_vinvalbuf(vp, 0, cred, l, 1);
2080 			}
2081 			break;
2082 		}
2083 		cop++;
2084 		cp += dp->d_reclen;
2085 	}
2086 
2087 	FREE(tbuf, M_TEMP);
2088 	free(cookies, M_TEMP);
2089 }
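/*
 * An illustrative sketch (not part of the original source) of what the
 * heuristic above keys on: a server that XDR-encodes its 32-bit cookie
 * value into the wrong half of the 64-bit field yields cookies whose
 * low word is zero while the high word is not.
 */
#if 0
	off_t cookie = 0x0000000500000000LL;	/* hypothetical wire value */

	if ((cookie >> 32) != 0 && (cookie & 0xffffffffLL) == 0) {
		/* same test as above: mark the mount NFSMNT_SWAPCOOKIE */
	}
#endif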
2090 #endif /* NFS */
2091 
2092 #ifdef NFSSERVER
2093 /*
2094  * Set up nameidata for a lookup() call and do it.
2095  *
2096  * If pubflag is set, this call is done for a lookup operation on the
2097  * public filehandle. In that case we allow crossing mountpoints and
2098  * absolute pathnames. However, the caller is expected to check that
2099  * the lookup result is within the public fs, and deny access if
2100  * it is not.
2101  */
2102 int
2103 nfs_namei(ndp, nsfh, len, slp, nam, mdp, dposp, retdirp, l, kerbflag, pubflag)
2104 	struct nameidata *ndp;
2105 	nfsrvfh_t *nsfh;
2106 	uint32_t len;
2107 	struct nfssvc_sock *slp;
2108 	struct mbuf *nam;
2109 	struct mbuf **mdp;
2110 	caddr_t *dposp;
2111 	struct vnode **retdirp;
2112 	struct lwp *l;
2113 	int kerbflag, pubflag;
2114 {
2115 	int i, rem;
2116 	struct mbuf *md;
2117 	char *fromcp, *tocp, *cp;
2118 	struct iovec aiov;
2119 	struct uio auio;
2120 	struct vnode *dp;
2121 	int error, rdonly, linklen;
2122 	struct componentname *cnp = &ndp->ni_cnd;
2123 
2124 	*retdirp = (struct vnode *)0;
2125 
2126 	if ((len + 1) > MAXPATHLEN)
2127 		return (ENAMETOOLONG);
2128 	if (len == 0)
2129 		return (EACCES);
2130 	cnp->cn_pnbuf = PNBUF_GET();
2131 
2132 	/*
2133 	 * Copy the name from the mbuf list to ndp->ni_pnbuf
2134 	 * and set the various ndp fields appropriately.
2135 	 */
2136 	fromcp = *dposp;
2137 	tocp = cnp->cn_pnbuf;
2138 	md = *mdp;
2139 	rem = mtod(md, caddr_t) + md->m_len - fromcp;
2140 	for (i = 0; i < len; i++) {
2141 		while (rem == 0) {
2142 			md = md->m_next;
2143 			if (md == NULL) {
2144 				error = EBADRPC;
2145 				goto out;
2146 			}
2147 			fromcp = mtod(md, caddr_t);
2148 			rem = md->m_len;
2149 		}
2150 		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
2151 			error = EACCES;
2152 			goto out;
2153 		}
2154 		*tocp++ = *fromcp++;
2155 		rem--;
2156 	}
2157 	*tocp = '\0';
2158 	*mdp = md;
2159 	*dposp = fromcp;
2160 	len = nfsm_rndup(len)-len;
2161 	if (len > 0) {
2162 		if (rem >= len)
2163 			*dposp += len;
2164 		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
2165 			goto out;
2166 	}
2167 
2168 	/*
2169 	 * Extract and set starting directory.
2170 	 */
2171 	error = nfsrv_fhtovp(nsfh, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
2172 	    nam, &rdonly, kerbflag, pubflag);
2173 	if (error)
2174 		goto out;
2175 	if (dp->v_type != VDIR) {
2176 		vrele(dp);
2177 		error = ENOTDIR;
2178 		goto out;
2179 	}
2180 
2181 	if (rdonly)
2182 		cnp->cn_flags |= RDONLY;
2183 
2184 	*retdirp = dp;
2185 
2186 	if (pubflag) {
2187 		/*
2188 		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
2189 		 * and the 'native path' indicator.
2190 		 */
2191 		cp = PNBUF_GET();
2192 		fromcp = cnp->cn_pnbuf;
2193 		tocp = cp;
2194 		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
2195 			switch ((unsigned char)*fromcp) {
2196 			case WEBNFS_NATIVE_CHAR:
2197 				/*
2198 				 * 'Native' path for us is the same
2199 				 * as a path according to the NFS spec,
2200 				 * just skip the escape char.
2201 				 */
2202 				fromcp++;
2203 				break;
2204 			/*
2205 			 * More may be added in the future, range 0x80-0xff
2206 			 */
2207 			default:
2208 				error = EIO;
2209 				PNBUF_PUT(cp);
2210 				goto out;
2211 			}
2212 		}
2213 		/*
2214 		 * Translate the '%' escapes, URL-style.
2215 		 */
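		/*
		 * e.g. (hypothetical input) a public lookup for "a%2eout"
		 * comes out as "a.out"; HEXSTRTOI() folds the two hex
		 * digits following '%' into a single byte.
		 */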
2216 		while (*fromcp != '\0') {
2217 			if (*fromcp == WEBNFS_ESC_CHAR) {
2218 				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
2219 					fromcp++;
2220 					*tocp++ = HEXSTRTOI(fromcp);
2221 					fromcp += 2;
2222 					continue;
2223 				} else {
2224 					error = ENOENT;
2225 					PNBUF_PUT(cp);
2226 					goto out;
2227 				}
2228 			} else
2229 				*tocp++ = *fromcp++;
2230 		}
2231 		*tocp = '\0';
2232 		PNBUF_PUT(cnp->cn_pnbuf);
2233 		cnp->cn_pnbuf = cp;
2234 	}
2235 
2236 	ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
2237 	ndp->ni_segflg = UIO_SYSSPACE;
2238 	ndp->ni_rootdir = rootvnode;
2239 
2240 	if (pubflag) {
2241 		ndp->ni_loopcnt = 0;
2242 		if (cnp->cn_pnbuf[0] == '/')
2243 			dp = rootvnode;
2244 	} else {
2245 		cnp->cn_flags |= NOCROSSMOUNT;
2246 	}
2247 
2248 	cnp->cn_lwp = l;
2249 	VREF(dp);
2250 
2251     for (;;) {
2252 	cnp->cn_nameptr = cnp->cn_pnbuf;
2253 	ndp->ni_startdir = dp;
2254 	/*
2255 	 * And call lookup() to do the real work
2256 	 */
2257 	error = lookup(ndp);
2258 	if (error) {
2259 		PNBUF_PUT(cnp->cn_pnbuf);
2260 		return (error);
2261 	}
2262 	/*
2263 	 * Check for encountering a symbolic link
2264 	 */
2265 	if ((cnp->cn_flags & ISSYMLINK) == 0) {
2266 		if (cnp->cn_flags & (SAVENAME | SAVESTART))
2267 			cnp->cn_flags |= HASBUF;
2268 		else
2269 			PNBUF_PUT(cnp->cn_pnbuf);
2270 		return (0);
2271 	} else {
2272 		if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
2273 			VOP_UNLOCK(ndp->ni_dvp, 0);
2274 		if (!pubflag) {
2275 			error = EINVAL;
2276 			break;
2277 		}
2278 
2279 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
2280 			error = ELOOP;
2281 			break;
2282 		}
2283 		if (ndp->ni_vp->v_mount->mnt_flag & MNT_SYMPERM) {
2284 			error = VOP_ACCESS(ndp->ni_vp, VEXEC, cnp->cn_cred,
2285 			    cnp->cn_lwp);
2286 			if (error != 0)
2287 				break;
2288 		}
2289 		if (ndp->ni_pathlen > 1)
2290 			cp = PNBUF_GET();
2291 		else
2292 			cp = cnp->cn_pnbuf;
2293 		aiov.iov_base = cp;
2294 		aiov.iov_len = MAXPATHLEN;
2295 		auio.uio_iov = &aiov;
2296 		auio.uio_iovcnt = 1;
2297 		auio.uio_offset = 0;
2298 		auio.uio_rw = UIO_READ;
2299 		auio.uio_resid = MAXPATHLEN;
2300 		UIO_SETUP_SYSSPACE(&auio);
2301 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
2302 		if (error) {
2303 		badlink:
2304 			if (ndp->ni_pathlen > 1)
2305 				PNBUF_PUT(cp);
2306 			break;
2307 		}
2308 		linklen = MAXPATHLEN - auio.uio_resid;
2309 		if (linklen == 0) {
2310 			error = ENOENT;
2311 			goto badlink;
2312 		}
2313 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
2314 			error = ENAMETOOLONG;
2315 			goto badlink;
2316 		}
2317 		if (ndp->ni_pathlen > 1) {
2318 			memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
2319 			PNBUF_PUT(cnp->cn_pnbuf);
2320 			cnp->cn_pnbuf = cp;
2321 		} else
2322 			cnp->cn_pnbuf[linklen] = '\0';
2323 		ndp->ni_pathlen += linklen;
2324 		vput(ndp->ni_vp);
2325 		dp = ndp->ni_dvp;
2326 		/*
2327 		 * Check if root directory should replace current directory.
2328 		 */
2329 		if (cnp->cn_pnbuf[0] == '/') {
2330 			vrele(dp);
2331 			dp = ndp->ni_rootdir;
2332 			VREF(dp);
2333 		}
2334 	}
2335    }
2336 	vrele(ndp->ni_dvp);
2337 	vput(ndp->ni_vp);
2338 	ndp->ni_vp = NULL;
2339 out:
2340 	PNBUF_PUT(cnp->cn_pnbuf);
2341 	return (error);
2342 }
2343 #endif /* NFSSERVER */
2344 
2345 /*
2346  * A fiddled version of m_adj() that ensures null fill to a 32-bit
2347  * boundary and only trims off the back end.
2348  *
2349  * 1. Trim off 'len' bytes, as m_adj(mp, -len) would.
2350  * 2. Add 'nul' bytes of zero padding at the end of the mbuf chain.
2351  */
2352 void
2353 nfs_zeropad(mp, len, nul)
2354 	struct mbuf *mp;
2355 	int len;
2356 	int nul;
2357 {
2358 	struct mbuf *m;
2359 	int count;
2360 
2361 	/*
2362 	 * Trim from tail.  Scan the mbuf chain,
2363 	 * calculating its length and finding the last mbuf.
2364 	 * If the adjustment only affects this mbuf, then just
2365 	 * adjust and return.  Otherwise, rescan and truncate
2366 	 * after the remaining size.
2367 	 */
2368 	count = 0;
2369 	m = mp;
2370 	for (;;) {
2371 		count += m->m_len;
2372 		if (m->m_next == NULL)
2373 			break;
2374 		m = m->m_next;
2375 	}
2376 
2377 	KDASSERT(count >= len);
2378 
2379 	if (m->m_len >= len) {
2380 		m->m_len -= len;
2381 	} else {
2382 		count -= len;
2383 		/*
2384 		 * Correct length for chain is "count".
2385 		 * Find the mbuf with last data, adjust its length,
2386 		 * and toss data from remaining mbufs on chain.
2387 		 */
2388 		for (m = mp; m; m = m->m_next) {
2389 			if (m->m_len >= count) {
2390 				m->m_len = count;
2391 				break;
2392 			}
2393 			count -= m->m_len;
2394 		}
2395 		KASSERT(m && m->m_next);
2396 		m_freem(m->m_next);
2397 		m->m_next = NULL;
2398 	}
2399 
2400 	KDASSERT(m->m_next == NULL);
2401 
2402 	/*
2403 	 * zero-padding.
2404 	 */
2405 	if (nul > 0) {
2406 		char *cp;
2407 		int i;
2408 
2409 		if (M_ROMAP(m) || M_TRAILINGSPACE(m) < nul) {
2410 			struct mbuf *n;
2411 
2412 			KDASSERT(MLEN >= nul);
2413 			n = m_get(M_WAIT, MT_DATA);
2414 			MCLAIM(n, &nfs_mowner);
2415 			n->m_len = nul;
2416 			n->m_next = NULL;
2417 			m->m_next = n;
2418 			cp = mtod(n, caddr_t);
2419 		} else {
2420 			cp = mtod(m, caddr_t) + m->m_len;
2421 			m->m_len += nul;
2422 		}
2423 		for (i = 0; i < nul; i++)
2424 			*cp++ = '\0';
2425 	}
2426 	return;
2427 }
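/*
 * A minimal usage sketch (not part of the original source), with
 * hypothetical values "mp" and "surplus": an XDR opaque of "siz" data
 * bytes must be NUL-padded out to a 32-bit boundary, and any surplus
 * bytes left in the chain beyond the data are trimmed off first.
 */
#if 0
	int siz = 5;				/* bytes of real data */
	int surplus = 2;			/* excess bytes in the chain */
	int pad = nfsm_rndup(siz) - siz;	/* 3 bytes of zero fill */

	nfs_zeropad(mp, surplus, pad);
#endif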
2428 
2429 /*
2430  * Make these functions instead of macros, so that the kernel text size
2431  * doesn't get too big...
2432  */
2433 void
2434 nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp)
2435 	struct nfsrv_descript *nfsd;
2436 	int before_ret;
2437 	struct vattr *before_vap;
2438 	int after_ret;
2439 	struct vattr *after_vap;
2440 	struct mbuf **mbp;
2441 	char **bposp;
2442 {
2443 	struct mbuf *mb = *mbp;
2444 	char *bpos = *bposp;
2445 	u_int32_t *tl;
2446 
2447 	if (before_ret) {
2448 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
2449 		*tl = nfs_false;
2450 	} else {
2451 		nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
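		/* 7 words: attributes_follow + size (2) + mtime (2) + ctime (2) */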
2452 		*tl++ = nfs_true;
2453 		txdr_hyper(before_vap->va_size, tl);
2454 		tl += 2;
2455 		txdr_nfsv3time(&(before_vap->va_mtime), tl);
2456 		tl += 2;
2457 		txdr_nfsv3time(&(before_vap->va_ctime), tl);
2458 	}
2459 	*bposp = bpos;
2460 	*mbp = mb;
2461 	nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
2462 }
2463 
2464 void
2465 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp)
2466 	struct nfsrv_descript *nfsd;
2467 	int after_ret;
2468 	struct vattr *after_vap;
2469 	struct mbuf **mbp;
2470 	char **bposp;
2471 {
2472 	struct mbuf *mb = *mbp;
2473 	char *bpos = *bposp;
2474 	u_int32_t *tl;
2475 	struct nfs_fattr *fp;
2476 
2477 	if (after_ret) {
2478 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
2479 		*tl = nfs_false;
2480 	} else {
2481 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
2482 		*tl++ = nfs_true;
2483 		fp = (struct nfs_fattr *)tl;
2484 		nfsm_srvfattr(nfsd, after_vap, fp);
2485 	}
2486 	*mbp = mb;
2487 	*bposp = bpos;
2488 }
2489 
2490 void
2491 nfsm_srvfattr(nfsd, vap, fp)
2492 	struct nfsrv_descript *nfsd;
2493 	struct vattr *vap;
2494 	struct nfs_fattr *fp;
2495 {
2496 
2497 	fp->fa_nlink = txdr_unsigned(vap->va_nlink);
2498 	fp->fa_uid = txdr_unsigned(vap->va_uid);
2499 	fp->fa_gid = txdr_unsigned(vap->va_gid);
2500 	if (nfsd->nd_flag & ND_NFSV3) {
2501 		fp->fa_type = vtonfsv3_type(vap->va_type);
2502 		fp->fa_mode = vtonfsv3_mode(vap->va_mode);
2503 		txdr_hyper(vap->va_size, &fp->fa3_size);
2504 		txdr_hyper(vap->va_bytes, &fp->fa3_used);
2505 		fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev));
2506 		fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev));
2507 		fp->fa3_fsid.nfsuquad[0] = 0;
2508 		fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
2509 		txdr_hyper(vap->va_fileid, &fp->fa3_fileid);
2510 		txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
2511 		txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
2512 		txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
2513 	} else {
2514 		fp->fa_type = vtonfsv2_type(vap->va_type);
2515 		fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
2516 		fp->fa2_size = txdr_unsigned(vap->va_size);
2517 		fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize);
2518 		if (vap->va_type == VFIFO)
2519 			fp->fa2_rdev = 0xffffffff;
2520 		else
2521 			fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
2522 		fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
2523 		fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
2524 		fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
2525 		txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
2526 		txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
2527 		txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
2528 	}
2529 }
2530 
2531 #ifdef NFSSERVER
2532 /*
2533  * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
2534  * 	- look up fsid in mount list (if not found ret error)
2535  *	- get vp and export rights by calling VFS_FHTOVP()
2536  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2537  *	- if not lockflag unlock it with VOP_UNLOCK()
2538  */
2539 int
2540 nfsrv_fhtovp(
2541     nfsrvfh_t *nsfh,
2542     int lockflag,
2543     struct vnode **vpp,
2544     kauth_cred_t cred,
2545     struct nfssvc_sock *slp __unused,
2546     struct mbuf *nam,
2547     int *rdonlyp,
2548     int kerbflag,
2549     int pubflag
2550 )
2551 {
2552 	struct mount *mp;
2553 	kauth_cred_t credanon;
2554 	int error, exflags;
2555 	struct sockaddr_in *saddr;
2556 	fhandle_t *fhp;
2557 
2558 	fhp = NFSRVFH_FHANDLE(nsfh);
2559 	*vpp = (struct vnode *)0;
2560 
2561 	if (nfs_ispublicfh(nsfh)) {
2562 		if (!pubflag || !nfs_pub.np_valid)
2563 			return (ESTALE);
2564 		fhp = nfs_pub.np_handle;
2565 	}
2566 
2567 	error = netexport_check(&fhp->fh_fsid, nam, &mp, &exflags, &credanon);
2568 	if (error) {
2569 		return error;
2570 	}
2571 
2572 	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2573 	if (error)
2574 		return (error);
2575 
2576 	if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
2577 		saddr = mtod(nam, struct sockaddr_in *);
2578 		if ((saddr->sin_family == AF_INET) &&
2579 		    ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
2580 			vput(*vpp);
2581 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2582 		}
2583 #ifdef INET6
2584 		if ((saddr->sin_family == AF_INET6) &&
2585 		    ntohs(saddr->sin_port) >= IPV6PORT_RESERVED) {
2586 			vput(*vpp);
2587 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2588 		}
2589 #endif
2590 	}
2591 	/*
2592 	 * Check/setup credentials.
2593 	 */
2594 	if (exflags & MNT_EXKERB) {
2595 		if (!kerbflag) {
2596 			vput(*vpp);
2597 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2598 		}
2599 	} else if (kerbflag) {
2600 		vput(*vpp);
2601 		return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2602 	} else if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
2603 		    NULL) == 0 || (exflags & MNT_EXPORTANON)) {
2604 		kauth_cred_clone(credanon, cred);
2605 	}
2606 	if (exflags & MNT_EXRDONLY)
2607 		*rdonlyp = 1;
2608 	else
2609 		*rdonlyp = 0;
2610 	if (!lockflag)
2611 		VOP_UNLOCK(*vpp, 0);
2612 	return (0);
2613 }
2614 
2615 /*
2616  * WebNFS: check if a filehandle is a public filehandle. For v3, this
2617  * means a length of 0; for v2 it means all zeroes.
2618  */
2619 int
2620 nfs_ispublicfh(const nfsrvfh_t *nsfh)
2621 {
2622 	const char *cp = (const void *)(NFSRVFH_DATA(nsfh));
2623 	int i;
2624 
2625 	if (NFSRVFH_SIZE(nsfh) == 0) {
2626 		return TRUE;
2627 	}
2628 	if (NFSRVFH_SIZE(nsfh) != NFSX_V2FH) {
2629 		return FALSE;
2630 	}
2631 	for (i = 0; i < NFSX_V2FH; i++)
2632 		if (*cp++ != 0)
2633 			return FALSE;
2634 	return TRUE;
2635 }
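/*
 * An illustrative sketch (not part of the original source): the two
 * forms accepted above are a zero-length handle (the v3 public
 * filehandle) and NFSX_V2FH bytes of zeroes (the v2 public filehandle).
 */
#if 0
	nfsrvfh_t pubfh;

	memset(NFSRVFH_DATA(&pubfh), 0, NFSX_V2FH);
	pubfh.nsfh_size = NFSX_V2FH;		/* v2 form */
	KASSERT(nfs_ispublicfh(&pubfh));

	pubfh.nsfh_size = 0;			/* v3 form */
	KASSERT(nfs_ispublicfh(&pubfh));
#endif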
2636 #endif /* NFSSERVER */
2637 
2638 /*
2639  * This function compares two net addresses by family and returns TRUE
2640  * if they are the same host.
2641  * If there is any doubt, it returns FALSE.
2642  * The AF_INET family is handled as a special case so that address mbufs
2643  * don't need to be saved; the 4-byte "struct in_addr" is stored instead.
2644  */
2645 int
2646 netaddr_match(family, haddr, nam)
2647 	int family;
2648 	union nethostaddr *haddr;
2649 	struct mbuf *nam;
2650 {
2651 	struct sockaddr_in *inetaddr;
2652 
2653 	switch (family) {
2654 	case AF_INET:
2655 		inetaddr = mtod(nam, struct sockaddr_in *);
2656 		if (inetaddr->sin_family == AF_INET &&
2657 		    inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
2658 			return (1);
2659 		break;
2660 #ifdef INET6
2661 	case AF_INET6:
2662 	    {
2663 		struct sockaddr_in6 *sin6_1, *sin6_2;
2664 
2665 		sin6_1 = mtod(nam, struct sockaddr_in6 *);
2666 		sin6_2 = mtod(haddr->had_nam, struct sockaddr_in6 *);
2667 		if (sin6_1->sin6_family == AF_INET6 &&
2668 		    IN6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr, &sin6_2->sin6_addr))
2669 			return 1;
		break;
2670 	    }
2671 #endif
2672 #ifdef ISO
2673 	case AF_ISO:
2674 	    {
2675 		struct sockaddr_iso *isoaddr1, *isoaddr2;
2676 
2677 		isoaddr1 = mtod(nam, struct sockaddr_iso *);
2678 		isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *);
2679 		if (isoaddr1->siso_family == AF_ISO &&
2680 		    isoaddr1->siso_nlen > 0 &&
2681 		    isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
2682 		    SAME_ISOADDR(isoaddr1, isoaddr2))
2683 			return (1);
2684 		break;
2685 	    }
2686 #endif	/* ISO */
2687 	default:
2688 		break;
2689 	}
2690 	return (0);
2691 }
2692 
2693 /*
2694  * The write verifier has changed (probably due to a server reboot), so all
2695  * PG_NEEDCOMMIT pages will have to be written again. Since they are marked
2696  * as dirty or are being written out just now, all that this takes is clearing
2697  * the PG_NEEDCOMMIT flag. Once done, the new write verifier can be set for
2698  * the mount point.
2699  */
2700 void
2701 nfs_clearcommit(mp)
2702 	struct mount *mp;
2703 {
2704 	struct vnode *vp;
2705 	struct nfsnode *np;
2706 	struct vm_page *pg;
2707 	struct nfsmount *nmp = VFSTONFS(mp);
2708 
2709 	lockmgr(&nmp->nm_writeverflock, LK_EXCLUSIVE, NULL);
2710 
2711 	LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2712 		KASSERT(vp->v_mount == mp);
2713 		if (vp->v_type != VREG)
2714 			continue;
2715 		np = VTONFS(vp);
2716 		np->n_pushlo = np->n_pushhi = np->n_pushedlo =
2717 		    np->n_pushedhi = 0;
2718 		np->n_commitflags &=
2719 		    ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID);
2720 		simple_lock(&vp->v_uobj.vmobjlock);
2721 		TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) {
2722 			pg->flags &= ~PG_NEEDCOMMIT;
2723 		}
2724 		simple_unlock(&vp->v_uobj.vmobjlock);
2725 	}
2726 	simple_lock(&nmp->nm_slock);
2727 	nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF;
2728 	simple_unlock(&nmp->nm_slock);
2729 	lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
2730 }
2731 
2732 void
2733 nfs_merge_commit_ranges(vp)
2734 	struct vnode *vp;
2735 {
2736 	struct nfsnode *np = VTONFS(vp);
2737 
2738 	KASSERT(np->n_commitflags & NFS_COMMIT_PUSH_VALID);
2739 
2740 	if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
2741 		np->n_pushedlo = np->n_pushlo;
2742 		np->n_pushedhi = np->n_pushhi;
2743 		np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
2744 	} else {
2745 		if (np->n_pushlo < np->n_pushedlo)
2746 			np->n_pushedlo = np->n_pushlo;
2747 		if (np->n_pushhi > np->n_pushedhi)
2748 			np->n_pushedhi = np->n_pushhi;
2749 	}
2750 
2751 	np->n_pushlo = np->n_pushhi = 0;
2752 	np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID;
2753 
2754 #ifdef NFS_DEBUG_COMMIT
2755 	printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo,
2756 	    (unsigned)np->n_pushedhi);
2757 #endif
2758 }
2759 
2760 int
2761 nfs_in_committed_range(vp, off, len)
2762 	struct vnode *vp;
2763 	off_t off, len;
2764 {
2765 	struct nfsnode *np = VTONFS(vp);
2766 	off_t lo, hi;
2767 
2768 	if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
2769 		return 0;
2770 	lo = off;
2771 	hi = lo + len;
2772 
2773 	return (lo >= np->n_pushedlo && hi <= np->n_pushedhi);
2774 }
2775 
2776 int
2777 nfs_in_tobecommitted_range(vp, off, len)
2778 	struct vnode *vp;
2779 	off_t off, len;
2780 {
2781 	struct nfsnode *np = VTONFS(vp);
2782 	off_t lo, hi;
2783 
2784 	if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
2785 		return 0;
2786 	lo = off;
2787 	hi = lo + len;
2788 
2789 	return (lo >= np->n_pushlo && hi <= np->n_pushhi);
2790 }
2791 
2792 void
2793 nfs_add_committed_range(vp, off, len)
2794 	struct vnode *vp;
2795 	off_t off, len;
2796 {
2797 	struct nfsnode *np = VTONFS(vp);
2798 	off_t lo, hi;
2799 
2800 	lo = off;
2801 	hi = lo + len;
2802 
2803 	if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
2804 		np->n_pushedlo = lo;
2805 		np->n_pushedhi = hi;
2806 		np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
2807 	} else {
2808 		if (hi > np->n_pushedhi)
2809 			np->n_pushedhi = hi;
2810 		if (lo < np->n_pushedlo)
2811 			np->n_pushedlo = lo;
2812 	}
2813 #ifdef NFS_DEBUG_COMMIT
2814 	printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo,
2815 	    (unsigned)np->n_pushedhi);
2816 #endif
2817 }
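/*
 * A minimal sketch (not part of the original source), assuming a
 * hypothetical vnode "vp": only one (lo, hi) pair is kept per nfsnode,
 * so adding two disjoint committed ranges also covers the gap between
 * them.
 */
#if 0
	nfs_add_committed_range(vp, 0, 4096);		/* covers [0, 4096) */
	nfs_add_committed_range(vp, 8192, 4096);	/* grows to [0, 12288) */
	KASSERT(nfs_in_committed_range(vp, 4096, 4096));
#endif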
2818 
2819 void
2820 nfs_del_committed_range(vp, off, len)
2821 	struct vnode *vp;
2822 	off_t off, len;
2823 {
2824 	struct nfsnode *np = VTONFS(vp);
2825 	off_t lo, hi;
2826 
2827 	if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
2828 		return;
2829 
2830 	lo = off;
2831 	hi = lo + len;
2832 
2833 	if (lo > np->n_pushedhi || hi < np->n_pushedlo)
2834 		return;
2835 	if (lo <= np->n_pushedlo)
2836 		np->n_pushedlo = hi;
2837 	else if (hi >= np->n_pushedhi)
2838 		np->n_pushedhi = lo;
2839 	else {
2840 		/*
2841 		 * XXX There's only one range. If the deleted range
2842 		 * is in the middle, pick the largest of the
2843 		 * contiguous ranges that it leaves.
2844 		 */
2845 		if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi))
2846 			np->n_pushedhi = lo;
2847 		else
2848 			np->n_pushedlo = hi;
2849 	}
2850 #ifdef NFS_DEBUG_COMMIT
2851 	printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo,
2852 	    (unsigned)np->n_pushedhi);
2853 #endif
2854 }
2855 
2856 void
2857 nfs_add_tobecommitted_range(vp, off, len)
2858 	struct vnode *vp;
2859 	off_t off, len;
2860 {
2861 	struct nfsnode *np = VTONFS(vp);
2862 	off_t lo, hi;
2863 
2864 	lo = off;
2865 	hi = lo + len;
2866 
2867 	if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) {
2868 		np->n_pushlo = lo;
2869 		np->n_pushhi = hi;
2870 		np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
2871 	} else {
2872 		if (lo < np->n_pushlo)
2873 			np->n_pushlo = lo;
2874 		if (hi > np->n_pushhi)
2875 			np->n_pushhi = hi;
2876 	}
2877 #ifdef NFS_DEBUG_COMMIT
2878 	printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
2879 	    (unsigned)np->n_pushhi);
2880 #endif
2881 }
2882 
2883 void
2884 nfs_del_tobecommitted_range(vp, off, len)
2885 	struct vnode *vp;
2886 	off_t off, len;
2887 {
2888 	struct nfsnode *np = VTONFS(vp);
2889 	off_t lo, hi;
2890 
2891 	if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
2892 		return;
2893 
2894 	lo = off;
2895 	hi = lo + len;
2896 
2897 	if (lo > np->n_pushhi || hi < np->n_pushlo)
2898 		return;
2899 
2900 	if (lo <= np->n_pushlo)
2901 		np->n_pushlo = hi;
2902 	else if (hi >= np->n_pushhi)
2903 		np->n_pushhi = lo;
2904 	else {
2905 		/*
2906 		 * XXX There's only one range. If the deleted range
2907 		 * is in the middle, pick the largest of the
2908 		 * contiguous ranges that it leaves.
2909 		 */
2910 		if ((np->n_pushlo - lo) > (hi - np->n_pushhi))
2911 			np->n_pushhi = lo;
2912 		else
2913 			np->n_pushlo = hi;
2914 	}
2915 #ifdef NFS_DEBUG_COMMIT
2916 	printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
2917 	    (unsigned)np->n_pushhi);
2918 #endif
2919 }
2920 
2921 /*
2922  * Map errnos to NFS error numbers. For Version 3 also filter out error
2923  * numbers not specified for the associated procedure.
2924  */
2925 int
2926 nfsrv_errmap(nd, err)
2927 	struct nfsrv_descript *nd;
2928 	int err;
2929 {
2930 	const short *defaulterrp, *errp;
2931 
2932 	if (nd->nd_flag & ND_NFSV3) {
2933 	    if (nd->nd_procnum <= NFSPROC_COMMIT) {
2934 		errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
2935 		while (*++errp) {
2936 			if (*errp == err)
2937 				return (err);
2938 			else if (*errp > err)
2939 				break;
2940 		}
2941 		return ((int)*defaulterrp);
2942 	    } else
2943 		return (err & 0xffff);
2944 	}
2945 	if (err <= ELAST)
2946 		return ((int)nfsrv_v2errmap[err - 1]);
2947 	return (NFSERR_IO);
2948 }
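/*
 * An illustrative note (not part of the original source): the lookup
 * above relies on each nfsrv_v3errmap[] row starting with the
 * procedure's default error, followed by an ascending, zero-terminated
 * list of the errnos NFSv3 permits for that procedure.
 */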
2949 
2950 u_int32_t
2951 nfs_getxid()
2952 {
2953 	static u_int32_t base;
2954 	static u_int32_t nfs_xid = 0;
2955 	static struct simplelock nfs_xidlock = SIMPLELOCK_INITIALIZER;
2956 	u_int32_t newxid;
2957 
2958 	simple_lock(&nfs_xidlock);
2959 	/*
2960 	 * derive initial xid from system time
2961 	 * XXX time is invalid if root not yet mounted
2962 	 */
2963 	if (__predict_false(!base && (rootvp))) {
2964 		struct timeval tv;
2965 
2966 		microtime(&tv);
2967 		base = tv.tv_sec << 12;
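		/* each second of system time spaces the initial xid by 4096 */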
2968 		nfs_xid = base;
2969 	}
2970 
2971 	/*
2972 	 * Skip zero xid if it should ever happen.
2973 	 */
2974 	if (__predict_false(++nfs_xid == 0))
2975 		nfs_xid++;
2976 	newxid = nfs_xid;
2977 	simple_unlock(&nfs_xidlock);
2978 
2979 	return txdr_unsigned(newxid);
2980 }
2981 
2982 /*
2983  * Assign a new xid to an existing request.
2984  * Used for NFSERR_JUKEBOX handling.
2985  */
2986 void
2987 nfs_renewxid(struct nfsreq *req)
2988 {
2989 	u_int32_t xid;
2990 	int off;
2991 
2992 	xid = nfs_getxid();
2993 	if (req->r_nmp->nm_sotype == SOCK_STREAM)
2994 		off = sizeof(u_int32_t); /* RPC record mark */
2995 	else
2996 		off = 0;
2997 
2998 	m_copyback(req->r_mreq, off, sizeof(xid), (void *)&xid);
2999 	req->r_xid = xid;
3000 }
3001 
3002 #if defined(NFSSERVER)
3003 int
3004 nfsrv_composefh(struct vnode *vp, nfsrvfh_t *nsfh, boolean_t v3)
3005 {
3006 	int error;
3007 	size_t fhsize;
3008 
3009 	fhsize = NFSD_MAXFHSIZE;
3010 	error = vfs_composefh(vp, (void *)NFSRVFH_DATA(nsfh), &fhsize);
3011 	if (NFSX_FHTOOBIG_P(fhsize, v3)) {
3012 		error = EOPNOTSUPP;
3013 	}
3014 	if (error != 0) {
3015 		return error;
3016 	}
3017 	if (!v3 && fhsize < NFSX_V2FH) {
3018 		memset((char *)NFSRVFH_DATA(nsfh) + fhsize, 0,
3019 		    NFSX_V2FH - fhsize);
3020 		fhsize = NFSX_V2FH;
3021 	}
3022 	if ((fhsize % NFSX_UNSIGNED) != 0) {
3023 		return EOPNOTSUPP;
3024 	}
3025 	nsfh->nsfh_size = fhsize;
3026 	return 0;
3027 }
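/*
 * An illustrative note (not part of the original source), assuming a
 * hypothetical file system whose native handle is 20 bytes: for NFSv2
 * the handle is zero-padded out to NFSX_V2FH (32) bytes above, while
 * for NFSv3 the 20-byte handle is used as-is, 20 being a multiple of
 * NFSX_UNSIGNED.
 */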
3028 
3029 int
3030 nfsrv_comparefh(const nfsrvfh_t *fh1, const nfsrvfh_t *fh2)
3031 {
3032 
3033 	if (NFSRVFH_SIZE(fh1) != NFSRVFH_SIZE(fh2)) {
3034 		return NFSRVFH_SIZE(fh2) - NFSRVFH_SIZE(fh1);
3035 	}
3036 	return memcmp(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), NFSRVFH_SIZE(fh1));
3037 }
3038 
3039 void
3040 nfsrv_copyfh(nfsrvfh_t *fh1, const nfsrvfh_t *fh2)
3041 {
3042 	size_t size;
3043 
3044 	fh1->nsfh_size = size = NFSRVFH_SIZE(fh2);
3045 	memcpy(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), size);
3046 }
3047 #endif /* defined(NFSSERVER) */
3048