xref: /netbsd-src/sys/nfs/nfs_subs.c (revision 2a399c6883d870daece976daec6ffa7bb7f934ce)
1 /*	$NetBSD: nfs_subs.c,v 1.49 1997/10/19 01:46:32 fvdl Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)nfs_subs.c	8.8 (Berkeley) 5/22/95
39  */
40 
41 
42 /*
43  * These functions support the macros and help fiddle mbuf chains for
44  * the nfs op functions. They do things like create the rpc header and
45  * copy data between mbuf chains and uio lists.
46  */
47 #include <sys/param.h>
48 #include <sys/proc.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/namei.h>
54 #include <sys/mbuf.h>
55 #include <sys/socket.h>
56 #include <sys/stat.h>
57 #include <sys/malloc.h>
58 #include <sys/time.h>
59 #include <sys/dirent.h>
60 
61 #include <vm/vm.h>
62 
63 #include <nfs/rpcv2.h>
64 #include <nfs/nfsproto.h>
65 #include <nfs/nfsnode.h>
66 #include <nfs/nfs.h>
67 #include <nfs/xdr_subs.h>
68 #include <nfs/nfsm_subs.h>
69 #include <nfs/nfsmount.h>
70 #include <nfs/nqnfs.h>
71 #include <nfs/nfsrtt.h>
72 #include <nfs/nfs_var.h>
73 
74 #include <miscfs/specfs/specdev.h>
75 
76 #include <vm/vm.h>
77 
78 #include <netinet/in.h>
79 #ifdef ISO
80 #include <netiso/iso.h>
81 #endif
82 
83 /*
84  * Data items converted to xdr at startup, since they are constant
85  * This is kinda hokey, but may save a little time doing byte swaps
86  */
87 u_int32_t nfs_xdrneg1;
88 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
89 	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
90 	rpc_auth_kerb;
91 u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false;
92 
93 /* And other global data */
94 static u_int32_t nfs_xid = 0;
95 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
96 		      NFCHR, NFNON };
97 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
98 		      NFFIFO, NFNON };
99 enum vtype nv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON };
100 enum vtype nv3tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO };
101 int nfs_ticks;
102 extern struct nfs_public nfs_pub;
103 
104 /* NFS client/server stats. */
105 struct nfsstats nfsstats;
106 
107 /*
108  * Mapping of old NFS Version 2 RPC numbers to generic numbers.
109  */
110 int nfsv3_procid[NFS_NPROCS] = {
111 	NFSPROC_NULL,
112 	NFSPROC_GETATTR,
113 	NFSPROC_SETATTR,
114 	NFSPROC_NOOP,
115 	NFSPROC_LOOKUP,
116 	NFSPROC_READLINK,
117 	NFSPROC_READ,
118 	NFSPROC_NOOP,
119 	NFSPROC_WRITE,
120 	NFSPROC_CREATE,
121 	NFSPROC_REMOVE,
122 	NFSPROC_RENAME,
123 	NFSPROC_LINK,
124 	NFSPROC_SYMLINK,
125 	NFSPROC_MKDIR,
126 	NFSPROC_RMDIR,
127 	NFSPROC_READDIR,
128 	NFSPROC_FSSTAT,
129 	NFSPROC_NOOP,
130 	NFSPROC_NOOP,
131 	NFSPROC_NOOP,
132 	NFSPROC_NOOP,
133 	NFSPROC_NOOP,
134 	NFSPROC_NOOP,
135 	NFSPROC_NOOP,
136 	NFSPROC_NOOP
137 };
138 
139 /*
140  * and the reverse mapping from generic to Version 2 procedure numbers
141  */
142 int nfsv2_procid[NFS_NPROCS] = {
143 	NFSV2PROC_NULL,
144 	NFSV2PROC_GETATTR,
145 	NFSV2PROC_SETATTR,
146 	NFSV2PROC_LOOKUP,
147 	NFSV2PROC_NOOP,
148 	NFSV2PROC_READLINK,
149 	NFSV2PROC_READ,
150 	NFSV2PROC_WRITE,
151 	NFSV2PROC_CREATE,
152 	NFSV2PROC_MKDIR,
153 	NFSV2PROC_SYMLINK,
154 	NFSV2PROC_CREATE,
155 	NFSV2PROC_REMOVE,
156 	NFSV2PROC_RMDIR,
157 	NFSV2PROC_RENAME,
158 	NFSV2PROC_LINK,
159 	NFSV2PROC_READDIR,
160 	NFSV2PROC_NOOP,
161 	NFSV2PROC_STATFS,
162 	NFSV2PROC_NOOP,
163 	NFSV2PROC_NOOP,
164 	NFSV2PROC_NOOP,
165 	NFSV2PROC_NOOP,
166 	NFSV2PROC_NOOP,
167 	NFSV2PROC_NOOP,
168 	NFSV2PROC_NOOP,
169 };
170 
171 /*
172  * Maps errno values to nfs error numbers.
173  * Use NFSERR_IO as the catch all for ones not specifically defined in
174  * RFC 1094.
175  */
176 static u_char nfsrv_v2errmap[ELAST] = {
177   NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
178   NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
179   NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
180   NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
181   NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
182   NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
183   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
184   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
185   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
186   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
187   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
188   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
189   NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
190   NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
191   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
192   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
193   NFSERR_IO,
194 };
195 
196 /*
197  * Maps errno values to nfs error numbers.
198  * Although it is not obvious whether or not NFS clients really care if
199  * a returned error value is in the specified list for the procedure, the
200  * safest thing to do is filter them appropriately. For Version 2, the
201  * X/Open XNFS document is the only specification that defines error values
202  * for each RPC (The RFC simply lists all possible error values for all RPCs),
203  * so I have decided to not do this for Version 2.
204  * The first entry is the default error return and the rest are the valid
205  * errors for that RPC in increasing numeric order.
206  */
207 static short nfsv3err_null[] = {
208 	0,
209 	0,
210 };
211 
212 static short nfsv3err_getattr[] = {
213 	NFSERR_IO,
214 	NFSERR_IO,
215 	NFSERR_STALE,
216 	NFSERR_BADHANDLE,
217 	NFSERR_SERVERFAULT,
218 	0,
219 };
220 
221 static short nfsv3err_setattr[] = {
222 	NFSERR_IO,
223 	NFSERR_PERM,
224 	NFSERR_IO,
225 	NFSERR_ACCES,
226 	NFSERR_INVAL,
227 	NFSERR_NOSPC,
228 	NFSERR_ROFS,
229 	NFSERR_DQUOT,
230 	NFSERR_STALE,
231 	NFSERR_BADHANDLE,
232 	NFSERR_NOT_SYNC,
233 	NFSERR_SERVERFAULT,
234 	0,
235 };
236 
237 static short nfsv3err_lookup[] = {
238 	NFSERR_IO,
239 	NFSERR_NOENT,
240 	NFSERR_IO,
241 	NFSERR_ACCES,
242 	NFSERR_NOTDIR,
243 	NFSERR_NAMETOL,
244 	NFSERR_STALE,
245 	NFSERR_BADHANDLE,
246 	NFSERR_SERVERFAULT,
247 	0,
248 };
249 
250 static short nfsv3err_access[] = {
251 	NFSERR_IO,
252 	NFSERR_IO,
253 	NFSERR_STALE,
254 	NFSERR_BADHANDLE,
255 	NFSERR_SERVERFAULT,
256 	0,
257 };
258 
259 static short nfsv3err_readlink[] = {
260 	NFSERR_IO,
261 	NFSERR_IO,
262 	NFSERR_ACCES,
263 	NFSERR_INVAL,
264 	NFSERR_STALE,
265 	NFSERR_BADHANDLE,
266 	NFSERR_NOTSUPP,
267 	NFSERR_SERVERFAULT,
268 	0,
269 };
270 
271 static short nfsv3err_read[] = {
272 	NFSERR_IO,
273 	NFSERR_IO,
274 	NFSERR_NXIO,
275 	NFSERR_ACCES,
276 	NFSERR_INVAL,
277 	NFSERR_STALE,
278 	NFSERR_BADHANDLE,
279 	NFSERR_SERVERFAULT,
280 	0,
281 };
282 
283 static short nfsv3err_write[] = {
284 	NFSERR_IO,
285 	NFSERR_IO,
286 	NFSERR_ACCES,
287 	NFSERR_INVAL,
288 	NFSERR_FBIG,
289 	NFSERR_NOSPC,
290 	NFSERR_ROFS,
291 	NFSERR_DQUOT,
292 	NFSERR_STALE,
293 	NFSERR_BADHANDLE,
294 	NFSERR_SERVERFAULT,
295 	0,
296 };
297 
298 static short nfsv3err_create[] = {
299 	NFSERR_IO,
300 	NFSERR_IO,
301 	NFSERR_ACCES,
302 	NFSERR_EXIST,
303 	NFSERR_NOTDIR,
304 	NFSERR_NOSPC,
305 	NFSERR_ROFS,
306 	NFSERR_NAMETOL,
307 	NFSERR_DQUOT,
308 	NFSERR_STALE,
309 	NFSERR_BADHANDLE,
310 	NFSERR_NOTSUPP,
311 	NFSERR_SERVERFAULT,
312 	0,
313 };
314 
315 static short nfsv3err_mkdir[] = {
316 	NFSERR_IO,
317 	NFSERR_IO,
318 	NFSERR_ACCES,
319 	NFSERR_EXIST,
320 	NFSERR_NOTDIR,
321 	NFSERR_NOSPC,
322 	NFSERR_ROFS,
323 	NFSERR_NAMETOL,
324 	NFSERR_DQUOT,
325 	NFSERR_STALE,
326 	NFSERR_BADHANDLE,
327 	NFSERR_NOTSUPP,
328 	NFSERR_SERVERFAULT,
329 	0,
330 };
331 
332 static short nfsv3err_symlink[] = {
333 	NFSERR_IO,
334 	NFSERR_IO,
335 	NFSERR_ACCES,
336 	NFSERR_EXIST,
337 	NFSERR_NOTDIR,
338 	NFSERR_NOSPC,
339 	NFSERR_ROFS,
340 	NFSERR_NAMETOL,
341 	NFSERR_DQUOT,
342 	NFSERR_STALE,
343 	NFSERR_BADHANDLE,
344 	NFSERR_NOTSUPP,
345 	NFSERR_SERVERFAULT,
346 	0,
347 };
348 
349 static short nfsv3err_mknod[] = {
350 	NFSERR_IO,
351 	NFSERR_IO,
352 	NFSERR_ACCES,
353 	NFSERR_EXIST,
354 	NFSERR_NOTDIR,
355 	NFSERR_NOSPC,
356 	NFSERR_ROFS,
357 	NFSERR_NAMETOL,
358 	NFSERR_DQUOT,
359 	NFSERR_STALE,
360 	NFSERR_BADHANDLE,
361 	NFSERR_NOTSUPP,
362 	NFSERR_SERVERFAULT,
363 	NFSERR_BADTYPE,
364 	0,
365 };
366 
367 static short nfsv3err_remove[] = {
368 	NFSERR_IO,
369 	NFSERR_NOENT,
370 	NFSERR_IO,
371 	NFSERR_ACCES,
372 	NFSERR_NOTDIR,
373 	NFSERR_ROFS,
374 	NFSERR_NAMETOL,
375 	NFSERR_STALE,
376 	NFSERR_BADHANDLE,
377 	NFSERR_SERVERFAULT,
378 	0,
379 };
380 
381 static short nfsv3err_rmdir[] = {
382 	NFSERR_IO,
383 	NFSERR_NOENT,
384 	NFSERR_IO,
385 	NFSERR_ACCES,
386 	NFSERR_EXIST,
387 	NFSERR_NOTDIR,
388 	NFSERR_INVAL,
389 	NFSERR_ROFS,
390 	NFSERR_NAMETOL,
391 	NFSERR_NOTEMPTY,
392 	NFSERR_STALE,
393 	NFSERR_BADHANDLE,
394 	NFSERR_NOTSUPP,
395 	NFSERR_SERVERFAULT,
396 	0,
397 };
398 
399 static short nfsv3err_rename[] = {
400 	NFSERR_IO,
401 	NFSERR_NOENT,
402 	NFSERR_IO,
403 	NFSERR_ACCES,
404 	NFSERR_EXIST,
405 	NFSERR_XDEV,
406 	NFSERR_NOTDIR,
407 	NFSERR_ISDIR,
408 	NFSERR_INVAL,
409 	NFSERR_NOSPC,
410 	NFSERR_ROFS,
411 	NFSERR_MLINK,
412 	NFSERR_NAMETOL,
413 	NFSERR_NOTEMPTY,
414 	NFSERR_DQUOT,
415 	NFSERR_STALE,
416 	NFSERR_BADHANDLE,
417 	NFSERR_NOTSUPP,
418 	NFSERR_SERVERFAULT,
419 	0,
420 };
421 
422 static short nfsv3err_link[] = {
423 	NFSERR_IO,
424 	NFSERR_IO,
425 	NFSERR_ACCES,
426 	NFSERR_EXIST,
427 	NFSERR_XDEV,
428 	NFSERR_NOTDIR,
429 	NFSERR_INVAL,
430 	NFSERR_NOSPC,
431 	NFSERR_ROFS,
432 	NFSERR_MLINK,
433 	NFSERR_NAMETOL,
434 	NFSERR_DQUOT,
435 	NFSERR_STALE,
436 	NFSERR_BADHANDLE,
437 	NFSERR_NOTSUPP,
438 	NFSERR_SERVERFAULT,
439 	0,
440 };
441 
442 static short nfsv3err_readdir[] = {
443 	NFSERR_IO,
444 	NFSERR_IO,
445 	NFSERR_ACCES,
446 	NFSERR_NOTDIR,
447 	NFSERR_STALE,
448 	NFSERR_BADHANDLE,
449 	NFSERR_BAD_COOKIE,
450 	NFSERR_TOOSMALL,
451 	NFSERR_SERVERFAULT,
452 	0,
453 };
454 
455 static short nfsv3err_readdirplus[] = {
456 	NFSERR_IO,
457 	NFSERR_IO,
458 	NFSERR_ACCES,
459 	NFSERR_NOTDIR,
460 	NFSERR_STALE,
461 	NFSERR_BADHANDLE,
462 	NFSERR_BAD_COOKIE,
463 	NFSERR_NOTSUPP,
464 	NFSERR_TOOSMALL,
465 	NFSERR_SERVERFAULT,
466 	0,
467 };
468 
469 static short nfsv3err_fsstat[] = {
470 	NFSERR_IO,
471 	NFSERR_IO,
472 	NFSERR_STALE,
473 	NFSERR_BADHANDLE,
474 	NFSERR_SERVERFAULT,
475 	0,
476 };
477 
478 static short nfsv3err_fsinfo[] = {
479 	NFSERR_STALE,
480 	NFSERR_STALE,
481 	NFSERR_BADHANDLE,
482 	NFSERR_SERVERFAULT,
483 	0,
484 };
485 
486 static short nfsv3err_pathconf[] = {
487 	NFSERR_STALE,
488 	NFSERR_STALE,
489 	NFSERR_BADHANDLE,
490 	NFSERR_SERVERFAULT,
491 	0,
492 };
493 
494 static short nfsv3err_commit[] = {
495 	NFSERR_IO,
496 	NFSERR_IO,
497 	NFSERR_STALE,
498 	NFSERR_BADHANDLE,
499 	NFSERR_SERVERFAULT,
500 	0,
501 };
502 
503 static short *nfsrv_v3errmap[] = {
504 	nfsv3err_null,
505 	nfsv3err_getattr,
506 	nfsv3err_setattr,
507 	nfsv3err_lookup,
508 	nfsv3err_access,
509 	nfsv3err_readlink,
510 	nfsv3err_read,
511 	nfsv3err_write,
512 	nfsv3err_create,
513 	nfsv3err_mkdir,
514 	nfsv3err_symlink,
515 	nfsv3err_mknod,
516 	nfsv3err_remove,
517 	nfsv3err_rmdir,
518 	nfsv3err_rename,
519 	nfsv3err_link,
520 	nfsv3err_readdir,
521 	nfsv3err_readdirplus,
522 	nfsv3err_fsstat,
523 	nfsv3err_fsinfo,
524 	nfsv3err_pathconf,
525 	nfsv3err_commit,
526 };
527 
528 extern struct nfsrtt nfsrtt;
529 extern time_t nqnfsstarttime;
530 extern int nqsrv_clockskew;
531 extern int nqsrv_writeslack;
532 extern int nqsrv_maxlease;
533 extern int nqnfs_piggy[NFS_NPROCS];
534 extern nfstype nfsv2_type[9];
535 extern nfstype nfsv3_type[9];
536 extern struct nfsnodehashhead *nfsnodehashtbl;
537 extern u_long nfsnodehash;
538 
539 LIST_HEAD(nfsnodehashhead, nfsnode);
540 u_long nfsdirhashmask;
541 
542 int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *));
543 
544 /*
545  * Create the header for an rpc request packet
546  * The hsiz is the size of the rest of the nfs request header.
547  * (just used to decide if a cluster is a good idea)
548  */
549 struct mbuf *
550 nfsm_reqh(vp, procid, hsiz, bposp)
551 	struct vnode *vp;
552 	u_long procid;
553 	int hsiz;
554 	caddr_t *bposp;
555 {
556 	register struct mbuf *mb;
557 	register u_int32_t *tl;
558 	register caddr_t bpos;
559 	struct mbuf *mb2;
560 	struct nfsmount *nmp;
561 	int nqflag;
562 
563 	MGET(mb, M_WAIT, MT_DATA);
564 	if (hsiz >= MINCLSIZE)
565 		MCLGET(mb, M_WAIT);
566 	mb->m_len = 0;
567 	bpos = mtod(mb, caddr_t);
568 
569 	/*
570 	 * For NQNFS, add lease request.
571 	 */
572 	if (vp) {
573 		nmp = VFSTONFS(vp->v_mount);
574 		if (nmp->nm_flag & NFSMNT_NQNFS) {
575 			nqflag = NQNFS_NEEDLEASE(vp, procid);
576 			if (nqflag) {
577 				nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED);
578 				*tl++ = txdr_unsigned(nqflag);
579 				*tl = txdr_unsigned(nmp->nm_leaseterm);
580 			} else {
581 				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
582 				*tl = 0;
583 			}
584 		}
585 	}
586 	/* Finally, return values */
587 	*bposp = bpos;
588 	return (mb);
589 }
590 
591 /*
592  * Build the RPC header and fill in the authorization info.
593  * The authorization string argument is only used when the credentials
594  * come from outside of the kernel.
595  * Returns the head of the mbuf list.
596  */
597 struct mbuf *
598 nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len,
599 	verf_str, mrest, mrest_len, mbp, xidp)
600 	register struct ucred *cr;
601 	int nmflag;
602 	int procid;
603 	int auth_type;
604 	int auth_len;
605 	char *auth_str;
606 	int verf_len;
607 	char *verf_str;
608 	struct mbuf *mrest;
609 	int mrest_len;
610 	struct mbuf **mbp;
611 	u_int32_t *xidp;
612 {
613 	register struct mbuf *mb;
614 	register u_int32_t *tl;
615 	register caddr_t bpos;
616 	register int i;
617 	struct mbuf *mreq, *mb2;
618 	int siz, grpsiz, authsiz;
619 	struct timeval tv;
620 	static u_int32_t base;
621 
622 	authsiz = nfsm_rndup(auth_len);
623 	MGETHDR(mb, M_WAIT, MT_DATA);
624 	if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
625 		MCLGET(mb, M_WAIT);
626 	} else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
627 		MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED);
628 	} else {
629 		MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
630 	}
631 	mb->m_len = 0;
632 	mreq = mb;
633 	bpos = mtod(mb, caddr_t);
634 
635 	/*
636 	 * First the RPC header.
637 	 */
638 	nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
639 
640 	/*
641 	 * derive initial xid from system time
642 	 * XXX time is invalid if root not yet mounted
643 	 */
644 	if (!base && (rootvp)) {
645 		microtime(&tv);
646 		base = tv.tv_sec << 12;
647 		nfs_xid = base;
648 	}
649 	/*
650 	 * Skip zero xid if it should ever happen.
651 	 */
652 	if (++nfs_xid == 0)
653 		nfs_xid++;
654 
655 	*tl++ = *xidp = txdr_unsigned(nfs_xid);
656 	*tl++ = rpc_call;
657 	*tl++ = rpc_vers;
658 	if (nmflag & NFSMNT_NQNFS) {
659 		*tl++ = txdr_unsigned(NQNFS_PROG);
660 		*tl++ = txdr_unsigned(NQNFS_VER3);
661 	} else {
662 		*tl++ = txdr_unsigned(NFS_PROG);
663 		if (nmflag & NFSMNT_NFSV3)
664 			*tl++ = txdr_unsigned(NFS_VER3);
665 		else
666 			*tl++ = txdr_unsigned(NFS_VER2);
667 	}
668 	if (nmflag & NFSMNT_NFSV3)
669 		*tl++ = txdr_unsigned(procid);
670 	else
671 		*tl++ = txdr_unsigned(nfsv2_procid[procid]);
672 
673 	/*
674 	 * And then the authorization cred.
675 	 */
676 	*tl++ = txdr_unsigned(auth_type);
677 	*tl = txdr_unsigned(authsiz);
678 	switch (auth_type) {
679 	case RPCAUTH_UNIX:
680 		nfsm_build(tl, u_int32_t *, auth_len);
681 		*tl++ = 0;		/* stamp ?? */
682 		*tl++ = 0;		/* NULL hostname */
683 		*tl++ = txdr_unsigned(cr->cr_uid);
684 		*tl++ = txdr_unsigned(cr->cr_gid);
685 		grpsiz = (auth_len >> 2) - 5;
686 		*tl++ = txdr_unsigned(grpsiz);
687 		for (i = 0; i < grpsiz; i++)
688 			*tl++ = txdr_unsigned(cr->cr_groups[i]);
689 		break;
690 	case RPCAUTH_KERB4:
691 		siz = auth_len;
692 		while (siz > 0) {
693 			if (M_TRAILINGSPACE(mb) == 0) {
694 				MGET(mb2, M_WAIT, MT_DATA);
695 				if (siz >= MINCLSIZE)
696 					MCLGET(mb2, M_WAIT);
697 				mb->m_next = mb2;
698 				mb = mb2;
699 				mb->m_len = 0;
700 				bpos = mtod(mb, caddr_t);
701 			}
702 			i = min(siz, M_TRAILINGSPACE(mb));
703 			bcopy(auth_str, bpos, i);
704 			mb->m_len += i;
705 			auth_str += i;
706 			bpos += i;
707 			siz -= i;
708 		}
709 		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
710 			for (i = 0; i < siz; i++)
711 				*bpos++ = '\0';
712 			mb->m_len += siz;
713 		}
714 		break;
715 	};
716 
717 	/*
718 	 * And the verifier...
719 	 */
720 	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
721 	if (verf_str) {
722 		*tl++ = txdr_unsigned(RPCAUTH_KERB4);
723 		*tl = txdr_unsigned(verf_len);
724 		siz = verf_len;
725 		while (siz > 0) {
726 			if (M_TRAILINGSPACE(mb) == 0) {
727 				MGET(mb2, M_WAIT, MT_DATA);
728 				if (siz >= MINCLSIZE)
729 					MCLGET(mb2, M_WAIT);
730 				mb->m_next = mb2;
731 				mb = mb2;
732 				mb->m_len = 0;
733 				bpos = mtod(mb, caddr_t);
734 			}
735 			i = min(siz, M_TRAILINGSPACE(mb));
736 			bcopy(verf_str, bpos, i);
737 			mb->m_len += i;
738 			verf_str += i;
739 			bpos += i;
740 			siz -= i;
741 		}
742 		if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) {
743 			for (i = 0; i < siz; i++)
744 				*bpos++ = '\0';
745 			mb->m_len += siz;
746 		}
747 	} else {
748 		*tl++ = txdr_unsigned(RPCAUTH_NULL);
749 		*tl = 0;
750 	}
751 	mb->m_next = mrest;
752 	mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
753 	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
754 	*mbp = mb;
755 	return (mreq);
756 }
757 
758 /*
759  * copies mbuf chain to the uio scatter/gather list
760  */
761 int
762 nfsm_mbuftouio(mrep, uiop, siz, dpos)
763 	struct mbuf **mrep;
764 	register struct uio *uiop;
765 	int siz;
766 	caddr_t *dpos;
767 {
768 	register char *mbufcp, *uiocp;
769 	register int xfer, left, len;
770 	register struct mbuf *mp;
771 	long uiosiz, rem;
772 	int error = 0;
773 
774 	mp = *mrep;
775 	mbufcp = *dpos;
776 	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
777 	rem = nfsm_rndup(siz)-siz;
778 	while (siz > 0) {
779 		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
780 			return (EFBIG);
781 		left = uiop->uio_iov->iov_len;
782 		uiocp = uiop->uio_iov->iov_base;
783 		if (left > siz)
784 			left = siz;
785 		uiosiz = left;
786 		while (left > 0) {
787 			while (len == 0) {
788 				mp = mp->m_next;
789 				if (mp == NULL)
790 					return (EBADRPC);
791 				mbufcp = mtod(mp, caddr_t);
792 				len = mp->m_len;
793 			}
794 			xfer = (left > len) ? len : left;
795 #ifdef notdef
796 			/* Not Yet.. */
797 			if (uiop->uio_iov->iov_op != NULL)
798 				(*(uiop->uio_iov->iov_op))
799 				(mbufcp, uiocp, xfer);
800 			else
801 #endif
802 			if (uiop->uio_segflg == UIO_SYSSPACE)
803 				bcopy(mbufcp, uiocp, xfer);
804 			else
805 				copyout(mbufcp, uiocp, xfer);
806 			left -= xfer;
807 			len -= xfer;
808 			mbufcp += xfer;
809 			uiocp += xfer;
810 			uiop->uio_offset += xfer;
811 			uiop->uio_resid -= xfer;
812 		}
813 		if (uiop->uio_iov->iov_len <= siz) {
814 			uiop->uio_iovcnt--;
815 			uiop->uio_iov++;
816 		} else {
817 			uiop->uio_iov->iov_base += uiosiz;
818 			uiop->uio_iov->iov_len -= uiosiz;
819 		}
820 		siz -= uiosiz;
821 	}
822 	*dpos = mbufcp;
823 	*mrep = mp;
824 	if (rem > 0) {
825 		if (len < rem)
826 			error = nfs_adv(mrep, dpos, rem, len);
827 		else
828 			*dpos += rem;
829 	}
830 	return (error);
831 }
832 
833 /*
834  * copies a uio scatter/gather list to an mbuf chain.
835  * NOTE: can ony handle iovcnt == 1
836  */
837 int
838 nfsm_uiotombuf(uiop, mq, siz, bpos)
839 	register struct uio *uiop;
840 	struct mbuf **mq;
841 	int siz;
842 	caddr_t *bpos;
843 {
844 	register char *uiocp;
845 	register struct mbuf *mp, *mp2;
846 	register int xfer, left, mlen;
847 	int uiosiz, clflg, rem;
848 	char *cp;
849 
850 #ifdef DIAGNOSTIC
851 	if (uiop->uio_iovcnt != 1)
852 		panic("nfsm_uiotombuf: iovcnt != 1");
853 #endif
854 
855 	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
856 		clflg = 1;
857 	else
858 		clflg = 0;
859 	rem = nfsm_rndup(siz)-siz;
860 	mp = mp2 = *mq;
861 	while (siz > 0) {
862 		left = uiop->uio_iov->iov_len;
863 		uiocp = uiop->uio_iov->iov_base;
864 		if (left > siz)
865 			left = siz;
866 		uiosiz = left;
867 		while (left > 0) {
868 			mlen = M_TRAILINGSPACE(mp);
869 			if (mlen == 0) {
870 				MGET(mp, M_WAIT, MT_DATA);
871 				if (clflg)
872 					MCLGET(mp, M_WAIT);
873 				mp->m_len = 0;
874 				mp2->m_next = mp;
875 				mp2 = mp;
876 				mlen = M_TRAILINGSPACE(mp);
877 			}
878 			xfer = (left > mlen) ? mlen : left;
879 #ifdef notdef
880 			/* Not Yet.. */
881 			if (uiop->uio_iov->iov_op != NULL)
882 				(*(uiop->uio_iov->iov_op))
883 				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
884 			else
885 #endif
886 			if (uiop->uio_segflg == UIO_SYSSPACE)
887 				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
888 			else
889 				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
890 			mp->m_len += xfer;
891 			left -= xfer;
892 			uiocp += xfer;
893 			uiop->uio_offset += xfer;
894 			uiop->uio_resid -= xfer;
895 		}
896 		uiop->uio_iov->iov_base += uiosiz;
897 		uiop->uio_iov->iov_len -= uiosiz;
898 		siz -= uiosiz;
899 	}
900 	if (rem > 0) {
901 		if (rem > M_TRAILINGSPACE(mp)) {
902 			MGET(mp, M_WAIT, MT_DATA);
903 			mp->m_len = 0;
904 			mp2->m_next = mp;
905 		}
906 		cp = mtod(mp, caddr_t)+mp->m_len;
907 		for (left = 0; left < rem; left++)
908 			*cp++ = '\0';
909 		mp->m_len += rem;
910 		*bpos = cp;
911 	} else
912 		*bpos = mtod(mp, caddr_t)+mp->m_len;
913 	*mq = mp;
914 	return (0);
915 }
916 
917 /*
918  * Get at least "siz" bytes of correctly aligned data.
919  * When called the mbuf pointers are not necessarily correct,
920  * dsosp points to what ought to be in m_data and left contains
921  * what ought to be in m_len.
922  * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
923  * cases. (The macros use the vars. dpos and dpos2)
924  */
925 int
926 nfsm_disct(mdp, dposp, siz, left, cp2)
927 	struct mbuf **mdp;
928 	caddr_t *dposp;
929 	int siz;
930 	int left;
931 	caddr_t *cp2;
932 {
933 	register struct mbuf *m1, *m2;
934 	struct mbuf *havebuf = NULL;
935 	caddr_t src = *dposp;
936 	caddr_t dst;
937 	int len;
938 
939 #ifdef DEBUG
940 	if (left < 0)
941 		panic("nfsm_disct: left < 0");
942 #endif
943 	m1 = *mdp;
944 	/*
945 	 * Skip through the mbuf chain looking for an mbuf with
946 	 * some data. If the first mbuf found has enough data
947 	 * and it is correctly aligned return it.
948 	 */
949 	while (left == 0) {
950 		havebuf = m1;
951 		*mdp = m1 = m1->m_next;
952 		if (m1 == NULL)
953 			return (EBADRPC);
954 		src = mtod(m1, caddr_t);
955 		left = m1->m_len;
956 		/*
957 		 * If we start a new mbuf and it is big enough
958 		 * and correctly aligned just return it, don't
959 		 * do any pull up.
960 		 */
961 		if (left >= siz && nfsm_aligned(src)) {
962 			*cp2 = src;
963 			*dposp = src + siz;
964 			return (0);
965 		}
966 	}
967 	if (m1->m_flags & M_EXT) {
968 		if (havebuf) {
969 			/* If the first mbuf with data has external data
970 			 * and there is a previous empty mbuf use it
971 			 * to move the data into.
972 			 */
973 			m2 = m1;
974 			*mdp = m1 = havebuf;
975 			if (m1->m_flags & M_EXT) {
976 				MEXTREMOVE(m1);
977 			}
978 		} else {
979 			/*
980 			 * If the first mbuf has a external data
981 			 * and there is no previous empty mbuf
982 			 * allocate a new mbuf and move the external
983 			 * data to the new mbuf. Also make the first
984 			 * mbuf look empty.
985 			 */
986 			m2 = m_get(M_WAIT, MT_DATA);
987 			m2->m_ext = m1->m_ext;
988 			m2->m_data = src;
989 			m2->m_len = left;
990 			MCLADDREFERENCE(m1, m2);
991 			MEXTREMOVE(m1);
992 			m2->m_next = m1->m_next;
993 			m1->m_next = m2;
994 		}
995 		m1->m_len = 0;
996 		dst = m1->m_dat;
997 	} else {
998 		/*
999 		 * If the first mbuf has no external data
1000 		 * move the data to the front of the mbuf.
1001 		 */
1002 		if ((dst = m1->m_dat) != src)
1003 			ovbcopy(src, dst, left);
1004 		dst += left;
1005 		m1->m_len = left;
1006 		m2 = m1->m_next;
1007 	}
1008 	m1->m_flags &= ~M_PKTHDR;
1009 	*cp2 = m1->m_data = m1->m_dat;   /* data is at beginning of buffer */
1010 	*dposp = mtod(m1, caddr_t) + siz;
1011 	/*
1012 	 * Loop through mbufs pulling data up into first mbuf until
1013 	 * the first mbuf is full or there is no more data to
1014 	 * pullup.
1015 	 */
1016 	while ((len = (MLEN - m1->m_len)) != 0 && m2) {
1017 		if ((len = min(len, m2->m_len)) != 0)
1018 			bcopy(m2->m_data, dst, len);
1019 		m1->m_len += len;
1020 		dst += len;
1021 		m2->m_data += len;
1022 		m2->m_len -= len;
1023 		m2 = m2->m_next;
1024 	}
1025 	if (m1->m_len < siz)
1026 		return (EBADRPC);
1027 	return (0);
1028 }
1029 
1030 /*
1031  * Advance the position in the mbuf chain.
1032  */
1033 int
1034 nfs_adv(mdp, dposp, offs, left)
1035 	struct mbuf **mdp;
1036 	caddr_t *dposp;
1037 	int offs;
1038 	int left;
1039 {
1040 	register struct mbuf *m;
1041 	register int s;
1042 
1043 	m = *mdp;
1044 	s = left;
1045 	while (s < offs) {
1046 		offs -= s;
1047 		m = m->m_next;
1048 		if (m == NULL)
1049 			return (EBADRPC);
1050 		s = m->m_len;
1051 	}
1052 	*mdp = m;
1053 	*dposp = mtod(m, caddr_t)+offs;
1054 	return (0);
1055 }
1056 
1057 /*
1058  * Copy a string into mbufs for the hard cases...
1059  */
1060 int
1061 nfsm_strtmbuf(mb, bpos, cp, siz)
1062 	struct mbuf **mb;
1063 	char **bpos;
1064 	const char *cp;
1065 	long siz;
1066 {
1067 	register struct mbuf *m1 = NULL, *m2;
1068 	long left, xfer, len, tlen;
1069 	u_int32_t *tl;
1070 	int putsize;
1071 
1072 	putsize = 1;
1073 	m2 = *mb;
1074 	left = M_TRAILINGSPACE(m2);
1075 	if (left > 0) {
1076 		tl = ((u_int32_t *)(*bpos));
1077 		*tl++ = txdr_unsigned(siz);
1078 		putsize = 0;
1079 		left -= NFSX_UNSIGNED;
1080 		m2->m_len += NFSX_UNSIGNED;
1081 		if (left > 0) {
1082 			bcopy(cp, (caddr_t) tl, left);
1083 			siz -= left;
1084 			cp += left;
1085 			m2->m_len += left;
1086 			left = 0;
1087 		}
1088 	}
1089 	/* Loop around adding mbufs */
1090 	while (siz > 0) {
1091 		MGET(m1, M_WAIT, MT_DATA);
1092 		if (siz > MLEN)
1093 			MCLGET(m1, M_WAIT);
1094 		m1->m_len = NFSMSIZ(m1);
1095 		m2->m_next = m1;
1096 		m2 = m1;
1097 		tl = mtod(m1, u_int32_t *);
1098 		tlen = 0;
1099 		if (putsize) {
1100 			*tl++ = txdr_unsigned(siz);
1101 			m1->m_len -= NFSX_UNSIGNED;
1102 			tlen = NFSX_UNSIGNED;
1103 			putsize = 0;
1104 		}
1105 		if (siz < m1->m_len) {
1106 			len = nfsm_rndup(siz);
1107 			xfer = siz;
1108 			if (xfer < len)
1109 				*(tl+(xfer>>2)) = 0;
1110 		} else {
1111 			xfer = len = m1->m_len;
1112 		}
1113 		bcopy(cp, (caddr_t) tl, xfer);
1114 		m1->m_len = len+tlen;
1115 		siz -= xfer;
1116 		cp += xfer;
1117 	}
1118 	*mb = m1;
1119 	*bpos = mtod(m1, caddr_t)+m1->m_len;
1120 	return (0);
1121 }
1122 
1123 /*
1124  * Directory caching routines. They work as follows:
1125  * - a cache is maintained per VDIR nfsnode.
1126  * - for each offset cookie that is exported to userspace, and can
1127  *   thus be thrown back at us as an offset to VOP_READDIR, store
1128  *   information in the cache.
1129  * - cached are:
1130  *   - cookie itself
1131  *   - blocknumber (essentially just a search key in the buffer cache)
1132  *   - entry number in block.
1133  *   - offset cookie of block in which this entry is stored
1134  *   - 32 bit cookie if NFSMNT_XLATECOOKIE is used.
1135  * - entries are looked up in a hash table
1136  * - also maintained is an LRU list of entries, used to determine
1137  *   which ones to delete if the cache grows too large.
1138  * - if 32 <-> 64 translation mode is requested for a filesystem,
1139  *   the cache also functions as a translation table
1140  * - in the translation case, invalidating the cache does not mean
1141  *   flushing it, but just marking entries as invalid, except for
1142  *   the <64bit cookie, 32bitcookie> pair which is still valid, to
1143  *   still be able to use the cache as a translation table.
1144  * - 32 bit cookies are uniquely created by combining the hash table
1145  *   entry value, and one generation count per hash table entry,
1146  *   incremented each time an entry is appended to the chain.
1147  * - the cache is invalidated each time a directory is modified
1148  * - sanity checks are also done; if an entry in a block turns
1149  *   out not to have a matching cookie, the cache is invalidated
1150  *   and a new block starting from the wanted offset is fetched from
1151  *   the server.
1152  * - directory entries as read from the server are extended to contain
1153  *   the 64bit and, optionally, the 32bit cookies, for sanity checking
1154  *   the cache and exporting them to userspace through the cookie
1155  *   argument to VOP_READDIR.
1156  */
1157 
/*
 * Hash a directory offset cookie: the sum of the bytes of "off",
 * each taken as a plain char.
 */
u_long
nfs_dirhash(off)
	off_t off;
{
	char *bytes = (char *)&off;
	u_long total = 0L;
	int idx = 0;

	while (idx < sizeof (off)) {
		total += bytes[idx];
		idx++;
	}

	return total;
}
1171 
1172 void
1173 nfs_initdircache(vp)
1174 	struct vnode *vp;
1175 {
1176 	struct nfsnode *np = VTONFS(vp);
1177 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1178 
1179 	np->n_dircachesize = 0;
1180 	np->n_dblkno = 1;
1181 	np->n_dircache =
1182 	    hashinit(NFS_DIRHASHSIZ, M_NFSDIROFF, &nfsdirhashmask);
1183 	TAILQ_INIT(&np->n_dirchain);
1184 	if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
1185 		MALLOC(np->n_dirgens, unsigned *,
1186 		    NFS_DIRHASHSIZ * sizeof (unsigned), M_NFSDIROFF,
1187 		    M_WAITOK);
1188 		bzero((caddr_t)np->n_dirgens,
1189 		    NFS_DIRHASHSIZ * sizeof (unsigned));
1190 	}
1191 }
1192 
1193 static struct nfsdircache dzero = {0, 0, {0, 0}, {0, 0}, 0, 0, 0};
1194 
1195 struct nfsdircache *
1196 nfs_searchdircache(vp, off, do32, hashent)
1197 	struct vnode *vp;
1198 	off_t off;
1199 	int do32;
1200 	int *hashent;
1201 {
1202 	struct nfsdirhashhead *ndhp;
1203 	struct nfsdircache *ndp = NULL;
1204 	struct nfsnode *np = VTONFS(vp);
1205 	unsigned ent;
1206 
1207 	/*
1208 	 * Zero is always a valid cookie.
1209 	 */
1210 	if (off == 0)
1211 		return &dzero;
1212 
1213 	/*
1214 	 * We use a 32bit cookie as search key, directly reconstruct
1215 	 * the hashentry. Else use the hashfunction.
1216 	 */
1217 	if (do32) {
1218 		ent = (u_int32_t)off >> 24;
1219 		if (ent >= NFS_DIRHASHSIZ)
1220 			return NULL;
1221 		ndhp = &np->n_dircache[ent];
1222 	} else {
1223 		ndhp = NFSDIRHASH(np, off);
1224 	}
1225 
1226 	if (hashent)
1227 		*hashent = (int)(ndhp - np->n_dircache);
1228 	if (do32) {
1229 		for (ndp = ndhp->lh_first; ndp; ndp = ndp->dc_hash.le_next) {
1230 			if (ndp->dc_cookie32 == (u_int32_t)off) {
1231 				/*
1232 				 * An invalidated entry will become the
1233 				 * start of a new block fetched from
1234 				 * the server.
1235 				 */
1236 				if (ndp->dc_blkno == -1) {
1237 					ndp->dc_blkcookie = ndp->dc_cookie;
1238 					ndp->dc_blkno = np->n_dblkno++;
1239 					ndp->dc_entry = 0;
1240 				}
1241 				break;
1242 			}
1243 		}
1244 	} else {
1245 		for (ndp = ndhp->lh_first; ndp; ndp = ndp->dc_hash.le_next)
1246 			if (ndp->dc_cookie == off)
1247 				break;
1248 	}
1249 	return ndp;
1250 }
1251 
1252 
1253 struct nfsdircache *
1254 nfs_enterdircache(vp, off, blkoff, en, blkno)
1255 	struct vnode *vp;
1256 	off_t off, blkoff;
1257 	daddr_t blkno;
1258 	int en;
1259 {
1260 	struct nfsnode *np = VTONFS(vp);
1261 	struct nfsdirhashhead *ndhp;
1262 	struct nfsdircache *ndp = NULL, *first;
1263 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1264 	int hashent, gen, overwrite;
1265 
1266 	if (!np->n_dircache)
1267 		/*
1268 		 * XXX would like to do this in nfs_nget but vtype
1269 		 * isn't known at that time.
1270 		 */
1271 		nfs_initdircache(vp);
1272 
1273 	ndp = nfs_searchdircache(vp, off, 0, &hashent);
1274 
1275 	if (ndp && ndp->dc_blkno != -1) {
1276 		/*
1277 		 * Overwriting an old entry. Check if it's the same.
1278 		 * If so, just return. If not, remove the old entry.
1279 		 */
1280 		if (ndp->dc_blkcookie == blkoff && ndp->dc_entry == en)
1281 			return ndp;
1282 		TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain);
1283 		LIST_REMOVE(ndp, dc_hash);
1284 		FREE(ndp, M_NFSDIROFF);
1285 		ndp = 0;
1286 	}
1287 
1288 	ndhp = &np->n_dircache[hashent];
1289 
1290 	if (!ndp) {
1291 		MALLOC(ndp, struct nfsdircache *, sizeof (*ndp), M_NFSDIROFF,
1292 		    M_WAITOK);
1293 		overwrite = 0;
1294 		if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
1295 			/*
1296 			 * We're allocating a new entry, so bump the
1297 			 * generation number.
1298 			 */
1299 			gen = ++np->n_dirgens[hashent];
1300 			if (gen == 0) {
1301 				np->n_dirgens[hashent]++;
1302 				gen++;
1303 			}
1304 			ndp->dc_cookie32 = (hashent << 24) | (gen & 0xffffff);
1305 		}
1306 	} else
1307 		overwrite = 1;
1308 
1309 	/*
1310 	 * If the entry number is 0, we are at the start of a new block, so
1311 	 * allocate a new blocknumber.
1312 	 */
1313 	if (en == 0)
1314 		ndp->dc_blkno = np->n_dblkno++;
1315 	else
1316 		ndp->dc_blkno = blkno;
1317 
1318 	ndp->dc_cookie = off;
1319 	ndp->dc_blkcookie = blkoff;
1320 	ndp->dc_entry = en;
1321 
1322 	if (overwrite)
1323 		return ndp;
1324 
1325 	/*
1326 	 * If the maximum directory cookie cache size has been reached
1327 	 * for this node, take one off the front. The idea is that
1328 	 * directories are typically read front-to-back once, so that
1329 	 * the oldest entries can be thrown away without much performance
1330 	 * loss.
1331 	 */
1332 	if (np->n_dircachesize == NFS_MAXDIRCACHE) {
1333 		first = np->n_dirchain.tqh_first;
1334 		TAILQ_REMOVE(&np->n_dirchain, first, dc_chain);
1335 		LIST_REMOVE(first, dc_hash);
1336 		FREE(first, M_NFSDIROFF);
1337 	} else
1338 		np->n_dircachesize++;
1339 
1340 	LIST_INSERT_HEAD(ndhp, ndp, dc_hash);
1341 	TAILQ_INSERT_TAIL(&np->n_dirchain, ndp, dc_chain);
1342 	return ndp;
1343 }
1344 
1345 void
1346 nfs_invaldircache(vp, forcefree)
1347 	struct vnode *vp;
1348 	int forcefree;
1349 {
1350 	struct nfsnode *np = VTONFS(vp);
1351 	struct nfsdircache *ndp = NULL;
1352 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1353 
1354 #ifdef DIAGNOSTIC
1355 	if (vp->v_type != VDIR)
1356 		panic("nfs: invaldircache: not dir");
1357 #endif
1358 
1359 	if (!np->n_dircache)
1360 		return;
1361 
1362 	if (!(nmp->nm_flag & NFSMNT_XLATECOOKIE) || forcefree) {
1363 		while ((ndp = np->n_dirchain.tqh_first)) {
1364 			TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain);
1365 			LIST_REMOVE(ndp, dc_hash);
1366 			FREE(ndp, M_NFSDIROFF);
1367 		}
1368 		np->n_dircachesize = 0;
1369 		if (forcefree && np->n_dirgens) {
1370 			FREE(np->n_dirgens, M_NFSDIROFF);
1371 		}
1372 	} else {
1373 		for (ndp = np->n_dirchain.tqh_first; ndp;
1374 		    ndp = ndp->dc_chain.tqe_next)
1375 			ndp->dc_blkno = -1;
1376 	}
1377 
1378 	np->n_dblkno = 1;
1379 }
1380 
1381 /*
1382  * Called once before VFS init to initialize shared and
1383  * server-specific data structures.
1384  */
1385 void
1386 nfs_init()
1387 {
1388 
1389 #if !defined(alpha) && defined(DIAGNOSTIC)
1390 	/*
1391 	 * Check to see if major data structures haven't bloated.
1392 	 */
1393 	if (sizeof (struct nfsnode) > NFS_NODEALLOC) {
1394 		printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC);
1395 		printf("Try reducing NFS_SMALLFH\n");
1396 	}
1397 	if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) {
1398 		printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC);
1399 		printf("Try reducing NFS_UIDHASHSIZ\n");
1400 	}
1401 	if (sizeof (struct nfsuid) > NFS_UIDALLOC) {
1402 		printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC);
1403 		printf("Try unionizing the nu_nickname and nu_flag fields\n");
1404 	}
1405 #endif
1406 
1407 	nfsrtt.pos = 0;
1408 	rpc_vers = txdr_unsigned(RPC_VER2);
1409 	rpc_call = txdr_unsigned(RPC_CALL);
1410 	rpc_reply = txdr_unsigned(RPC_REPLY);
1411 	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
1412 	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
1413 	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
1414 	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
1415 	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
1416 	rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
1417 	nfs_prog = txdr_unsigned(NFS_PROG);
1418 	nqnfs_prog = txdr_unsigned(NQNFS_PROG);
1419 	nfs_true = txdr_unsigned(TRUE);
1420 	nfs_false = txdr_unsigned(FALSE);
1421 	nfs_xdrneg1 = txdr_unsigned(-1);
1422 	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
1423 	if (nfs_ticks < 1)
1424 		nfs_ticks = 1;
1425 #ifdef NFSSERVER
1426 	nfsrv_init(0);			/* Init server data structures */
1427 	nfsrv_initcache();		/* Init the server request cache */
1428 #endif /* NFSSERVER */
1429 
1430 	/*
1431 	 * Initialize the nqnfs data structures.
1432 	 */
1433 	if (nqnfsstarttime == 0) {
1434 		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
1435 			+ nqsrv_clockskew + nqsrv_writeslack;
1436 		NQLOADNOVRAM(nqnfsstarttime);
1437 		CIRCLEQ_INIT(&nqtimerhead);
1438 		nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash);
1439 	}
1440 
1441 	/*
1442 	 * Initialize reply list and start timer
1443 	 */
1444 	TAILQ_INIT(&nfs_reqq);
1445 	nfs_timer(NULL);
1446 }
1447 
1448 #ifdef NFS
1449 /*
1450  * Called once at VFS init to initialize client-specific data structures.
1451  */
1452 void
1453 nfs_vfs_init()
1454 {
1455 	register int i;
1456 
1457 	/* Ensure async daemons disabled */
1458 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
1459 		nfs_iodwant[i] = (struct proc *)0;
1460 		nfs_iodmount[i] = (struct nfsmount *)0;
1461 	}
1462 	nfs_nhinit();			/* Init the nfsnode table */
1463 }
1464 
1465 /*
1466  * Attribute cache routines.
1467  * nfs_loadattrcache() - loads or updates the cache contents from attributes
1468  *	that are on the mbuf list
1469  * nfs_getattrcache() - returns valid attributes if found in cache, returns
1470  *	error otherwise
1471  */
1472 
1473 /*
1474  * Load the attribute cache (that lives in the nfsnode entry) with
1475  * the values on the mbuf list and
1476  * Iff vap not NULL
1477  *    copy the attributes to *vaper
1478  */
1479 int
1480 nfsm_loadattrcache(vpp, mdp, dposp, vaper)
1481 	struct vnode **vpp;
1482 	struct mbuf **mdp;
1483 	caddr_t *dposp;
1484 	struct vattr *vaper;
1485 {
1486 	register int32_t t1;
1487 	caddr_t cp2;
1488 	int error = 0;
1489 	struct mbuf *md;
1490 	int v3 = NFS_ISV3(*vpp);
1491 
1492 	md = *mdp;
1493 	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
1494 	error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
1495 	if (error)
1496 		return (error);
1497 	return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper);
1498 }
1499 
1500 int
1501 nfs_loadattrcache(vpp, fp, vaper)
1502 	struct vnode **vpp;
1503 	struct nfs_fattr *fp;
1504 	struct vattr *vaper;
1505 {
1506 	register struct vnode *vp = *vpp;
1507 	register struct vattr *vap;
1508 	int v3 = NFS_ISV3(vp);
1509 	enum vtype vtyp;
1510 	u_short vmode;
1511 	struct timespec mtime;
1512 	struct vnode *nvp;
1513 	int32_t rdev;
1514 	register struct nfsnode *np;
1515 	extern int (**spec_nfsv2nodeop_p) __P((void *));
1516 
1517 	if (v3) {
1518 		vtyp = nfsv3tov_type(fp->fa_type);
1519 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1520 		rdev = makedev(fxdr_unsigned(u_char, fp->fa3_rdev.specdata1),
1521 			fxdr_unsigned(u_char, fp->fa3_rdev.specdata2));
1522 		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
1523 	} else {
1524 		vtyp = nfsv2tov_type(fp->fa_type);
1525 		vmode = fxdr_unsigned(u_short, fp->fa_mode);
1526 		if (vtyp == VNON || vtyp == VREG)
1527 			vtyp = IFTOVT(vmode);
1528 		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
1529 		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
1530 
1531 		/*
1532 		 * Really ugly NFSv2 kludge.
1533 		 */
1534 		if (vtyp == VCHR && rdev == 0xffffffff)
1535 			vtyp = VFIFO;
1536 	}
1537 
1538 	/*
1539 	 * If v_type == VNON it is a new node, so fill in the v_type,
1540 	 * n_mtime fields. Check to see if it represents a special
1541 	 * device, and if so, check for a possible alias. Once the
1542 	 * correct vnode has been obtained, fill in the rest of the
1543 	 * information.
1544 	 */
1545 	np = VTONFS(vp);
1546 	if (vp->v_type != vtyp) {
1547 		vp->v_type = vtyp;
1548 		if (vp->v_type == VFIFO) {
1549 #ifndef FIFO
1550 			return (EOPNOTSUPP);
1551 #else
1552 			extern int (**fifo_nfsv2nodeop_p) __P((void *));
1553 			vp->v_op = fifo_nfsv2nodeop_p;
1554 #endif /* FIFO */
1555 		}
1556 		if (vp->v_type == VCHR || vp->v_type == VBLK) {
1557 			vp->v_op = spec_nfsv2nodeop_p;
1558 			nvp = checkalias(vp, (dev_t)rdev, vp->v_mount);
1559 			if (nvp) {
1560 				/*
1561 				 * Discard unneeded vnode, but save its nfsnode.
1562 				 * Since the nfsnode does not have a lock, its
1563 				 * vnode lock has to be carried over.
1564 				 */
1565 #ifdef Lite2_integrated
1566 				nvp->v_vnlock = vp->v_vnlock;
1567 				vp->v_vnlock = NULL;
1568 #endif
1569 				nvp->v_data = vp->v_data;
1570 				vp->v_data = NULL;
1571 				vp->v_op = spec_vnodeop_p;
1572 				vrele(vp);
1573 				vgone(vp);
1574 				/*
1575 				 * Reinitialize aliased node.
1576 				 */
1577 				np->n_vnode = nvp;
1578 				*vpp = vp = nvp;
1579 			}
1580 		}
1581 		np->n_mtime = mtime.tv_sec;
1582 	}
1583 	vap = np->n_vattr;
1584 	vap->va_type = vtyp;
1585 	vap->va_mode = vmode & ALLPERMS;
1586 	vap->va_rdev = (dev_t)rdev;
1587 	vap->va_mtime = mtime;
1588 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1589 	if (v3) {
1590 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1591 		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
1592 		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
1593 		fxdr_hyper(&fp->fa3_size, &vap->va_size);
1594 		if (vtyp == VDIR)
1595 			vap->va_blocksize = NFS_DIRFRAGSIZ;
1596 		else
1597 			vap->va_blocksize = NFS_FABLKSIZE;
1598 		fxdr_hyper(&fp->fa3_used, &vap->va_bytes);
1599 		vap->va_fileid = fxdr_unsigned(int32_t,
1600 		    fp->fa3_fileid.nfsuquad[1]);
1601 		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
1602 		fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
1603 		vap->va_flags = 0;
1604 		vap->va_filerev = 0;
1605 	} else {
1606 		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
1607 		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
1608 		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
1609 		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
1610 		if (vtyp == VDIR)
1611 			vap->va_blocksize = NFS_DIRFRAGSIZ;
1612 		else
1613 			vap->va_blocksize =
1614 				fxdr_unsigned(int32_t, fp->fa2_blocksize);
1615 		vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
1616 		    * NFS_FABLKSIZE;
1617 		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
1618 		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
1619 		vap->va_flags = 0;
1620 		vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
1621 		    fp->fa2_ctime.nfsv2_sec);
1622 		vap->va_ctime.tv_nsec = 0;
1623 		vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
1624 		vap->va_filerev = 0;
1625 	}
1626 	if (vap->va_size != np->n_size) {
1627 		if (vap->va_type == VREG) {
1628 			if (np->n_flag & NMODIFIED) {
1629 				if (vap->va_size < np->n_size)
1630 					vap->va_size = np->n_size;
1631 				else
1632 					np->n_size = vap->va_size;
1633 			} else
1634 				np->n_size = vap->va_size;
1635 			vnode_pager_setsize(vp, np->n_size);
1636 		} else
1637 			np->n_size = vap->va_size;
1638 	}
1639 	np->n_attrstamp = time.tv_sec;
1640 	if (vaper != NULL) {
1641 		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
1642 		if (np->n_flag & NCHG) {
1643 			if (np->n_flag & NACC)
1644 				vaper->va_atime = np->n_atim;
1645 			if (np->n_flag & NUPD)
1646 				vaper->va_mtime = np->n_mtim;
1647 		}
1648 	}
1649 	return (0);
1650 }
1651 
1652 /*
1653  * Check the time stamp
1654  * If the cache is valid, copy contents to *vap and return 0
1655  * otherwise return an error
1656  */
1657 int
1658 nfs_getattrcache(vp, vaper)
1659 	register struct vnode *vp;
1660 	struct vattr *vaper;
1661 {
1662 	register struct nfsnode *np = VTONFS(vp);
1663 	register struct vattr *vap;
1664 
1665 	if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
1666 		nfsstats.attrcache_misses++;
1667 		return (ENOENT);
1668 	}
1669 	nfsstats.attrcache_hits++;
1670 	vap = np->n_vattr;
1671 	if (vap->va_size != np->n_size) {
1672 		if (vap->va_type == VREG) {
1673 			if (np->n_flag & NMODIFIED) {
1674 				if (vap->va_size < np->n_size)
1675 					vap->va_size = np->n_size;
1676 				else
1677 					np->n_size = vap->va_size;
1678 			} else
1679 				np->n_size = vap->va_size;
1680 			vnode_pager_setsize(vp, np->n_size);
1681 		} else
1682 			np->n_size = vap->va_size;
1683 	}
1684 	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
1685 	if (np->n_flag & NCHG) {
1686 		if (np->n_flag & NACC)
1687 			vaper->va_atime = np->n_atim;
1688 		if (np->n_flag & NUPD)
1689 			vaper->va_mtime = np->n_mtim;
1690 	}
1691 	return (0);
1692 }
1693 
1694 /*
1695  * Heuristic to see if the server XDR encodes directory cookies or not.
1696  * it is not supposed to, but a lot of servers may do this. Also, since
1697  * most/all servers will implement V2 as well, it is expected that they
1698  * may return just 32 bits worth of cookie information, so we need to
1699  * find out in which 32 bits this information is available. We do this
1700  * to avoid trouble with emulated binaries that can't handle 64 bit
1701  * directory offsets.
1702  */
1703 
1704 void
1705 nfs_cookieheuristic(vp, flagp, p, cred)
1706 	struct vnode *vp;
1707 	int *flagp;
1708 	struct proc *p;
1709 	struct ucred *cred;
1710 {
1711 	struct uio auio;
1712 	struct iovec aiov;
1713 	caddr_t buf, cp;
1714 	struct dirent *dp;
1715 	off_t *cookies, *cop;
1716 	int error, eof, nc, len;
1717 
1718 	nc = NFS_DIRFRAGSIZ / 16;
1719 	MALLOC(buf, caddr_t, NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
1720 	MALLOC(cookies, off_t *, nc * sizeof (off_t), M_TEMP, M_WAITOK);
1721 
1722 	aiov.iov_base = buf;
1723 	aiov.iov_len = NFS_DIRFRAGSIZ;
1724 	auio.uio_iov = &aiov;
1725 	auio.uio_iovcnt = 1;
1726 	auio.uio_rw = UIO_READ;
1727 	auio.uio_segflg = UIO_SYSSPACE;
1728 	auio.uio_procp = p;
1729 	auio.uio_resid = NFS_DIRFRAGSIZ;
1730 	auio.uio_offset = 0;
1731 
1732 	error = VOP_READDIR(vp, &auio, cred, &eof, cookies, nc);
1733 
1734 	len = NFS_DIRFRAGSIZ - auio.uio_resid;
1735 	if (error || len == 0) {
1736 		FREE(buf, M_TEMP);
1737 		FREE(cookies, M_TEMP);
1738 		return;
1739 	}
1740 
1741 	/*
1742 	 * Find the first valid entry and look at its offset cookie.
1743 	 */
1744 
1745 	cp = buf;
1746 	for (cop = cookies; len > 0; len -= dp->d_reclen) {
1747 		dp = (struct dirent *)cp;
1748 		if (dp->d_fileno != 0 && len >= dp->d_reclen) {
1749 			if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
1750 				*flagp |= NFSMNT_SWAPCOOKIE;
1751 				nfs_invaldircache(vp, 0);
1752 				nfs_vinvalbuf(vp, 0, cred, p, 1);
1753 			}
1754 			break;
1755 		}
1756 		cop++;
1757 		cp += dp->d_reclen;
1758 	}
1759 
1760 	FREE(buf, M_TEMP);
1761 	FREE(cookies, M_TEMP);
1762 }
1763 #endif /* NFS */
1764 
1765 /*
1766  * Set up nameidata for a lookup() call and do it.
1767  *
1768  * If pubflag is set, this call is done for a lookup operation on the
1769  * public filehandle. In that case we allow crossing mountpoints and
1770  * absolute pathnames. However, the caller is expected to check that
1771  * the lookup result is within the public fs, and deny access if
1772  * it is not.
1773  */
1774 int
1775 nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag)
1776 	register struct nameidata *ndp;
1777 	fhandle_t *fhp;
1778 	int len;
1779 	struct nfssvc_sock *slp;
1780 	struct mbuf *nam;
1781 	struct mbuf **mdp;
1782 	caddr_t *dposp;
1783 	struct vnode **retdirp;
1784 	struct proc *p;
1785 	int kerbflag, pubflag;
1786 {
1787 	register int i, rem;
1788 	register struct mbuf *md;
1789 	register char *fromcp, *tocp, *cp;
1790 	struct iovec aiov;
1791 	struct uio auio;
1792 	struct vnode *dp;
1793 	int error, rdonly, linklen;
1794 	struct componentname *cnp = &ndp->ni_cnd;
1795 
1796 	*retdirp = (struct vnode *)0;
1797 	MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
1798 	/*
1799 	 * Copy the name from the mbuf list to ndp->ni_pnbuf
1800 	 * and set the various ndp fields appropriately.
1801 	 */
1802 	fromcp = *dposp;
1803 	tocp = cnp->cn_pnbuf;
1804 	md = *mdp;
1805 	rem = mtod(md, caddr_t) + md->m_len - fromcp;
1806 	for (i = 0; i < len; i++) {
1807 		while (rem == 0) {
1808 			md = md->m_next;
1809 			if (md == NULL) {
1810 				error = EBADRPC;
1811 				goto out;
1812 			}
1813 			fromcp = mtod(md, caddr_t);
1814 			rem = md->m_len;
1815 		}
1816 		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
1817 			error = EACCES;
1818 			goto out;
1819 		}
1820 		*tocp++ = *fromcp++;
1821 		rem--;
1822 	}
1823 	*tocp = '\0';
1824 	*mdp = md;
1825 	*dposp = fromcp;
1826 	len = nfsm_rndup(len)-len;
1827 	if (len > 0) {
1828 		if (rem >= len)
1829 			*dposp += len;
1830 		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
1831 			goto out;
1832 	}
1833 
1834 	/*
1835 	 * Extract and set starting directory.
1836 	 */
1837 	error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
1838 	    nam, &rdonly, kerbflag, pubflag);
1839 	if (error)
1840 		goto out;
1841 	if (dp->v_type != VDIR) {
1842 		vrele(dp);
1843 		error = ENOTDIR;
1844 		goto out;
1845 	}
1846 
1847 	if (rdonly)
1848 		cnp->cn_flags |= RDONLY;
1849 
1850 	*retdirp = dp;
1851 
1852 	if (pubflag) {
1853 		/*
1854 		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1855 		 * and the 'native path' indicator.
1856 		 */
1857 		MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1858 		fromcp = cnp->cn_pnbuf;
1859 		tocp = cp;
1860 		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
1861 			switch ((unsigned char)*fromcp) {
1862 			case WEBNFS_NATIVE_CHAR:
1863 				/*
1864 				 * 'Native' path for us is the same
1865 				 * as a path according to the NFS spec,
1866 				 * just skip the escape char.
1867 				 */
1868 				fromcp++;
1869 				break;
1870 			/*
1871 			 * More may be added in the future, range 0x80-0xff
1872 			 */
1873 			default:
1874 				error = EIO;
1875 				FREE(cp, M_NAMEI);
1876 				goto out;
1877 			}
1878 		}
1879 		/*
1880 		 * Translate the '%' escapes, URL-style.
1881 		 */
1882 		while (*fromcp != '\0') {
1883 			if (*fromcp == WEBNFS_ESC_CHAR) {
1884 				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
1885 					fromcp++;
1886 					*tocp++ = HEXSTRTOI(fromcp);
1887 					fromcp += 2;
1888 					continue;
1889 				} else {
1890 					error = ENOENT;
1891 					FREE(cp, M_NAMEI);
1892 					goto out;
1893 				}
1894 			} else
1895 				*tocp++ = *fromcp++;
1896 		}
1897 		*tocp = '\0';
1898 		FREE(cnp->cn_pnbuf, M_NAMEI);
1899 		cnp->cn_pnbuf = cp;
1900 	}
1901 
1902 	ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
1903 	ndp->ni_segflg = UIO_SYSSPACE;
1904 
1905 	if (pubflag) {
1906 		ndp->ni_rootdir = rootvnode;
1907 		ndp->ni_loopcnt = 0;
1908 		if (cnp->cn_pnbuf[0] == '/')
1909 			dp = rootvnode;
1910 	} else {
1911 		cnp->cn_flags |= NOCROSSMOUNT;
1912 	}
1913 
1914 	cnp->cn_proc = p;
1915 	VREF(dp);
1916 
1917     for (;;) {
1918 	cnp->cn_nameptr = cnp->cn_pnbuf;
1919 	ndp->ni_startdir = dp;
1920 	/*
1921 	 * And call lookup() to do the real work
1922 	 */
1923 	error = lookup(ndp);
1924 	if (error)
1925 		break;
1926 	/*
1927 	 * Check for encountering a symbolic link
1928 	 */
1929 	if ((cnp->cn_flags & ISSYMLINK) == 0) {
1930 		if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
1931 			cnp->cn_flags |= HASBUF;
1932 			return (0);
1933 		}
1934 		break;
1935 	} else {
1936 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
1937 			VOP_UNLOCK(ndp->ni_dvp);
1938 		if (!pubflag) {
1939 			vrele(ndp->ni_dvp);
1940 			vput(ndp->ni_vp);
1941 			ndp->ni_vp = NULL;
1942 			error = EINVAL;
1943 			break;
1944 		}
1945 
1946 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
1947 			error = ELOOP;
1948 			break;
1949 		}
1950 		if (ndp->ni_pathlen > 1)
1951 			MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
1952 		else
1953 			cp = cnp->cn_pnbuf;
1954 		aiov.iov_base = cp;
1955 		aiov.iov_len = MAXPATHLEN;
1956 		auio.uio_iov = &aiov;
1957 		auio.uio_iovcnt = 1;
1958 		auio.uio_offset = 0;
1959 		auio.uio_rw = UIO_READ;
1960 		auio.uio_segflg = UIO_SYSSPACE;
1961 		auio.uio_procp = (struct proc *)0;
1962 		auio.uio_resid = MAXPATHLEN;
1963 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
1964 		if (error) {
1965 		badlink:
1966 			if (ndp->ni_pathlen > 1)
1967 				FREE(cp, M_NAMEI);
1968 			break;
1969 		}
1970 		linklen = MAXPATHLEN - auio.uio_resid;
1971 		if (linklen == 0) {
1972 			error = ENOENT;
1973 			goto badlink;
1974 		}
1975 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
1976 			error = ENAMETOOLONG;
1977 			goto badlink;
1978 		}
1979 		if (ndp->ni_pathlen > 1) {
1980 			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
1981 			FREE(cnp->cn_pnbuf, M_NAMEI);
1982 			cnp->cn_pnbuf = cp;
1983 		} else
1984 			cnp->cn_pnbuf[linklen] = '\0';
1985 		ndp->ni_pathlen += linklen;
1986 		vput(ndp->ni_vp);
1987 		dp = ndp->ni_dvp;
1988 		/*
1989 		 * Check if root directory should replace current directory.
1990 		 */
1991 		if (cnp->cn_pnbuf[0] == '/') {
1992 			vrele(dp);
1993 			dp = ndp->ni_rootdir;
1994 			VREF(dp);
1995 		}
1996 	}
1997    }
1998 out:
1999 	FREE(cnp->cn_pnbuf, M_NAMEI);
2000 	return (error);
2001 }
2002 
2003 /*
2004  * A fiddled version of m_adj() that ensures null fill to a long
2005  * boundary and only trims off the back end
2006  */
2007 void
2008 nfsm_adj(mp, len, nul)
2009 	struct mbuf *mp;
2010 	register int len;
2011 	int nul;
2012 {
2013 	register struct mbuf *m;
2014 	register int count, i;
2015 	register char *cp;
2016 
2017 	/*
2018 	 * Trim from tail.  Scan the mbuf chain,
2019 	 * calculating its length and finding the last mbuf.
2020 	 * If the adjustment only affects this mbuf, then just
2021 	 * adjust and return.  Otherwise, rescan and truncate
2022 	 * after the remaining size.
2023 	 */
2024 	count = 0;
2025 	m = mp;
2026 	for (;;) {
2027 		count += m->m_len;
2028 		if (m->m_next == (struct mbuf *)0)
2029 			break;
2030 		m = m->m_next;
2031 	}
2032 	if (m->m_len > len) {
2033 		m->m_len -= len;
2034 		if (nul > 0) {
2035 			cp = mtod(m, caddr_t)+m->m_len-nul;
2036 			for (i = 0; i < nul; i++)
2037 				*cp++ = '\0';
2038 		}
2039 		return;
2040 	}
2041 	count -= len;
2042 	if (count < 0)
2043 		count = 0;
2044 	/*
2045 	 * Correct length for chain is "count".
2046 	 * Find the mbuf with last data, adjust its length,
2047 	 * and toss data from remaining mbufs on chain.
2048 	 */
2049 	for (m = mp; m; m = m->m_next) {
2050 		if (m->m_len >= count) {
2051 			m->m_len = count;
2052 			if (nul > 0) {
2053 				cp = mtod(m, caddr_t)+m->m_len-nul;
2054 				for (i = 0; i < nul; i++)
2055 					*cp++ = '\0';
2056 			}
2057 			break;
2058 		}
2059 		count -= m->m_len;
2060 	}
2061 	for (m = m->m_next;m;m = m->m_next)
2062 		m->m_len = 0;
2063 }
2064 
2065 /*
2066  * Make these functions instead of macros, so that the kernel text size
2067  * doesn't get too big...
2068  */
2069 void
2070 nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp)
2071 	struct nfsrv_descript *nfsd;
2072 	int before_ret;
2073 	register struct vattr *before_vap;
2074 	int after_ret;
2075 	struct vattr *after_vap;
2076 	struct mbuf **mbp;
2077 	char **bposp;
2078 {
2079 	register struct mbuf *mb = *mbp, *mb2;
2080 	register char *bpos = *bposp;
2081 	register u_int32_t *tl;
2082 
2083 	if (before_ret) {
2084 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
2085 		*tl = nfs_false;
2086 	} else {
2087 		nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
2088 		*tl++ = nfs_true;
2089 		txdr_hyper(&(before_vap->va_size), tl);
2090 		tl += 2;
2091 		txdr_nfsv3time(&(before_vap->va_mtime), tl);
2092 		tl += 2;
2093 		txdr_nfsv3time(&(before_vap->va_ctime), tl);
2094 	}
2095 	*bposp = bpos;
2096 	*mbp = mb;
2097 	nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp);
2098 }
2099 
2100 void
2101 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp)
2102 	struct nfsrv_descript *nfsd;
2103 	int after_ret;
2104 	struct vattr *after_vap;
2105 	struct mbuf **mbp;
2106 	char **bposp;
2107 {
2108 	register struct mbuf *mb = *mbp, *mb2;
2109 	register char *bpos = *bposp;
2110 	register u_int32_t *tl;
2111 	register struct nfs_fattr *fp;
2112 
2113 	if (after_ret) {
2114 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
2115 		*tl = nfs_false;
2116 	} else {
2117 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR);
2118 		*tl++ = nfs_true;
2119 		fp = (struct nfs_fattr *)tl;
2120 		nfsm_srvfattr(nfsd, after_vap, fp);
2121 	}
2122 	*mbp = mb;
2123 	*bposp = bpos;
2124 }
2125 
2126 void
2127 nfsm_srvfattr(nfsd, vap, fp)
2128 	register struct nfsrv_descript *nfsd;
2129 	register struct vattr *vap;
2130 	register struct nfs_fattr *fp;
2131 {
2132 
2133 	fp->fa_nlink = txdr_unsigned(vap->va_nlink);
2134 	fp->fa_uid = txdr_unsigned(vap->va_uid);
2135 	fp->fa_gid = txdr_unsigned(vap->va_gid);
2136 	if (nfsd->nd_flag & ND_NFSV3) {
2137 		fp->fa_type = vtonfsv3_type(vap->va_type);
2138 		fp->fa_mode = vtonfsv3_mode(vap->va_mode);
2139 		txdr_hyper(&vap->va_size, &fp->fa3_size);
2140 		txdr_hyper(&vap->va_bytes, &fp->fa3_used);
2141 		fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev));
2142 		fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev));
2143 		fp->fa3_fsid.nfsuquad[0] = 0;
2144 		fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid);
2145 		fp->fa3_fileid.nfsuquad[0] = 0;
2146 		fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid);
2147 		txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime);
2148 		txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime);
2149 		txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime);
2150 	} else {
2151 		fp->fa_type = vtonfsv2_type(vap->va_type);
2152 		fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
2153 		fp->fa2_size = txdr_unsigned(vap->va_size);
2154 		fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize);
2155 		if (vap->va_type == VFIFO)
2156 			fp->fa2_rdev = 0xffffffff;
2157 		else
2158 			fp->fa2_rdev = txdr_unsigned(vap->va_rdev);
2159 		fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE);
2160 		fp->fa2_fsid = txdr_unsigned(vap->va_fsid);
2161 		fp->fa2_fileid = txdr_unsigned(vap->va_fileid);
2162 		txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime);
2163 		txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime);
2164 		txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime);
2165 	}
2166 }
2167 
2168 /*
2169  * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
2170  * 	- look up fsid in mount list (if not found ret error)
2171  *	- get vp and export rights by calling VFS_FHTOVP()
2172  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2173  *	- if not lockflag unlock it with VOP_UNLOCK()
2174  */
2175 int
2176 nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag)
2177 	fhandle_t *fhp;
2178 	int lockflag;
2179 	struct vnode **vpp;
2180 	struct ucred *cred;
2181 	struct nfssvc_sock *slp;
2182 	struct mbuf *nam;
2183 	int *rdonlyp;
2184 	int kerbflag;
2185 {
2186 #ifdef Lite2_integrated
2187 	struct proc *p = curproc;	/* XXX */
2188 #endif
2189 	register struct mount *mp;
2190 	register int i;
2191 	struct ucred *credanon;
2192 	int error, exflags;
2193 	struct sockaddr_in *saddr;
2194 
2195 	*vpp = (struct vnode *)0;
2196 
2197 	if (nfs_ispublicfh(fhp)) {
2198 		if (!pubflag || !nfs_pub.np_valid)
2199 			return (ESTALE);
2200 		fhp = &nfs_pub.np_handle;
2201 	}
2202 
2203 #ifdef Lite2_integrated
2204 	mp = vfs_getvfs(&fhp->fh_fsid);
2205 #else
2206 	mp = getvfs(&fhp->fh_fsid);
2207 #endif
2208 	if (!mp)
2209 		return (ESTALE);
2210 	error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon);
2211 	if (error)
2212 		return (error);
2213 
2214 	if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) {
2215 		saddr = mtod(nam, struct sockaddr_in *);
2216 		if (saddr->sin_family == AF_INET &&
2217 		    ntohs(saddr->sin_port) >= IPPORT_RESERVED) {
2218 			vput(*vpp);
2219 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2220 		}
2221 	}
2222 	/*
2223 	 * Check/setup credentials.
2224 	 */
2225 	if (exflags & MNT_EXKERB) {
2226 		if (!kerbflag) {
2227 			vput(*vpp);
2228 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2229 		}
2230 	} else if (kerbflag) {
2231 		vput(*vpp);
2232 		return (NFSERR_AUTHERR | AUTH_TOOWEAK);
2233 	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
2234 		cred->cr_uid = credanon->cr_uid;
2235 		cred->cr_gid = credanon->cr_gid;
2236 		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
2237 			cred->cr_groups[i] = credanon->cr_groups[i];
2238 		cred->cr_ngroups = i;
2239 	}
2240 	if (exflags & MNT_EXRDONLY)
2241 		*rdonlyp = 1;
2242 	else
2243 		*rdonlyp = 0;
2244 	if (!lockflag)
2245 #ifdef Lite2_integrated
2246 		VOP_UNLOCK(*vpp, 0, p);
2247 #else
2248 		VOP_UNLOCK(*vpp);
2249 #endif
2250 	return (0);
2251 }
2252 
2253 /*
2254  * WebNFS: check if a filehandle is a public filehandle. For v3, this
2255  * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
2256  * transformed this to all zeroes in both cases, so check for it.
2257  */
2258 int
2259 nfs_ispublicfh(fhp)
2260 	fhandle_t *fhp;
2261 {
2262 	char *cp = (char *)fhp;
2263 	int i;
2264 
2265 	for (i = 0; i < NFSX_V3FH; i++)
2266 		if (*cp++ != 0)
2267 			return (FALSE);
2268 	return (TRUE);
2269 }
2270 
2271 /*
2272  * This function compares two net addresses by family and returns TRUE
2273  * if they are the same host.
2274  * If there is any doubt, return FALSE.
2275  * The AF_INET family is handled as a special case so that address mbufs
2276  * don't need to be saved to store "struct in_addr", which is only 4 bytes.
2277  */
2278 int
2279 netaddr_match(family, haddr, nam)
2280 	int family;
2281 	union nethostaddr *haddr;
2282 	struct mbuf *nam;
2283 {
2284 	register struct sockaddr_in *inetaddr;
2285 
2286 	switch (family) {
2287 	case AF_INET:
2288 		inetaddr = mtod(nam, struct sockaddr_in *);
2289 		if (inetaddr->sin_family == AF_INET &&
2290 		    inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
2291 			return (1);
2292 		break;
2293 #ifdef ISO
2294 	case AF_ISO:
2295 	    {
2296 		register struct sockaddr_iso *isoaddr1, *isoaddr2;
2297 
2298 		isoaddr1 = mtod(nam, struct sockaddr_iso *);
2299 		isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *);
2300 		if (isoaddr1->siso_family == AF_ISO &&
2301 		    isoaddr1->siso_nlen > 0 &&
2302 		    isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
2303 		    SAME_ISOADDR(isoaddr1, isoaddr2))
2304 			return (1);
2305 		break;
2306 	    }
2307 #endif	/* ISO */
2308 	default:
2309 		break;
2310 	};
2311 	return (0);
2312 }
2313 
2314 
2315 /*
2316  * The write verifier has changed (probably due to a server reboot), so all
2317  * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
2318  * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
2319  * flag. Once done the new write verifier can be set for the mount point.
2320  */
2321 void
2322 nfs_clearcommit(mp)
2323 	struct mount *mp;
2324 {
2325 	register struct vnode *vp, *nvp;
2326 	register struct buf *bp, *nbp;
2327 	int s;
2328 
2329 	s = splbio();
2330 loop:
2331 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
2332 		if (vp->v_mount != mp)	/* Paranoia */
2333 			goto loop;
2334 		nvp = vp->v_mntvnodes.le_next;
2335 		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
2336 			nbp = bp->b_vnbufs.le_next;
2337 			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
2338 				== (B_DELWRI | B_NEEDCOMMIT))
2339 				bp->b_flags &= ~B_NEEDCOMMIT;
2340 		}
2341 	}
2342 	splx(s);
2343 }
2344 
2345 /*
2346  * Map errnos to NFS error numbers. For Version 3 also filter out error
2347  * numbers not specified for the associated procedure.
2348  */
2349 int
2350 nfsrv_errmap(nd, err)
2351 	struct nfsrv_descript *nd;
2352 	register int err;
2353 {
2354 	register short *defaulterrp, *errp;
2355 
2356 	if (nd->nd_flag & ND_NFSV3) {
2357 	    if (nd->nd_procnum <= NFSPROC_COMMIT) {
2358 		errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
2359 		while (*++errp) {
2360 			if (*errp == err)
2361 				return (err);
2362 			else if (*errp > err)
2363 				break;
2364 		}
2365 		return ((int)*defaulterrp);
2366 	    } else
2367 		return (err & 0xffff);
2368 	}
2369 	if (err <= ELAST)
2370 		return ((int)nfsrv_v2errmap[err - 1]);
2371 	return (NFSERR_IO);
2372 }
2373 
2374 /*
2375  * Sort the group list in increasing numerical order.
2376  * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
2377  *  that used to be here.)
2378  */
2379 void
2380 nfsrvw_sort(list, num)
2381         register gid_t *list;
2382         register int num;
2383 {
2384 	register int i, j;
2385 	gid_t v;
2386 
2387 	/* Insertion sort. */
2388 	for (i = 1; i < num; i++) {
2389 		v = list[i];
2390 		/* find correct slot for value v, moving others up */
2391 		for (j = i; --j >= 0 && v < list[j];)
2392 			list[j + 1] = list[j];
2393 		list[j + 1] = v;
2394 	}
2395 }
2396 
2397 /*
2398  * copy credentials making sure that the result can be compared with bcmp().
2399  */
2400 void
2401 nfsrv_setcred(incred, outcred)
2402 	register struct ucred *incred, *outcred;
2403 {
2404 	register int i;
2405 
2406 	bzero((caddr_t)outcred, sizeof (struct ucred));
2407 	outcred->cr_ref = 1;
2408 	outcred->cr_uid = incred->cr_uid;
2409 	outcred->cr_gid = incred->cr_gid;
2410 	outcred->cr_ngroups = incred->cr_ngroups;
2411 	for (i = 0; i < incred->cr_ngroups; i++)
2412 		outcred->cr_groups[i] = incred->cr_groups[i];
2413 	nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups);
2414 }
2415