1 /* $NetBSD: nfs_subs.c,v 1.174 2006/10/14 09:18:57 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 35 */ 36 37 /* 38 * Copyright 2000 Wasabi Systems, Inc. 39 * All rights reserved. 40 * 41 * Written by Frank van der Linden for Wasabi Systems, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed for the NetBSD Project by 54 * Wasabi Systems, Inc. 55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 56 * or promote products derived from this software without specific prior 57 * written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 69 * POSSIBILITY OF SUCH DAMAGE. 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.174 2006/10/14 09:18:57 yamt Exp $"); 74 75 #include "fs_nfs.h" 76 #include "opt_nfs.h" 77 #include "opt_nfsserver.h" 78 #include "opt_iso.h" 79 #include "opt_inet.h" 80 81 /* 82 * These functions support the macros and help fiddle mbuf chains for 83 * the nfs op functions. They do things like create the rpc header and 84 * copy data between mbuf chains and uio lists. 85 */ 86 #include <sys/param.h> 87 #include <sys/proc.h> 88 #include <sys/systm.h> 89 #include <sys/kernel.h> 90 #include <sys/mount.h> 91 #include <sys/vnode.h> 92 #include <sys/namei.h> 93 #include <sys/mbuf.h> 94 #include <sys/socket.h> 95 #include <sys/stat.h> 96 #include <sys/malloc.h> 97 #include <sys/filedesc.h> 98 #include <sys/time.h> 99 #include <sys/dirent.h> 100 #include <sys/once.h> 101 #include <sys/kauth.h> 102 103 #include <uvm/uvm_extern.h> 104 105 #include <nfs/rpcv2.h> 106 #include <nfs/nfsproto.h> 107 #include <nfs/nfsnode.h> 108 #include <nfs/nfs.h> 109 #include <nfs/xdr_subs.h> 110 #include <nfs/nfsm_subs.h> 111 #include <nfs/nfsmount.h> 112 #include <nfs/nqnfs.h> 113 #include <nfs/nfsrtt.h> 114 #include <nfs/nfs_var.h> 115 116 #include <miscfs/specfs/specdev.h> 117 118 #include <netinet/in.h> 119 #ifdef ISO 120 #include <netiso/iso.h> 121 #endif 122 123 /* 124 * Data items converted to xdr at startup, since they are constant 125 * This is kinda hokey, but may save a little time doing byte swaps 126 */ 127 u_int32_t nfs_xdrneg1; 128 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, 129 rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, 130 rpc_auth_kerb; 131 u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; 132 133 /* And other global data */ 134 const nfstype nfsv2_type[9] = 135 { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; 136 const nfstype nfsv3_type[9] = 137 { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; 138 const enum vtype nv2tov_type[8] = 139 { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; 140 const enum vtype nv3tov_type[8] = 141 { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; 142 int nfs_ticks; 143 int nfs_commitsize; 144 145 MALLOC_DEFINE(M_NFSDIROFF, "NFS diroff", "NFS directory cookies"); 146 147 /* NFS client/server stats. */ 148 struct nfsstats nfsstats; 149 150 /* 151 * Mapping of old NFS Version 2 RPC numbers to generic numbers. 152 */ 153 const int nfsv3_procid[NFS_NPROCS] = { 154 NFSPROC_NULL, 155 NFSPROC_GETATTR, 156 NFSPROC_SETATTR, 157 NFSPROC_NOOP, 158 NFSPROC_LOOKUP, 159 NFSPROC_READLINK, 160 NFSPROC_READ, 161 NFSPROC_NOOP, 162 NFSPROC_WRITE, 163 NFSPROC_CREATE, 164 NFSPROC_REMOVE, 165 NFSPROC_RENAME, 166 NFSPROC_LINK, 167 NFSPROC_SYMLINK, 168 NFSPROC_MKDIR, 169 NFSPROC_RMDIR, 170 NFSPROC_READDIR, 171 NFSPROC_FSSTAT, 172 NFSPROC_NOOP, 173 NFSPROC_NOOP, 174 NFSPROC_NOOP, 175 NFSPROC_NOOP, 176 NFSPROC_NOOP, 177 NFSPROC_NOOP, 178 NFSPROC_NOOP, 179 NFSPROC_NOOP 180 }; 181 182 /* 183 * and the reverse mapping from generic to Version 2 procedure numbers 184 */ 185 const int nfsv2_procid[NFS_NPROCS] = { 186 NFSV2PROC_NULL, 187 NFSV2PROC_GETATTR, 188 NFSV2PROC_SETATTR, 189 NFSV2PROC_LOOKUP, 190 NFSV2PROC_NOOP, 191 NFSV2PROC_READLINK, 192 NFSV2PROC_READ, 193 NFSV2PROC_WRITE, 194 NFSV2PROC_CREATE, 195 NFSV2PROC_MKDIR, 196 NFSV2PROC_SYMLINK, 197 NFSV2PROC_CREATE, 198 NFSV2PROC_REMOVE, 199 NFSV2PROC_RMDIR, 200 NFSV2PROC_RENAME, 201 NFSV2PROC_LINK, 202 NFSV2PROC_READDIR, 203 NFSV2PROC_NOOP, 204 NFSV2PROC_STATFS, 205 NFSV2PROC_NOOP, 206 NFSV2PROC_NOOP, 207 NFSV2PROC_NOOP, 208 NFSV2PROC_NOOP, 209 NFSV2PROC_NOOP, 210 NFSV2PROC_NOOP, 211 NFSV2PROC_NOOP, 212 }; 213 214 /* 215 * Maps errno values to nfs error numbers. 216 * Use NFSERR_IO as the catch all for ones not specifically defined in 217 * RFC 1094. 218 */ 219 static const u_char nfsrv_v2errmap[ELAST] = { 220 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 221 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 222 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 223 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 224 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 225 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 226 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 227 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 228 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 229 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 230 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 231 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 232 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 233 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 234 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 235 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 236 NFSERR_IO, NFSERR_IO, 237 }; 238 239 /* 240 * Maps errno values to nfs error numbers. 241 * Although it is not obvious whether or not NFS clients really care if 242 * a returned error value is in the specified list for the procedure, the 243 * safest thing to do is filter them appropriately. For Version 2, the 244 * X/Open XNFS document is the only specification that defines error values 245 * for each RPC (The RFC simply lists all possible error values for all RPCs), 246 * so I have decided to not do this for Version 2. 247 * The first entry is the default error return and the rest are the valid 248 * errors for that RPC in increasing numeric order. 249 */ 250 static const short nfsv3err_null[] = { 251 0, 252 0, 253 }; 254 255 static const short nfsv3err_getattr[] = { 256 NFSERR_IO, 257 NFSERR_IO, 258 NFSERR_STALE, 259 NFSERR_BADHANDLE, 260 NFSERR_SERVERFAULT, 261 0, 262 }; 263 264 static const short nfsv3err_setattr[] = { 265 NFSERR_IO, 266 NFSERR_PERM, 267 NFSERR_IO, 268 NFSERR_ACCES, 269 NFSERR_INVAL, 270 NFSERR_NOSPC, 271 NFSERR_ROFS, 272 NFSERR_DQUOT, 273 NFSERR_STALE, 274 NFSERR_BADHANDLE, 275 NFSERR_NOT_SYNC, 276 NFSERR_SERVERFAULT, 277 0, 278 }; 279 280 static const short nfsv3err_lookup[] = { 281 NFSERR_IO, 282 NFSERR_NOENT, 283 NFSERR_IO, 284 NFSERR_ACCES, 285 NFSERR_NOTDIR, 286 NFSERR_NAMETOL, 287 NFSERR_STALE, 288 NFSERR_BADHANDLE, 289 NFSERR_SERVERFAULT, 290 0, 291 }; 292 293 static const short nfsv3err_access[] = { 294 NFSERR_IO, 295 NFSERR_IO, 296 NFSERR_STALE, 297 NFSERR_BADHANDLE, 298 NFSERR_SERVERFAULT, 299 0, 300 }; 301 302 static const short nfsv3err_readlink[] = { 303 NFSERR_IO, 304 NFSERR_IO, 305 NFSERR_ACCES, 306 NFSERR_INVAL, 307 NFSERR_STALE, 308 NFSERR_BADHANDLE, 309 NFSERR_NOTSUPP, 310 NFSERR_SERVERFAULT, 311 0, 312 }; 313 314 static const short nfsv3err_read[] = { 315 NFSERR_IO, 316 NFSERR_IO, 317 NFSERR_NXIO, 318 NFSERR_ACCES, 319 NFSERR_INVAL, 320 NFSERR_STALE, 321 NFSERR_BADHANDLE, 322 NFSERR_SERVERFAULT, 323 NFSERR_JUKEBOX, 324 0, 325 }; 326 327 static const short nfsv3err_write[] = { 328 NFSERR_IO, 329 NFSERR_IO, 330 NFSERR_ACCES, 331 NFSERR_INVAL, 332 NFSERR_FBIG, 333 NFSERR_NOSPC, 334 NFSERR_ROFS, 335 NFSERR_DQUOT, 336 NFSERR_STALE, 337 NFSERR_BADHANDLE, 338 NFSERR_SERVERFAULT, 339 NFSERR_JUKEBOX, 340 0, 341 }; 342 343 static const short nfsv3err_create[] = { 344 NFSERR_IO, 345 NFSERR_IO, 346 NFSERR_ACCES, 347 NFSERR_EXIST, 348 NFSERR_NOTDIR, 349 NFSERR_NOSPC, 350 NFSERR_ROFS, 351 NFSERR_NAMETOL, 352 NFSERR_DQUOT, 353 NFSERR_STALE, 354 NFSERR_BADHANDLE, 355 NFSERR_NOTSUPP, 356 NFSERR_SERVERFAULT, 357 0, 358 }; 359 360 static const short nfsv3err_mkdir[] = { 361 NFSERR_IO, 362 NFSERR_IO, 363 NFSERR_ACCES, 364 NFSERR_EXIST, 365 NFSERR_NOTDIR, 366 NFSERR_NOSPC, 367 NFSERR_ROFS, 368 NFSERR_NAMETOL, 369 NFSERR_DQUOT, 370 NFSERR_STALE, 371 NFSERR_BADHANDLE, 372 NFSERR_NOTSUPP, 373 NFSERR_SERVERFAULT, 374 0, 375 }; 376 377 static const short nfsv3err_symlink[] = { 378 NFSERR_IO, 379 NFSERR_IO, 380 NFSERR_ACCES, 381 NFSERR_EXIST, 382 NFSERR_NOTDIR, 383 NFSERR_NOSPC, 384 NFSERR_ROFS, 385 NFSERR_NAMETOL, 386 NFSERR_DQUOT, 387 NFSERR_STALE, 388 NFSERR_BADHANDLE, 389 NFSERR_NOTSUPP, 390 NFSERR_SERVERFAULT, 391 0, 392 }; 393 394 static const short nfsv3err_mknod[] = { 395 NFSERR_IO, 396 NFSERR_IO, 397 NFSERR_ACCES, 398 NFSERR_EXIST, 399 NFSERR_NOTDIR, 400 NFSERR_NOSPC, 401 NFSERR_ROFS, 402 NFSERR_NAMETOL, 403 NFSERR_DQUOT, 404 NFSERR_STALE, 405 NFSERR_BADHANDLE, 406 NFSERR_NOTSUPP, 407 NFSERR_SERVERFAULT, 408 NFSERR_BADTYPE, 409 0, 410 }; 411 412 static const short nfsv3err_remove[] = { 413 NFSERR_IO, 414 NFSERR_NOENT, 415 NFSERR_IO, 416 NFSERR_ACCES, 417 NFSERR_NOTDIR, 418 NFSERR_ROFS, 419 NFSERR_NAMETOL, 420 NFSERR_STALE, 421 NFSERR_BADHANDLE, 422 NFSERR_SERVERFAULT, 423 0, 424 }; 425 426 static const short nfsv3err_rmdir[] = { 427 NFSERR_IO, 428 NFSERR_NOENT, 429 NFSERR_IO, 430 NFSERR_ACCES, 431 NFSERR_EXIST, 432 NFSERR_NOTDIR, 433 NFSERR_INVAL, 434 NFSERR_ROFS, 435 NFSERR_NAMETOL, 436 NFSERR_NOTEMPTY, 437 NFSERR_STALE, 438 NFSERR_BADHANDLE, 439 NFSERR_NOTSUPP, 440 NFSERR_SERVERFAULT, 441 0, 442 }; 443 444 static const short nfsv3err_rename[] = { 445 NFSERR_IO, 446 NFSERR_NOENT, 447 NFSERR_IO, 448 NFSERR_ACCES, 449 NFSERR_EXIST, 450 NFSERR_XDEV, 451 NFSERR_NOTDIR, 452 NFSERR_ISDIR, 453 NFSERR_INVAL, 454 NFSERR_NOSPC, 455 NFSERR_ROFS, 456 NFSERR_MLINK, 457 NFSERR_NAMETOL, 458 NFSERR_NOTEMPTY, 459 NFSERR_DQUOT, 460 NFSERR_STALE, 461 NFSERR_BADHANDLE, 462 NFSERR_NOTSUPP, 463 NFSERR_SERVERFAULT, 464 0, 465 }; 466 467 static const short nfsv3err_link[] = { 468 NFSERR_IO, 469 NFSERR_IO, 470 NFSERR_ACCES, 471 NFSERR_EXIST, 472 NFSERR_XDEV, 473 NFSERR_NOTDIR, 474 NFSERR_INVAL, 475 NFSERR_NOSPC, 476 NFSERR_ROFS, 477 NFSERR_MLINK, 478 NFSERR_NAMETOL, 479 NFSERR_DQUOT, 480 NFSERR_STALE, 481 NFSERR_BADHANDLE, 482 NFSERR_NOTSUPP, 483 NFSERR_SERVERFAULT, 484 0, 485 }; 486 487 static const short nfsv3err_readdir[] = { 488 NFSERR_IO, 489 NFSERR_IO, 490 NFSERR_ACCES, 491 NFSERR_NOTDIR, 492 NFSERR_STALE, 493 NFSERR_BADHANDLE, 494 NFSERR_BAD_COOKIE, 495 NFSERR_TOOSMALL, 496 NFSERR_SERVERFAULT, 497 0, 498 }; 499 500 static const short nfsv3err_readdirplus[] = { 501 NFSERR_IO, 502 NFSERR_IO, 503 NFSERR_ACCES, 504 NFSERR_NOTDIR, 505 NFSERR_STALE, 506 NFSERR_BADHANDLE, 507 NFSERR_BAD_COOKIE, 508 NFSERR_NOTSUPP, 509 NFSERR_TOOSMALL, 510 NFSERR_SERVERFAULT, 511 0, 512 }; 513 514 static const short nfsv3err_fsstat[] = { 515 NFSERR_IO, 516 NFSERR_IO, 517 NFSERR_STALE, 518 NFSERR_BADHANDLE, 519 NFSERR_SERVERFAULT, 520 0, 521 }; 522 523 static const short nfsv3err_fsinfo[] = { 524 NFSERR_STALE, 525 NFSERR_STALE, 526 NFSERR_BADHANDLE, 527 NFSERR_SERVERFAULT, 528 0, 529 }; 530 531 static const short nfsv3err_pathconf[] = { 532 NFSERR_STALE, 533 NFSERR_STALE, 534 NFSERR_BADHANDLE, 535 NFSERR_SERVERFAULT, 536 0, 537 }; 538 539 static const short nfsv3err_commit[] = { 540 NFSERR_IO, 541 NFSERR_IO, 542 NFSERR_STALE, 543 NFSERR_BADHANDLE, 544 NFSERR_SERVERFAULT, 545 0, 546 }; 547 548 static const short * const nfsrv_v3errmap[] = { 549 nfsv3err_null, 550 nfsv3err_getattr, 551 nfsv3err_setattr, 552 nfsv3err_lookup, 553 nfsv3err_access, 554 nfsv3err_readlink, 555 nfsv3err_read, 556 nfsv3err_write, 557 nfsv3err_create, 558 nfsv3err_mkdir, 559 nfsv3err_symlink, 560 nfsv3err_mknod, 561 nfsv3err_remove, 562 nfsv3err_rmdir, 563 nfsv3err_rename, 564 nfsv3err_link, 565 nfsv3err_readdir, 566 nfsv3err_readdirplus, 567 nfsv3err_fsstat, 568 nfsv3err_fsinfo, 569 nfsv3err_pathconf, 570 nfsv3err_commit, 571 }; 572 573 extern struct nfsrtt nfsrtt; 574 extern time_t nqnfsstarttime; 575 extern int nqsrv_clockskew; 576 extern int nqsrv_writeslack; 577 extern int nqsrv_maxlease; 578 extern const int nqnfs_piggy[NFS_NPROCS]; 579 extern struct nfsnodehashhead *nfsnodehashtbl; 580 extern u_long nfsnodehash; 581 582 u_long nfsdirhashmask; 583 584 int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); 585 586 /* 587 * Create the header for an rpc request packet 588 * The hsiz is the size of the rest of the nfs request header. 589 * (just used to decide if a cluster is a good idea) 590 */ 591 struct mbuf * 592 nfsm_reqh( 593 struct nfsnode *np __unused, 594 u_long procid __unused, 595 int hsiz, 596 caddr_t *bposp 597 ) 598 { 599 struct mbuf *mb; 600 caddr_t bpos; 601 #ifndef NFS_V2_ONLY 602 struct nfsmount *nmp; 603 u_int32_t *tl; 604 int nqflag; 605 #else 606 do { if (&np) {} } while (/* CONSTCOND */ 0); /* for -Wunused */ 607 do { if (&procid) {} } while (/* CONSTCOND */ 0); /* for -Wunused */ 608 #endif 609 610 mb = m_get(M_WAIT, MT_DATA); 611 MCLAIM(mb, &nfs_mowner); 612 if (hsiz >= MINCLSIZE) 613 m_clget(mb, M_WAIT); 614 mb->m_len = 0; 615 bpos = mtod(mb, caddr_t); 616 617 #ifndef NFS_V2_ONLY 618 /* 619 * For NQNFS, add lease request. 620 */ 621 if (np) { 622 nmp = VFSTONFS(np->n_vnode->v_mount); 623 if (nmp->nm_flag & NFSMNT_NQNFS) { 624 nqflag = NQNFS_NEEDLEASE(np, procid); 625 if (nqflag) { 626 nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); 627 *tl++ = txdr_unsigned(nqflag); 628 *tl = txdr_unsigned(nmp->nm_leaseterm); 629 } else { 630 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 631 *tl = 0; 632 } 633 } 634 } 635 #endif 636 /* Finally, return values */ 637 *bposp = bpos; 638 return (mb); 639 } 640 641 /* 642 * Build the RPC header and fill in the authorization info. 643 * The authorization string argument is only used when the credentials 644 * come from outside of the kernel. 645 * Returns the head of the mbuf list. 646 */ 647 struct mbuf * 648 nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, 649 verf_str, mrest, mrest_len, mbp, xidp) 650 kauth_cred_t cr; 651 int nmflag; 652 int procid; 653 int auth_type; 654 int auth_len; 655 char *auth_str; 656 int verf_len; 657 char *verf_str; 658 struct mbuf *mrest; 659 int mrest_len; 660 struct mbuf **mbp; 661 u_int32_t *xidp; 662 { 663 struct mbuf *mb; 664 u_int32_t *tl; 665 caddr_t bpos; 666 int i; 667 struct mbuf *mreq; 668 int siz, grpsiz, authsiz; 669 670 authsiz = nfsm_rndup(auth_len); 671 mb = m_gethdr(M_WAIT, MT_DATA); 672 MCLAIM(mb, &nfs_mowner); 673 if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { 674 m_clget(mb, M_WAIT); 675 } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { 676 MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); 677 } else { 678 MH_ALIGN(mb, 8 * NFSX_UNSIGNED); 679 } 680 mb->m_len = 0; 681 mreq = mb; 682 bpos = mtod(mb, caddr_t); 683 684 /* 685 * First the RPC header. 686 */ 687 nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); 688 689 *tl++ = *xidp = nfs_getxid(); 690 *tl++ = rpc_call; 691 *tl++ = rpc_vers; 692 if (nmflag & NFSMNT_NQNFS) { 693 *tl++ = txdr_unsigned(NQNFS_PROG); 694 *tl++ = txdr_unsigned(NQNFS_VER3); 695 } else { 696 *tl++ = txdr_unsigned(NFS_PROG); 697 if (nmflag & NFSMNT_NFSV3) 698 *tl++ = txdr_unsigned(NFS_VER3); 699 else 700 *tl++ = txdr_unsigned(NFS_VER2); 701 } 702 if (nmflag & NFSMNT_NFSV3) 703 *tl++ = txdr_unsigned(procid); 704 else 705 *tl++ = txdr_unsigned(nfsv2_procid[procid]); 706 707 /* 708 * And then the authorization cred. 709 */ 710 *tl++ = txdr_unsigned(auth_type); 711 *tl = txdr_unsigned(authsiz); 712 switch (auth_type) { 713 case RPCAUTH_UNIX: 714 nfsm_build(tl, u_int32_t *, auth_len); 715 *tl++ = 0; /* stamp ?? */ 716 *tl++ = 0; /* NULL hostname */ 717 *tl++ = txdr_unsigned(kauth_cred_geteuid(cr)); 718 *tl++ = txdr_unsigned(kauth_cred_getegid(cr)); 719 grpsiz = (auth_len >> 2) - 5; 720 *tl++ = txdr_unsigned(grpsiz); 721 for (i = 0; i < grpsiz; i++) 722 *tl++ = txdr_unsigned(kauth_cred_group(cr, i)); /* XXX elad review */ 723 break; 724 case RPCAUTH_KERB4: 725 siz = auth_len; 726 while (siz > 0) { 727 if (M_TRAILINGSPACE(mb) == 0) { 728 struct mbuf *mb2; 729 mb2 = m_get(M_WAIT, MT_DATA); 730 MCLAIM(mb2, &nfs_mowner); 731 if (siz >= MINCLSIZE) 732 m_clget(mb2, M_WAIT); 733 mb->m_next = mb2; 734 mb = mb2; 735 mb->m_len = 0; 736 bpos = mtod(mb, caddr_t); 737 } 738 i = min(siz, M_TRAILINGSPACE(mb)); 739 memcpy(bpos, auth_str, i); 740 mb->m_len += i; 741 auth_str += i; 742 bpos += i; 743 siz -= i; 744 } 745 if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { 746 for (i = 0; i < siz; i++) 747 *bpos++ = '\0'; 748 mb->m_len += siz; 749 } 750 break; 751 }; 752 753 /* 754 * And the verifier... 755 */ 756 nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 757 if (verf_str) { 758 *tl++ = txdr_unsigned(RPCAUTH_KERB4); 759 *tl = txdr_unsigned(verf_len); 760 siz = verf_len; 761 while (siz > 0) { 762 if (M_TRAILINGSPACE(mb) == 0) { 763 struct mbuf *mb2; 764 mb2 = m_get(M_WAIT, MT_DATA); 765 MCLAIM(mb2, &nfs_mowner); 766 if (siz >= MINCLSIZE) 767 m_clget(mb2, M_WAIT); 768 mb->m_next = mb2; 769 mb = mb2; 770 mb->m_len = 0; 771 bpos = mtod(mb, caddr_t); 772 } 773 i = min(siz, M_TRAILINGSPACE(mb)); 774 memcpy(bpos, verf_str, i); 775 mb->m_len += i; 776 verf_str += i; 777 bpos += i; 778 siz -= i; 779 } 780 if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { 781 for (i = 0; i < siz; i++) 782 *bpos++ = '\0'; 783 mb->m_len += siz; 784 } 785 } else { 786 *tl++ = txdr_unsigned(RPCAUTH_NULL); 787 *tl = 0; 788 } 789 mb->m_next = mrest; 790 mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; 791 mreq->m_pkthdr.rcvif = (struct ifnet *)0; 792 *mbp = mb; 793 return (mreq); 794 } 795 796 /* 797 * copies mbuf chain to the uio scatter/gather list 798 */ 799 int 800 nfsm_mbuftouio(mrep, uiop, siz, dpos) 801 struct mbuf **mrep; 802 struct uio *uiop; 803 int siz; 804 caddr_t *dpos; 805 { 806 char *mbufcp, *uiocp; 807 int xfer, left, len; 808 struct mbuf *mp; 809 long uiosiz, rem; 810 int error = 0; 811 812 mp = *mrep; 813 mbufcp = *dpos; 814 len = mtod(mp, caddr_t)+mp->m_len-mbufcp; 815 rem = nfsm_rndup(siz)-siz; 816 while (siz > 0) { 817 if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) 818 return (EFBIG); 819 left = uiop->uio_iov->iov_len; 820 uiocp = uiop->uio_iov->iov_base; 821 if (left > siz) 822 left = siz; 823 uiosiz = left; 824 while (left > 0) { 825 while (len == 0) { 826 mp = mp->m_next; 827 if (mp == NULL) 828 return (EBADRPC); 829 mbufcp = mtod(mp, caddr_t); 830 len = mp->m_len; 831 } 832 xfer = (left > len) ? len : left; 833 error = copyout_vmspace(uiop->uio_vmspace, mbufcp, 834 uiocp, xfer); 835 if (error) { 836 return error; 837 } 838 left -= xfer; 839 len -= xfer; 840 mbufcp += xfer; 841 uiocp += xfer; 842 uiop->uio_offset += xfer; 843 uiop->uio_resid -= xfer; 844 } 845 if (uiop->uio_iov->iov_len <= siz) { 846 uiop->uio_iovcnt--; 847 uiop->uio_iov++; 848 } else { 849 uiop->uio_iov->iov_base = 850 (caddr_t)uiop->uio_iov->iov_base + uiosiz; 851 uiop->uio_iov->iov_len -= uiosiz; 852 } 853 siz -= uiosiz; 854 } 855 *dpos = mbufcp; 856 *mrep = mp; 857 if (rem > 0) { 858 if (len < rem) 859 error = nfs_adv(mrep, dpos, rem, len); 860 else 861 *dpos += rem; 862 } 863 return (error); 864 } 865 866 /* 867 * copies a uio scatter/gather list to an mbuf chain. 868 * NOTE: can ony handle iovcnt == 1 869 */ 870 int 871 nfsm_uiotombuf(uiop, mq, siz, bpos) 872 struct uio *uiop; 873 struct mbuf **mq; 874 int siz; 875 caddr_t *bpos; 876 { 877 char *uiocp; 878 struct mbuf *mp, *mp2; 879 int xfer, left, mlen; 880 int uiosiz, clflg, rem; 881 char *cp; 882 int error; 883 884 #ifdef DIAGNOSTIC 885 if (uiop->uio_iovcnt != 1) 886 panic("nfsm_uiotombuf: iovcnt != 1"); 887 #endif 888 889 if (siz > MLEN) /* or should it >= MCLBYTES ?? */ 890 clflg = 1; 891 else 892 clflg = 0; 893 rem = nfsm_rndup(siz)-siz; 894 mp = mp2 = *mq; 895 while (siz > 0) { 896 left = uiop->uio_iov->iov_len; 897 uiocp = uiop->uio_iov->iov_base; 898 if (left > siz) 899 left = siz; 900 uiosiz = left; 901 while (left > 0) { 902 mlen = M_TRAILINGSPACE(mp); 903 if (mlen == 0) { 904 mp = m_get(M_WAIT, MT_DATA); 905 MCLAIM(mp, &nfs_mowner); 906 if (clflg) 907 m_clget(mp, M_WAIT); 908 mp->m_len = 0; 909 mp2->m_next = mp; 910 mp2 = mp; 911 mlen = M_TRAILINGSPACE(mp); 912 } 913 xfer = (left > mlen) ? mlen : left; 914 cp = mtod(mp, caddr_t) + mp->m_len; 915 error = copyin_vmspace(uiop->uio_vmspace, uiocp, cp, 916 xfer); 917 if (error) { 918 /* XXX */ 919 } 920 mp->m_len += xfer; 921 left -= xfer; 922 uiocp += xfer; 923 uiop->uio_offset += xfer; 924 uiop->uio_resid -= xfer; 925 } 926 uiop->uio_iov->iov_base = (caddr_t)uiop->uio_iov->iov_base + 927 uiosiz; 928 uiop->uio_iov->iov_len -= uiosiz; 929 siz -= uiosiz; 930 } 931 if (rem > 0) { 932 if (rem > M_TRAILINGSPACE(mp)) { 933 mp = m_get(M_WAIT, MT_DATA); 934 MCLAIM(mp, &nfs_mowner); 935 mp->m_len = 0; 936 mp2->m_next = mp; 937 } 938 cp = mtod(mp, caddr_t) + mp->m_len; 939 for (left = 0; left < rem; left++) 940 *cp++ = '\0'; 941 mp->m_len += rem; 942 *bpos = cp; 943 } else 944 *bpos = mtod(mp, caddr_t)+mp->m_len; 945 *mq = mp; 946 return (0); 947 } 948 949 /* 950 * Get at least "siz" bytes of correctly aligned data. 951 * When called the mbuf pointers are not necessarily correct, 952 * dsosp points to what ought to be in m_data and left contains 953 * what ought to be in m_len. 954 * This is used by the macros nfsm_dissect and nfsm_dissecton for tough 955 * cases. (The macros use the vars. dpos and dpos2) 956 */ 957 int 958 nfsm_disct(mdp, dposp, siz, left, cp2) 959 struct mbuf **mdp; 960 caddr_t *dposp; 961 int siz; 962 int left; 963 caddr_t *cp2; 964 { 965 struct mbuf *m1, *m2; 966 struct mbuf *havebuf = NULL; 967 caddr_t src = *dposp; 968 caddr_t dst; 969 int len; 970 971 #ifdef DEBUG 972 if (left < 0) 973 panic("nfsm_disct: left < 0"); 974 #endif 975 m1 = *mdp; 976 /* 977 * Skip through the mbuf chain looking for an mbuf with 978 * some data. If the first mbuf found has enough data 979 * and it is correctly aligned return it. 980 */ 981 while (left == 0) { 982 havebuf = m1; 983 *mdp = m1 = m1->m_next; 984 if (m1 == NULL) 985 return (EBADRPC); 986 src = mtod(m1, caddr_t); 987 left = m1->m_len; 988 /* 989 * If we start a new mbuf and it is big enough 990 * and correctly aligned just return it, don't 991 * do any pull up. 992 */ 993 if (left >= siz && nfsm_aligned(src)) { 994 *cp2 = src; 995 *dposp = src + siz; 996 return (0); 997 } 998 } 999 if (m1->m_flags & M_EXT) { 1000 if (havebuf) { 1001 /* If the first mbuf with data has external data 1002 * and there is a previous empty mbuf use it 1003 * to move the data into. 1004 */ 1005 m2 = m1; 1006 *mdp = m1 = havebuf; 1007 if (m1->m_flags & M_EXT) { 1008 MEXTREMOVE(m1); 1009 } 1010 } else { 1011 /* 1012 * If the first mbuf has a external data 1013 * and there is no previous empty mbuf 1014 * allocate a new mbuf and move the external 1015 * data to the new mbuf. Also make the first 1016 * mbuf look empty. 1017 */ 1018 m2 = m_get(M_WAIT, MT_DATA); 1019 m2->m_ext = m1->m_ext; 1020 m2->m_data = src; 1021 m2->m_len = left; 1022 MCLADDREFERENCE(m1, m2); 1023 MEXTREMOVE(m1); 1024 m2->m_next = m1->m_next; 1025 m1->m_next = m2; 1026 } 1027 m1->m_len = 0; 1028 if (m1->m_flags & M_PKTHDR) 1029 dst = m1->m_pktdat; 1030 else 1031 dst = m1->m_dat; 1032 m1->m_data = dst; 1033 } else { 1034 /* 1035 * If the first mbuf has no external data 1036 * move the data to the front of the mbuf. 1037 */ 1038 if (m1->m_flags & M_PKTHDR) 1039 dst = m1->m_pktdat; 1040 else 1041 dst = m1->m_dat; 1042 m1->m_data = dst; 1043 if (dst != src) 1044 memmove(dst, src, left); 1045 dst += left; 1046 m1->m_len = left; 1047 m2 = m1->m_next; 1048 } 1049 *cp2 = m1->m_data; 1050 *dposp = mtod(m1, caddr_t) + siz; 1051 /* 1052 * Loop through mbufs pulling data up into first mbuf until 1053 * the first mbuf is full or there is no more data to 1054 * pullup. 1055 */ 1056 while ((len = M_TRAILINGSPACE(m1)) != 0 && m2) { 1057 if ((len = min(len, m2->m_len)) != 0) 1058 memcpy(dst, m2->m_data, len); 1059 m1->m_len += len; 1060 dst += len; 1061 m2->m_data += len; 1062 m2->m_len -= len; 1063 m2 = m2->m_next; 1064 } 1065 if (m1->m_len < siz) 1066 return (EBADRPC); 1067 return (0); 1068 } 1069 1070 /* 1071 * Advance the position in the mbuf chain. 1072 */ 1073 int 1074 nfs_adv(mdp, dposp, offs, left) 1075 struct mbuf **mdp; 1076 caddr_t *dposp; 1077 int offs; 1078 int left; 1079 { 1080 struct mbuf *m; 1081 int s; 1082 1083 m = *mdp; 1084 s = left; 1085 while (s < offs) { 1086 offs -= s; 1087 m = m->m_next; 1088 if (m == NULL) 1089 return (EBADRPC); 1090 s = m->m_len; 1091 } 1092 *mdp = m; 1093 *dposp = mtod(m, caddr_t)+offs; 1094 return (0); 1095 } 1096 1097 /* 1098 * Copy a string into mbufs for the hard cases... 1099 */ 1100 int 1101 nfsm_strtmbuf(mb, bpos, cp, siz) 1102 struct mbuf **mb; 1103 char **bpos; 1104 const char *cp; 1105 long siz; 1106 { 1107 struct mbuf *m1 = NULL, *m2; 1108 long left, xfer, len, tlen; 1109 u_int32_t *tl; 1110 int putsize; 1111 1112 putsize = 1; 1113 m2 = *mb; 1114 left = M_TRAILINGSPACE(m2); 1115 if (left > 0) { 1116 tl = ((u_int32_t *)(*bpos)); 1117 *tl++ = txdr_unsigned(siz); 1118 putsize = 0; 1119 left -= NFSX_UNSIGNED; 1120 m2->m_len += NFSX_UNSIGNED; 1121 if (left > 0) { 1122 memcpy((caddr_t) tl, cp, left); 1123 siz -= left; 1124 cp += left; 1125 m2->m_len += left; 1126 left = 0; 1127 } 1128 } 1129 /* Loop around adding mbufs */ 1130 while (siz > 0) { 1131 m1 = m_get(M_WAIT, MT_DATA); 1132 MCLAIM(m1, &nfs_mowner); 1133 if (siz > MLEN) 1134 m_clget(m1, M_WAIT); 1135 m1->m_len = NFSMSIZ(m1); 1136 m2->m_next = m1; 1137 m2 = m1; 1138 tl = mtod(m1, u_int32_t *); 1139 tlen = 0; 1140 if (putsize) { 1141 *tl++ = txdr_unsigned(siz); 1142 m1->m_len -= NFSX_UNSIGNED; 1143 tlen = NFSX_UNSIGNED; 1144 putsize = 0; 1145 } 1146 if (siz < m1->m_len) { 1147 len = nfsm_rndup(siz); 1148 xfer = siz; 1149 if (xfer < len) 1150 *(tl+(xfer>>2)) = 0; 1151 } else { 1152 xfer = len = m1->m_len; 1153 } 1154 memcpy((caddr_t) tl, cp, xfer); 1155 m1->m_len = len+tlen; 1156 siz -= xfer; 1157 cp += xfer; 1158 } 1159 *mb = m1; 1160 *bpos = mtod(m1, caddr_t)+m1->m_len; 1161 return (0); 1162 } 1163 1164 /* 1165 * Directory caching routines. They work as follows: 1166 * - a cache is maintained per VDIR nfsnode. 1167 * - for each offset cookie that is exported to userspace, and can 1168 * thus be thrown back at us as an offset to VOP_READDIR, store 1169 * information in the cache. 1170 * - cached are: 1171 * - cookie itself 1172 * - blocknumber (essentially just a search key in the buffer cache) 1173 * - entry number in block. 1174 * - offset cookie of block in which this entry is stored 1175 * - 32 bit cookie if NFSMNT_XLATECOOKIE is used. 1176 * - entries are looked up in a hash table 1177 * - also maintained is an LRU list of entries, used to determine 1178 * which ones to delete if the cache grows too large. 1179 * - if 32 <-> 64 translation mode is requested for a filesystem, 1180 * the cache also functions as a translation table 1181 * - in the translation case, invalidating the cache does not mean 1182 * flushing it, but just marking entries as invalid, except for 1183 * the <64bit cookie, 32bitcookie> pair which is still valid, to 1184 * still be able to use the cache as a translation table. 1185 * - 32 bit cookies are uniquely created by combining the hash table 1186 * entry value, and one generation count per hash table entry, 1187 * incremented each time an entry is appended to the chain. 1188 * - the cache is invalidated each time a direcory is modified 1189 * - sanity checks are also done; if an entry in a block turns 1190 * out not to have a matching cookie, the cache is invalidated 1191 * and a new block starting from the wanted offset is fetched from 1192 * the server. 1193 * - directory entries as read from the server are extended to contain 1194 * the 64bit and, optionally, the 32bit cookies, for sanity checking 1195 * the cache and exporting them to userspace through the cookie 1196 * argument to VOP_READDIR. 1197 */ 1198 1199 u_long 1200 nfs_dirhash(off) 1201 off_t off; 1202 { 1203 int i; 1204 char *cp = (char *)&off; 1205 u_long sum = 0L; 1206 1207 for (i = 0 ; i < sizeof (off); i++) 1208 sum += *cp++; 1209 1210 return sum; 1211 } 1212 1213 #define _NFSDC_MTX(np) (&NFSTOV(np)->v_interlock) 1214 #define NFSDC_LOCK(np) simple_lock(_NFSDC_MTX(np)) 1215 #define NFSDC_UNLOCK(np) simple_unlock(_NFSDC_MTX(np)) 1216 #define NFSDC_ASSERT_LOCKED(np) LOCK_ASSERT(simple_lock_held(_NFSDC_MTX(np))) 1217 1218 void 1219 nfs_initdircache(vp) 1220 struct vnode *vp; 1221 { 1222 struct nfsnode *np = VTONFS(vp); 1223 struct nfsdirhashhead *dircache; 1224 1225 dircache = hashinit(NFS_DIRHASHSIZ, HASH_LIST, M_NFSDIROFF, 1226 M_WAITOK, &nfsdirhashmask); 1227 1228 NFSDC_LOCK(np); 1229 if (np->n_dircache == NULL) { 1230 np->n_dircachesize = 0; 1231 np->n_dircache = dircache; 1232 dircache = NULL; 1233 TAILQ_INIT(&np->n_dirchain); 1234 } 1235 NFSDC_UNLOCK(np); 1236 if (dircache) 1237 hashdone(dircache, M_NFSDIROFF); 1238 } 1239 1240 void 1241 nfs_initdirxlatecookie(vp) 1242 struct vnode *vp; 1243 { 1244 struct nfsnode *np = VTONFS(vp); 1245 unsigned *dirgens; 1246 1247 KASSERT(VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_XLATECOOKIE); 1248 1249 dirgens = malloc(NFS_DIRHASHSIZ * sizeof (unsigned), M_NFSDIROFF, 1250 M_WAITOK|M_ZERO); 1251 NFSDC_LOCK(np); 1252 if (np->n_dirgens == NULL) { 1253 np->n_dirgens = dirgens; 1254 dirgens = NULL; 1255 } 1256 NFSDC_UNLOCK(np); 1257 if (dirgens) 1258 free(dirgens, M_NFSDIROFF); 1259 } 1260 1261 static const struct nfsdircache dzero; 1262 1263 static void nfs_unlinkdircache __P((struct nfsnode *np, struct nfsdircache *)); 1264 static void nfs_putdircache_unlocked __P((struct nfsnode *, 1265 struct nfsdircache *)); 1266 1267 static void 1268 nfs_unlinkdircache(np, ndp) 1269 struct nfsnode *np; 1270 struct nfsdircache *ndp; 1271 { 1272 1273 NFSDC_ASSERT_LOCKED(np); 1274 KASSERT(ndp != &dzero); 1275 1276 if (LIST_NEXT(ndp, dc_hash) == (void *)-1) 1277 return; 1278 1279 TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain); 1280 LIST_REMOVE(ndp, dc_hash); 1281 LIST_NEXT(ndp, dc_hash) = (void *)-1; /* mark as unlinked */ 1282 1283 nfs_putdircache_unlocked(np, ndp); 1284 } 1285 1286 void 1287 nfs_putdircache(np, ndp) 1288 struct nfsnode *np; 1289 struct nfsdircache *ndp; 1290 { 1291 int ref; 1292 1293 if (ndp == &dzero) 1294 return; 1295 1296 KASSERT(ndp->dc_refcnt > 0); 1297 NFSDC_LOCK(np); 1298 ref = --ndp->dc_refcnt; 1299 NFSDC_UNLOCK(np); 1300 1301 if (ref == 0) 1302 free(ndp, M_NFSDIROFF); 1303 } 1304 1305 static void 1306 nfs_putdircache_unlocked(struct nfsnode *np __unused, struct nfsdircache *ndp) 1307 { 1308 int ref; 1309 1310 NFSDC_ASSERT_LOCKED(np); 1311 1312 if (ndp == &dzero) 1313 return; 1314 1315 KASSERT(ndp->dc_refcnt > 0); 1316 ref = --ndp->dc_refcnt; 1317 if (ref == 0) 1318 free(ndp, M_NFSDIROFF); 1319 } 1320 1321 struct nfsdircache * 1322 nfs_searchdircache(vp, off, do32, hashent) 1323 struct vnode *vp; 1324 off_t off; 1325 int do32; 1326 int *hashent; 1327 { 1328 struct nfsdirhashhead *ndhp; 1329 struct nfsdircache *ndp = NULL; 1330 struct nfsnode *np = VTONFS(vp); 1331 unsigned ent; 1332 1333 /* 1334 * Zero is always a valid cookie. 1335 */ 1336 if (off == 0) 1337 /* XXXUNCONST */ 1338 return (struct nfsdircache *)__UNCONST(&dzero); 1339 1340 if (!np->n_dircache) 1341 return NULL; 1342 1343 /* 1344 * We use a 32bit cookie as search key, directly reconstruct 1345 * the hashentry. Else use the hashfunction. 1346 */ 1347 if (do32) { 1348 ent = (u_int32_t)off >> 24; 1349 if (ent >= NFS_DIRHASHSIZ) 1350 return NULL; 1351 ndhp = &np->n_dircache[ent]; 1352 } else { 1353 ndhp = NFSDIRHASH(np, off); 1354 } 1355 1356 if (hashent) 1357 *hashent = (int)(ndhp - np->n_dircache); 1358 1359 NFSDC_LOCK(np); 1360 if (do32) { 1361 LIST_FOREACH(ndp, ndhp, dc_hash) { 1362 if (ndp->dc_cookie32 == (u_int32_t)off) { 1363 /* 1364 * An invalidated entry will become the 1365 * start of a new block fetched from 1366 * the server. 1367 */ 1368 if (ndp->dc_flags & NFSDC_INVALID) { 1369 ndp->dc_blkcookie = ndp->dc_cookie; 1370 ndp->dc_entry = 0; 1371 ndp->dc_flags &= ~NFSDC_INVALID; 1372 } 1373 break; 1374 } 1375 } 1376 } else { 1377 LIST_FOREACH(ndp, ndhp, dc_hash) { 1378 if (ndp->dc_cookie == off) 1379 break; 1380 } 1381 } 1382 if (ndp != NULL) 1383 ndp->dc_refcnt++; 1384 NFSDC_UNLOCK(np); 1385 return ndp; 1386 } 1387 1388 1389 struct nfsdircache * 1390 nfs_enterdircache(struct vnode *vp, off_t off, off_t blkoff, int en, 1391 daddr_t blkno __unused) 1392 { 1393 struct nfsnode *np = VTONFS(vp); 1394 struct nfsdirhashhead *ndhp; 1395 struct nfsdircache *ndp = NULL; 1396 struct nfsdircache *newndp = NULL; 1397 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1398 int hashent = 0, gen, overwrite; /* XXX: GCC */ 1399 1400 /* 1401 * XXX refuse entries for offset 0. amd(8) erroneously sets 1402 * cookie 0 for the '.' entry, making this necessary. This 1403 * isn't so bad, as 0 is a special case anyway. 1404 */ 1405 if (off == 0) 1406 /* XXXUNCONST */ 1407 return (struct nfsdircache *)__UNCONST(&dzero); 1408 1409 if (!np->n_dircache) 1410 /* 1411 * XXX would like to do this in nfs_nget but vtype 1412 * isn't known at that time. 1413 */ 1414 nfs_initdircache(vp); 1415 1416 if ((nmp->nm_flag & NFSMNT_XLATECOOKIE) && !np->n_dirgens) 1417 nfs_initdirxlatecookie(vp); 1418 1419 retry: 1420 ndp = nfs_searchdircache(vp, off, 0, &hashent); 1421 1422 NFSDC_LOCK(np); 1423 if (ndp && (ndp->dc_flags & NFSDC_INVALID) == 0) { 1424 /* 1425 * Overwriting an old entry. Check if it's the same. 1426 * If so, just return. If not, remove the old entry. 1427 */ 1428 if (ndp->dc_blkcookie == blkoff && ndp->dc_entry == en) 1429 goto done; 1430 nfs_unlinkdircache(np, ndp); 1431 nfs_putdircache_unlocked(np, ndp); 1432 ndp = NULL; 1433 } 1434 1435 ndhp = &np->n_dircache[hashent]; 1436 1437 if (!ndp) { 1438 if (newndp == NULL) { 1439 NFSDC_UNLOCK(np); 1440 newndp = malloc(sizeof(*ndp), M_NFSDIROFF, M_WAITOK); 1441 newndp->dc_refcnt = 1; 1442 LIST_NEXT(newndp, dc_hash) = (void *)-1; 1443 goto retry; 1444 } 1445 ndp = newndp; 1446 newndp = NULL; 1447 overwrite = 0; 1448 if (nmp->nm_flag & NFSMNT_XLATECOOKIE) { 1449 /* 1450 * We're allocating a new entry, so bump the 1451 * generation number. 1452 */ 1453 KASSERT(np->n_dirgens); 1454 gen = ++np->n_dirgens[hashent]; 1455 if (gen == 0) { 1456 np->n_dirgens[hashent]++; 1457 gen++; 1458 } 1459 ndp->dc_cookie32 = (hashent << 24) | (gen & 0xffffff); 1460 } 1461 } else 1462 overwrite = 1; 1463 1464 ndp->dc_cookie = off; 1465 ndp->dc_blkcookie = blkoff; 1466 ndp->dc_entry = en; 1467 ndp->dc_flags = 0; 1468 1469 if (overwrite) 1470 goto done; 1471 1472 /* 1473 * If the maximum directory cookie cache size has been reached 1474 * for this node, take one off the front. The idea is that 1475 * directories are typically read front-to-back once, so that 1476 * the oldest entries can be thrown away without much performance 1477 * loss. 1478 */ 1479 if (np->n_dircachesize == NFS_MAXDIRCACHE) { 1480 nfs_unlinkdircache(np, TAILQ_FIRST(&np->n_dirchain)); 1481 } else 1482 np->n_dircachesize++; 1483 1484 KASSERT(ndp->dc_refcnt == 1); 1485 LIST_INSERT_HEAD(ndhp, ndp, dc_hash); 1486 TAILQ_INSERT_TAIL(&np->n_dirchain, ndp, dc_chain); 1487 ndp->dc_refcnt++; 1488 done: 1489 KASSERT(ndp->dc_refcnt > 0); 1490 NFSDC_UNLOCK(np); 1491 if (newndp) 1492 nfs_putdircache(np, newndp); 1493 return ndp; 1494 } 1495 1496 void 1497 nfs_invaldircache(vp, flags) 1498 struct vnode *vp; 1499 int flags; 1500 { 1501 struct nfsnode *np = VTONFS(vp); 1502 struct nfsdircache *ndp = NULL; 1503 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1504 const boolean_t forcefree = flags & NFS_INVALDIRCACHE_FORCE; 1505 1506 #ifdef DIAGNOSTIC 1507 if (vp->v_type != VDIR) 1508 panic("nfs: invaldircache: not dir"); 1509 #endif 1510 1511 if ((flags & NFS_INVALDIRCACHE_KEEPEOF) == 0) 1512 np->n_flag &= ~NEOFVALID; 1513 1514 if (!np->n_dircache) 1515 return; 1516 1517 NFSDC_LOCK(np); 1518 if (!(nmp->nm_flag & NFSMNT_XLATECOOKIE) || forcefree) { 1519 while ((ndp = TAILQ_FIRST(&np->n_dirchain)) != NULL) { 1520 KASSERT(!forcefree || ndp->dc_refcnt == 1); 1521 nfs_unlinkdircache(np, ndp); 1522 } 1523 np->n_dircachesize = 0; 1524 if (forcefree && np->n_dirgens) { 1525 FREE(np->n_dirgens, M_NFSDIROFF); 1526 np->n_dirgens = NULL; 1527 } 1528 } else { 1529 TAILQ_FOREACH(ndp, &np->n_dirchain, dc_chain) 1530 ndp->dc_flags |= NFSDC_INVALID; 1531 } 1532 1533 NFSDC_UNLOCK(np); 1534 } 1535 1536 /* 1537 * Called once before VFS init to initialize shared and 1538 * server-specific data structures. 1539 */ 1540 static int 1541 nfs_init0(void) 1542 { 1543 nfsrtt.pos = 0; 1544 rpc_vers = txdr_unsigned(RPC_VER2); 1545 rpc_call = txdr_unsigned(RPC_CALL); 1546 rpc_reply = txdr_unsigned(RPC_REPLY); 1547 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); 1548 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); 1549 rpc_mismatch = txdr_unsigned(RPC_MISMATCH); 1550 rpc_autherr = txdr_unsigned(RPC_AUTHERR); 1551 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); 1552 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); 1553 nfs_prog = txdr_unsigned(NFS_PROG); 1554 nqnfs_prog = txdr_unsigned(NQNFS_PROG); 1555 nfs_true = txdr_unsigned(TRUE); 1556 nfs_false = txdr_unsigned(FALSE); 1557 nfs_xdrneg1 = txdr_unsigned(-1); 1558 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; 1559 if (nfs_ticks < 1) 1560 nfs_ticks = 1; 1561 #ifdef NFSSERVER 1562 nfsrv_init(0); /* Init server data structures */ 1563 nfsrv_initcache(); /* Init the server request cache */ 1564 #endif /* NFSSERVER */ 1565 1566 #if defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) 1567 nfsdreq_init(); 1568 #endif /* defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) */ 1569 1570 #if defined(NFSSERVER) || !defined(NFS_V2_ONLY) 1571 /* 1572 * Initialize the nqnfs data structures. 1573 */ 1574 if (nqnfsstarttime == 0) { 1575 nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease 1576 + nqsrv_clockskew + nqsrv_writeslack; 1577 NQLOADNOVRAM(nqnfsstarttime); 1578 CIRCLEQ_INIT(&nqtimerhead); 1579 nqfhhashtbl = hashinit(NQLCHSZ, HASH_LIST, M_NQLEASE, 1580 M_WAITOK, &nqfhhash); 1581 } 1582 #endif 1583 1584 exithook_establish(nfs_exit, NULL); 1585 1586 /* 1587 * Initialize reply list and start timer 1588 */ 1589 TAILQ_INIT(&nfs_reqq); 1590 nfs_timer(NULL); 1591 MOWNER_ATTACH(&nfs_mowner); 1592 1593 #ifdef NFS 1594 /* Initialize the kqueue structures */ 1595 nfs_kqinit(); 1596 /* Initialize the iod structures */ 1597 nfs_iodinit(); 1598 #endif 1599 return 0; 1600 } 1601 1602 void 1603 nfs_init(void) 1604 { 1605 static ONCE_DECL(nfs_init_once); 1606 1607 RUN_ONCE(&nfs_init_once, nfs_init0); 1608 } 1609 1610 #ifdef NFS 1611 /* 1612 * Called once at VFS init to initialize client-specific data structures. 1613 */ 1614 void 1615 nfs_vfs_init() 1616 { 1617 /* Initialize NFS server / client shared data. */ 1618 nfs_init(); 1619 1620 nfs_nhinit(); /* Init the nfsnode table */ 1621 nfs_commitsize = uvmexp.npages << (PAGE_SHIFT - 4); 1622 } 1623 1624 void 1625 nfs_vfs_reinit() 1626 { 1627 nfs_nhreinit(); 1628 } 1629 1630 void 1631 nfs_vfs_done() 1632 { 1633 nfs_nhdone(); 1634 } 1635 1636 /* 1637 * Attribute cache routines. 1638 * nfs_loadattrcache() - loads or updates the cache contents from attributes 1639 * that are on the mbuf list 1640 * nfs_getattrcache() - returns valid attributes if found in cache, returns 1641 * error otherwise 1642 */ 1643 1644 /* 1645 * Load the attribute cache (that lives in the nfsnode entry) with 1646 * the values on the mbuf list and 1647 * Iff vap not NULL 1648 * copy the attributes to *vaper 1649 */ 1650 int 1651 nfsm_loadattrcache(vpp, mdp, dposp, vaper, flags) 1652 struct vnode **vpp; 1653 struct mbuf **mdp; 1654 caddr_t *dposp; 1655 struct vattr *vaper; 1656 int flags; 1657 { 1658 int32_t t1; 1659 caddr_t cp2; 1660 int error = 0; 1661 struct mbuf *md; 1662 int v3 = NFS_ISV3(*vpp); 1663 1664 md = *mdp; 1665 t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; 1666 error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2); 1667 if (error) 1668 return (error); 1669 return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags); 1670 } 1671 1672 int 1673 nfs_loadattrcache(vpp, fp, vaper, flags) 1674 struct vnode **vpp; 1675 struct nfs_fattr *fp; 1676 struct vattr *vaper; 1677 int flags; 1678 { 1679 struct vnode *vp = *vpp; 1680 struct vattr *vap; 1681 int v3 = NFS_ISV3(vp); 1682 enum vtype vtyp; 1683 u_short vmode; 1684 struct timespec mtime; 1685 struct timespec ctime; 1686 struct vnode *nvp; 1687 int32_t rdev; 1688 struct nfsnode *np; 1689 extern int (**spec_nfsv2nodeop_p) __P((void *)); 1690 uid_t uid; 1691 gid_t gid; 1692 1693 if (v3) { 1694 vtyp = nfsv3tov_type(fp->fa_type); 1695 vmode = fxdr_unsigned(u_short, fp->fa_mode); 1696 rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1), 1697 fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2)); 1698 fxdr_nfsv3time(&fp->fa3_mtime, &mtime); 1699 fxdr_nfsv3time(&fp->fa3_ctime, &ctime); 1700 } else { 1701 vtyp = nfsv2tov_type(fp->fa_type); 1702 vmode = fxdr_unsigned(u_short, fp->fa_mode); 1703 if (vtyp == VNON || vtyp == VREG) 1704 vtyp = IFTOVT(vmode); 1705 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); 1706 fxdr_nfsv2time(&fp->fa2_mtime, &mtime); 1707 ctime.tv_sec = fxdr_unsigned(u_int32_t, 1708 fp->fa2_ctime.nfsv2_sec); 1709 ctime.tv_nsec = 0; 1710 1711 /* 1712 * Really ugly NFSv2 kludge. 1713 */ 1714 if (vtyp == VCHR && rdev == 0xffffffff) 1715 vtyp = VFIFO; 1716 } 1717 1718 vmode &= ALLPERMS; 1719 1720 /* 1721 * If v_type == VNON it is a new node, so fill in the v_type, 1722 * n_mtime fields. Check to see if it represents a special 1723 * device, and if so, check for a possible alias. Once the 1724 * correct vnode has been obtained, fill in the rest of the 1725 * information. 1726 */ 1727 np = VTONFS(vp); 1728 if (vp->v_type == VNON) { 1729 vp->v_type = vtyp; 1730 if (vp->v_type == VFIFO) { 1731 extern int (**fifo_nfsv2nodeop_p) __P((void *)); 1732 vp->v_op = fifo_nfsv2nodeop_p; 1733 } else if (vp->v_type == VREG) { 1734 lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0); 1735 } else if (vp->v_type == VCHR || vp->v_type == VBLK) { 1736 vp->v_op = spec_nfsv2nodeop_p; 1737 nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); 1738 if (nvp) { 1739 /* 1740 * Discard unneeded vnode, but save its nfsnode. 1741 * Since the nfsnode does not have a lock, its 1742 * vnode lock has to be carried over. 1743 */ 1744 /* 1745 * XXX is the old node sure to be locked here? 1746 */ 1747 KASSERT(lockstatus(&vp->v_lock) == 1748 LK_EXCLUSIVE); 1749 nvp->v_data = vp->v_data; 1750 vp->v_data = NULL; 1751 VOP_UNLOCK(vp, 0); 1752 vp->v_op = spec_vnodeop_p; 1753 vrele(vp); 1754 vgone(vp); 1755 lockmgr(&nvp->v_lock, LK_EXCLUSIVE, 1756 &nvp->v_interlock); 1757 /* 1758 * Reinitialize aliased node. 1759 */ 1760 np->n_vnode = nvp; 1761 *vpp = vp = nvp; 1762 } 1763 } 1764 np->n_mtime = mtime; 1765 } 1766 uid = fxdr_unsigned(uid_t, fp->fa_uid); 1767 gid = fxdr_unsigned(gid_t, fp->fa_gid); 1768 vap = np->n_vattr; 1769 1770 /* 1771 * Invalidate access cache if uid, gid, mode or ctime changed. 1772 */ 1773 if (np->n_accstamp != -1 && 1774 (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode 1775 || timespeccmp(&ctime, &vap->va_ctime, !=))) 1776 np->n_accstamp = -1; 1777 1778 vap->va_type = vtyp; 1779 vap->va_mode = vmode; 1780 vap->va_rdev = (dev_t)rdev; 1781 vap->va_mtime = mtime; 1782 vap->va_ctime = ctime; 1783 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; 1784 switch (vtyp) { 1785 case VDIR: 1786 vap->va_blocksize = NFS_DIRFRAGSIZ; 1787 break; 1788 case VBLK: 1789 vap->va_blocksize = BLKDEV_IOSIZE; 1790 break; 1791 case VCHR: 1792 vap->va_blocksize = MAXBSIZE; 1793 break; 1794 default: 1795 vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize : 1796 fxdr_unsigned(int32_t, fp->fa2_blocksize); 1797 break; 1798 } 1799 if (v3) { 1800 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 1801 vap->va_uid = uid; 1802 vap->va_gid = gid; 1803 vap->va_size = fxdr_hyper(&fp->fa3_size); 1804 vap->va_bytes = fxdr_hyper(&fp->fa3_used); 1805 vap->va_fileid = fxdr_hyper(&fp->fa3_fileid); 1806 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); 1807 vap->va_flags = 0; 1808 vap->va_filerev = 0; 1809 } else { 1810 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 1811 vap->va_uid = uid; 1812 vap->va_gid = gid; 1813 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); 1814 vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks) 1815 * NFS_FABLKSIZE; 1816 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); 1817 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); 1818 vap->va_flags = 0; 1819 vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); 1820 vap->va_filerev = 0; 1821 } 1822 if (vap->va_size != np->n_size) { 1823 if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) { 1824 vap->va_size = np->n_size; 1825 } else { 1826 np->n_size = vap->va_size; 1827 if (vap->va_type == VREG) { 1828 /* 1829 * we can't free pages if NAC_NOTRUNC because 1830 * the pages can be owned by ourselves. 1831 */ 1832 if (flags & NAC_NOTRUNC) { 1833 np->n_flag |= NTRUNCDELAYED; 1834 } else { 1835 genfs_node_wrlock(vp); 1836 simple_lock(&vp->v_interlock); 1837 (void)VOP_PUTPAGES(vp, 0, 1838 0, PGO_SYNCIO | PGO_CLEANIT | 1839 PGO_FREE | PGO_ALLPAGES); 1840 uvm_vnp_setsize(vp, np->n_size); 1841 genfs_node_unlock(vp); 1842 } 1843 } 1844 } 1845 } 1846 np->n_attrstamp = time_second; 1847 if (vaper != NULL) { 1848 memcpy((caddr_t)vaper, (caddr_t)vap, sizeof(*vap)); 1849 if (np->n_flag & NCHG) { 1850 if (np->n_flag & NACC) 1851 vaper->va_atime = np->n_atim; 1852 if (np->n_flag & NUPD) 1853 vaper->va_mtime = np->n_mtim; 1854 } 1855 } 1856 return (0); 1857 } 1858 1859 /* 1860 * Check the time stamp 1861 * If the cache is valid, copy contents to *vap and return 0 1862 * otherwise return an error 1863 */ 1864 int 1865 nfs_getattrcache(vp, vaper) 1866 struct vnode *vp; 1867 struct vattr *vaper; 1868 { 1869 struct nfsnode *np = VTONFS(vp); 1870 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1871 struct vattr *vap; 1872 1873 if (np->n_attrstamp == 0 || 1874 (time_second - np->n_attrstamp) >= NFS_ATTRTIMEO(nmp, np)) { 1875 nfsstats.attrcache_misses++; 1876 return (ENOENT); 1877 } 1878 nfsstats.attrcache_hits++; 1879 vap = np->n_vattr; 1880 if (vap->va_size != np->n_size) { 1881 if (vap->va_type == VREG) { 1882 if (np->n_flag & NMODIFIED) { 1883 if (vap->va_size < np->n_size) 1884 vap->va_size = np->n_size; 1885 else 1886 np->n_size = vap->va_size; 1887 } else 1888 np->n_size = vap->va_size; 1889 genfs_node_wrlock(vp); 1890 uvm_vnp_setsize(vp, np->n_size); 1891 genfs_node_unlock(vp); 1892 } else 1893 np->n_size = vap->va_size; 1894 } 1895 memcpy((caddr_t)vaper, (caddr_t)vap, sizeof(struct vattr)); 1896 if (np->n_flag & NCHG) { 1897 if (np->n_flag & NACC) 1898 vaper->va_atime = np->n_atim; 1899 if (np->n_flag & NUPD) 1900 vaper->va_mtime = np->n_mtim; 1901 } 1902 return (0); 1903 } 1904 1905 void 1906 nfs_delayedtruncate(vp) 1907 struct vnode *vp; 1908 { 1909 struct nfsnode *np = VTONFS(vp); 1910 1911 if (np->n_flag & NTRUNCDELAYED) { 1912 np->n_flag &= ~NTRUNCDELAYED; 1913 genfs_node_wrlock(vp); 1914 simple_lock(&vp->v_interlock); 1915 (void)VOP_PUTPAGES(vp, 0, 1916 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); 1917 uvm_vnp_setsize(vp, np->n_size); 1918 genfs_node_unlock(vp); 1919 } 1920 } 1921 1922 #define NFS_WCCKLUDGE_TIMEOUT (24 * 60 * 60) /* 1 day */ 1923 #define NFS_WCCKLUDGE(nmp, now) \ 1924 (((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \ 1925 ((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0) 1926 1927 /* 1928 * nfs_check_wccdata: check inaccurate wcc_data 1929 * 1930 * => return non-zero if we shouldn't trust the wcc_data. 1931 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed". 1932 */ 1933 1934 int 1935 nfs_check_wccdata( 1936 struct nfsnode *np __unused, 1937 const struct timespec *ctime __unused, 1938 struct timespec *mtime __unused, 1939 boolean_t docheck __unused 1940 ) 1941 { 1942 int error = 0; 1943 1944 #if !defined(NFS_V2_ONLY) 1945 1946 if (docheck) { 1947 struct vnode *vp = NFSTOV(np); 1948 struct nfsmount *nmp; 1949 long now = time_second; 1950 #if defined(DEBUG) 1951 const char *reason = NULL; /* XXX: gcc */ 1952 #endif 1953 1954 if (timespeccmp(&np->n_vattr->va_mtime, mtime, <=)) { 1955 #if defined(DEBUG) 1956 reason = "mtime"; 1957 #endif 1958 error = EINVAL; 1959 } 1960 1961 if (vp->v_type == VDIR && 1962 timespeccmp(&np->n_vattr->va_ctime, ctime, <=)) { 1963 #if defined(DEBUG) 1964 reason = "ctime"; 1965 #endif 1966 error = EINVAL; 1967 } 1968 1969 nmp = VFSTONFS(vp->v_mount); 1970 if (error) { 1971 1972 /* 1973 * despite of the fact that we've updated the file, 1974 * timestamps of the file were not updated as we 1975 * expected. 1976 * it means that the server has incompatible 1977 * semantics of timestamps or (more likely) 1978 * the server time is not precise enough to 1979 * track each modifications. 1980 * in that case, we disable wcc processing. 1981 * 1982 * yes, strictly speaking, we should disable all 1983 * caching. it's a compromise. 1984 */ 1985 1986 simple_lock(&nmp->nm_slock); 1987 #if defined(DEBUG) 1988 if (!NFS_WCCKLUDGE(nmp, now)) { 1989 printf("%s: inaccurate wcc data (%s) detected," 1990 " disabling wcc\n", 1991 vp->v_mount->mnt_stat.f_mntfromname, 1992 reason); 1993 } 1994 #endif 1995 nmp->nm_iflag |= NFSMNT_WCCKLUDGE; 1996 nmp->nm_wcckludgetime = now; 1997 simple_unlock(&nmp->nm_slock); 1998 } else if (NFS_WCCKLUDGE(nmp, now)) { 1999 error = EPERM; /* XXX */ 2000 } else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 2001 simple_lock(&nmp->nm_slock); 2002 if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 2003 #if defined(DEBUG) 2004 printf("%s: re-enabling wcc\n", 2005 vp->v_mount->mnt_stat.f_mntfromname); 2006 #endif 2007 nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE; 2008 } 2009 simple_unlock(&nmp->nm_slock); 2010 } 2011 } 2012 2013 #else 2014 do { if (&np) {} } while (/* CONSTCOND */ 0); /* for -Wunused */ 2015 do { if (&ctime) {} } while (/* CONSTCOND */ 0); 2016 do { if (&mtime) {} } while (/* CONSTCOND */ 0); 2017 do { if (&docheck) {} } while (/* CONSTCOND */ 0); 2018 #endif /* !defined(NFS_V2_ONLY) */ 2019 2020 return error; 2021 } 2022 2023 /* 2024 * Heuristic to see if the server XDR encodes directory cookies or not. 2025 * it is not supposed to, but a lot of servers may do this. Also, since 2026 * most/all servers will implement V2 as well, it is expected that they 2027 * may return just 32 bits worth of cookie information, so we need to 2028 * find out in which 32 bits this information is available. We do this 2029 * to avoid trouble with emulated binaries that can't handle 64 bit 2030 * directory offsets. 2031 */ 2032 2033 void 2034 nfs_cookieheuristic(vp, flagp, l, cred) 2035 struct vnode *vp; 2036 int *flagp; 2037 struct lwp *l; 2038 kauth_cred_t cred; 2039 { 2040 struct uio auio; 2041 struct iovec aiov; 2042 caddr_t tbuf, cp; 2043 struct dirent *dp; 2044 off_t *cookies = NULL, *cop; 2045 int error, eof, nc, len; 2046 2047 MALLOC(tbuf, caddr_t, NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK); 2048 2049 aiov.iov_base = tbuf; 2050 aiov.iov_len = NFS_DIRFRAGSIZ; 2051 auio.uio_iov = &aiov; 2052 auio.uio_iovcnt = 1; 2053 auio.uio_rw = UIO_READ; 2054 auio.uio_resid = NFS_DIRFRAGSIZ; 2055 auio.uio_offset = 0; 2056 UIO_SETUP_SYSSPACE(&auio); 2057 2058 error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc); 2059 2060 len = NFS_DIRFRAGSIZ - auio.uio_resid; 2061 if (error || len == 0) { 2062 FREE(tbuf, M_TEMP); 2063 if (cookies) 2064 free(cookies, M_TEMP); 2065 return; 2066 } 2067 2068 /* 2069 * Find the first valid entry and look at its offset cookie. 2070 */ 2071 2072 cp = tbuf; 2073 for (cop = cookies; len > 0; len -= dp->d_reclen) { 2074 dp = (struct dirent *)cp; 2075 if (dp->d_fileno != 0 && len >= dp->d_reclen) { 2076 if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) { 2077 *flagp |= NFSMNT_SWAPCOOKIE; 2078 nfs_invaldircache(vp, 0); 2079 nfs_vinvalbuf(vp, 0, cred, l, 1); 2080 } 2081 break; 2082 } 2083 cop++; 2084 cp += dp->d_reclen; 2085 } 2086 2087 FREE(tbuf, M_TEMP); 2088 free(cookies, M_TEMP); 2089 } 2090 #endif /* NFS */ 2091 2092 #ifdef NFSSERVER 2093 /* 2094 * Set up nameidata for a lookup() call and do it. 2095 * 2096 * If pubflag is set, this call is done for a lookup operation on the 2097 * public filehandle. In that case we allow crossing mountpoints and 2098 * absolute pathnames. However, the caller is expected to check that 2099 * the lookup result is within the public fs, and deny access if 2100 * it is not. 2101 */ 2102 int 2103 nfs_namei(ndp, nsfh, len, slp, nam, mdp, dposp, retdirp, l, kerbflag, pubflag) 2104 struct nameidata *ndp; 2105 nfsrvfh_t *nsfh; 2106 uint32_t len; 2107 struct nfssvc_sock *slp; 2108 struct mbuf *nam; 2109 struct mbuf **mdp; 2110 caddr_t *dposp; 2111 struct vnode **retdirp; 2112 struct lwp *l; 2113 int kerbflag, pubflag; 2114 { 2115 int i, rem; 2116 struct mbuf *md; 2117 char *fromcp, *tocp, *cp; 2118 struct iovec aiov; 2119 struct uio auio; 2120 struct vnode *dp; 2121 int error, rdonly, linklen; 2122 struct componentname *cnp = &ndp->ni_cnd; 2123 2124 *retdirp = (struct vnode *)0; 2125 2126 if ((len + 1) > MAXPATHLEN) 2127 return (ENAMETOOLONG); 2128 if (len == 0) 2129 return (EACCES); 2130 cnp->cn_pnbuf = PNBUF_GET(); 2131 2132 /* 2133 * Copy the name from the mbuf list to ndp->ni_pnbuf 2134 * and set the various ndp fields appropriately. 2135 */ 2136 fromcp = *dposp; 2137 tocp = cnp->cn_pnbuf; 2138 md = *mdp; 2139 rem = mtod(md, caddr_t) + md->m_len - fromcp; 2140 for (i = 0; i < len; i++) { 2141 while (rem == 0) { 2142 md = md->m_next; 2143 if (md == NULL) { 2144 error = EBADRPC; 2145 goto out; 2146 } 2147 fromcp = mtod(md, caddr_t); 2148 rem = md->m_len; 2149 } 2150 if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { 2151 error = EACCES; 2152 goto out; 2153 } 2154 *tocp++ = *fromcp++; 2155 rem--; 2156 } 2157 *tocp = '\0'; 2158 *mdp = md; 2159 *dposp = fromcp; 2160 len = nfsm_rndup(len)-len; 2161 if (len > 0) { 2162 if (rem >= len) 2163 *dposp += len; 2164 else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) 2165 goto out; 2166 } 2167 2168 /* 2169 * Extract and set starting directory. 2170 */ 2171 error = nfsrv_fhtovp(nsfh, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, 2172 nam, &rdonly, kerbflag, pubflag); 2173 if (error) 2174 goto out; 2175 if (dp->v_type != VDIR) { 2176 vrele(dp); 2177 error = ENOTDIR; 2178 goto out; 2179 } 2180 2181 if (rdonly) 2182 cnp->cn_flags |= RDONLY; 2183 2184 *retdirp = dp; 2185 2186 if (pubflag) { 2187 /* 2188 * Oh joy. For WebNFS, handle those pesky '%' escapes, 2189 * and the 'native path' indicator. 2190 */ 2191 cp = PNBUF_GET(); 2192 fromcp = cnp->cn_pnbuf; 2193 tocp = cp; 2194 if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { 2195 switch ((unsigned char)*fromcp) { 2196 case WEBNFS_NATIVE_CHAR: 2197 /* 2198 * 'Native' path for us is the same 2199 * as a path according to the NFS spec, 2200 * just skip the escape char. 2201 */ 2202 fromcp++; 2203 break; 2204 /* 2205 * More may be added in the future, range 0x80-0xff 2206 */ 2207 default: 2208 error = EIO; 2209 PNBUF_PUT(cp); 2210 goto out; 2211 } 2212 } 2213 /* 2214 * Translate the '%' escapes, URL-style. 2215 */ 2216 while (*fromcp != '\0') { 2217 if (*fromcp == WEBNFS_ESC_CHAR) { 2218 if (fromcp[1] != '\0' && fromcp[2] != '\0') { 2219 fromcp++; 2220 *tocp++ = HEXSTRTOI(fromcp); 2221 fromcp += 2; 2222 continue; 2223 } else { 2224 error = ENOENT; 2225 PNBUF_PUT(cp); 2226 goto out; 2227 } 2228 } else 2229 *tocp++ = *fromcp++; 2230 } 2231 *tocp = '\0'; 2232 PNBUF_PUT(cnp->cn_pnbuf); 2233 cnp->cn_pnbuf = cp; 2234 } 2235 2236 ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; 2237 ndp->ni_segflg = UIO_SYSSPACE; 2238 ndp->ni_rootdir = rootvnode; 2239 2240 if (pubflag) { 2241 ndp->ni_loopcnt = 0; 2242 if (cnp->cn_pnbuf[0] == '/') 2243 dp = rootvnode; 2244 } else { 2245 cnp->cn_flags |= NOCROSSMOUNT; 2246 } 2247 2248 cnp->cn_lwp = l; 2249 VREF(dp); 2250 2251 for (;;) { 2252 cnp->cn_nameptr = cnp->cn_pnbuf; 2253 ndp->ni_startdir = dp; 2254 /* 2255 * And call lookup() to do the real work 2256 */ 2257 error = lookup(ndp); 2258 if (error) { 2259 PNBUF_PUT(cnp->cn_pnbuf); 2260 return (error); 2261 } 2262 /* 2263 * Check for encountering a symbolic link 2264 */ 2265 if ((cnp->cn_flags & ISSYMLINK) == 0) { 2266 if (cnp->cn_flags & (SAVENAME | SAVESTART)) 2267 cnp->cn_flags |= HASBUF; 2268 else 2269 PNBUF_PUT(cnp->cn_pnbuf); 2270 return (0); 2271 } else { 2272 if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) 2273 VOP_UNLOCK(ndp->ni_dvp, 0); 2274 if (!pubflag) { 2275 error = EINVAL; 2276 break; 2277 } 2278 2279 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 2280 error = ELOOP; 2281 break; 2282 } 2283 if (ndp->ni_vp->v_mount->mnt_flag & MNT_SYMPERM) { 2284 error = VOP_ACCESS(ndp->ni_vp, VEXEC, cnp->cn_cred, 2285 cnp->cn_lwp); 2286 if (error != 0) 2287 break; 2288 } 2289 if (ndp->ni_pathlen > 1) 2290 cp = PNBUF_GET(); 2291 else 2292 cp = cnp->cn_pnbuf; 2293 aiov.iov_base = cp; 2294 aiov.iov_len = MAXPATHLEN; 2295 auio.uio_iov = &aiov; 2296 auio.uio_iovcnt = 1; 2297 auio.uio_offset = 0; 2298 auio.uio_rw = UIO_READ; 2299 auio.uio_resid = MAXPATHLEN; 2300 UIO_SETUP_SYSSPACE(&auio); 2301 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 2302 if (error) { 2303 badlink: 2304 if (ndp->ni_pathlen > 1) 2305 PNBUF_PUT(cp); 2306 break; 2307 } 2308 linklen = MAXPATHLEN - auio.uio_resid; 2309 if (linklen == 0) { 2310 error = ENOENT; 2311 goto badlink; 2312 } 2313 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 2314 error = ENAMETOOLONG; 2315 goto badlink; 2316 } 2317 if (ndp->ni_pathlen > 1) { 2318 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); 2319 PNBUF_PUT(cnp->cn_pnbuf); 2320 cnp->cn_pnbuf = cp; 2321 } else 2322 cnp->cn_pnbuf[linklen] = '\0'; 2323 ndp->ni_pathlen += linklen; 2324 vput(ndp->ni_vp); 2325 dp = ndp->ni_dvp; 2326 /* 2327 * Check if root directory should replace current directory. 2328 */ 2329 if (cnp->cn_pnbuf[0] == '/') { 2330 vrele(dp); 2331 dp = ndp->ni_rootdir; 2332 VREF(dp); 2333 } 2334 } 2335 } 2336 vrele(ndp->ni_dvp); 2337 vput(ndp->ni_vp); 2338 ndp->ni_vp = NULL; 2339 out: 2340 PNBUF_PUT(cnp->cn_pnbuf); 2341 return (error); 2342 } 2343 #endif /* NFSSERVER */ 2344 2345 /* 2346 * A fiddled version of m_adj() that ensures null fill to a 32-bit 2347 * boundary and only trims off the back end 2348 * 2349 * 1. trim off 'len' bytes as m_adj(mp, -len). 2350 * 2. add zero-padding 'nul' bytes at the end of the mbuf chain. 2351 */ 2352 void 2353 nfs_zeropad(mp, len, nul) 2354 struct mbuf *mp; 2355 int len; 2356 int nul; 2357 { 2358 struct mbuf *m; 2359 int count; 2360 2361 /* 2362 * Trim from tail. Scan the mbuf chain, 2363 * calculating its length and finding the last mbuf. 2364 * If the adjustment only affects this mbuf, then just 2365 * adjust and return. Otherwise, rescan and truncate 2366 * after the remaining size. 2367 */ 2368 count = 0; 2369 m = mp; 2370 for (;;) { 2371 count += m->m_len; 2372 if (m->m_next == NULL) 2373 break; 2374 m = m->m_next; 2375 } 2376 2377 KDASSERT(count >= len); 2378 2379 if (m->m_len >= len) { 2380 m->m_len -= len; 2381 } else { 2382 count -= len; 2383 /* 2384 * Correct length for chain is "count". 2385 * Find the mbuf with last data, adjust its length, 2386 * and toss data from remaining mbufs on chain. 2387 */ 2388 for (m = mp; m; m = m->m_next) { 2389 if (m->m_len >= count) { 2390 m->m_len = count; 2391 break; 2392 } 2393 count -= m->m_len; 2394 } 2395 KASSERT(m && m->m_next); 2396 m_freem(m->m_next); 2397 m->m_next = NULL; 2398 } 2399 2400 KDASSERT(m->m_next == NULL); 2401 2402 /* 2403 * zero-padding. 2404 */ 2405 if (nul > 0) { 2406 char *cp; 2407 int i; 2408 2409 if (M_ROMAP(m) || M_TRAILINGSPACE(m) < nul) { 2410 struct mbuf *n; 2411 2412 KDASSERT(MLEN >= nul); 2413 n = m_get(M_WAIT, MT_DATA); 2414 MCLAIM(n, &nfs_mowner); 2415 n->m_len = nul; 2416 n->m_next = NULL; 2417 m->m_next = n; 2418 cp = mtod(n, caddr_t); 2419 } else { 2420 cp = mtod(m, caddr_t) + m->m_len; 2421 m->m_len += nul; 2422 } 2423 for (i = 0; i < nul; i++) 2424 *cp++ = '\0'; 2425 } 2426 return; 2427 } 2428 2429 /* 2430 * Make these functions instead of macros, so that the kernel text size 2431 * doesn't get too big... 2432 */ 2433 void 2434 nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) 2435 struct nfsrv_descript *nfsd; 2436 int before_ret; 2437 struct vattr *before_vap; 2438 int after_ret; 2439 struct vattr *after_vap; 2440 struct mbuf **mbp; 2441 char **bposp; 2442 { 2443 struct mbuf *mb = *mbp; 2444 char *bpos = *bposp; 2445 u_int32_t *tl; 2446 2447 if (before_ret) { 2448 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 2449 *tl = nfs_false; 2450 } else { 2451 nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); 2452 *tl++ = nfs_true; 2453 txdr_hyper(before_vap->va_size, tl); 2454 tl += 2; 2455 txdr_nfsv3time(&(before_vap->va_mtime), tl); 2456 tl += 2; 2457 txdr_nfsv3time(&(before_vap->va_ctime), tl); 2458 } 2459 *bposp = bpos; 2460 *mbp = mb; 2461 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); 2462 } 2463 2464 void 2465 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) 2466 struct nfsrv_descript *nfsd; 2467 int after_ret; 2468 struct vattr *after_vap; 2469 struct mbuf **mbp; 2470 char **bposp; 2471 { 2472 struct mbuf *mb = *mbp; 2473 char *bpos = *bposp; 2474 u_int32_t *tl; 2475 struct nfs_fattr *fp; 2476 2477 if (after_ret) { 2478 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 2479 *tl = nfs_false; 2480 } else { 2481 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); 2482 *tl++ = nfs_true; 2483 fp = (struct nfs_fattr *)tl; 2484 nfsm_srvfattr(nfsd, after_vap, fp); 2485 } 2486 *mbp = mb; 2487 *bposp = bpos; 2488 } 2489 2490 void 2491 nfsm_srvfattr(nfsd, vap, fp) 2492 struct nfsrv_descript *nfsd; 2493 struct vattr *vap; 2494 struct nfs_fattr *fp; 2495 { 2496 2497 fp->fa_nlink = txdr_unsigned(vap->va_nlink); 2498 fp->fa_uid = txdr_unsigned(vap->va_uid); 2499 fp->fa_gid = txdr_unsigned(vap->va_gid); 2500 if (nfsd->nd_flag & ND_NFSV3) { 2501 fp->fa_type = vtonfsv3_type(vap->va_type); 2502 fp->fa_mode = vtonfsv3_mode(vap->va_mode); 2503 txdr_hyper(vap->va_size, &fp->fa3_size); 2504 txdr_hyper(vap->va_bytes, &fp->fa3_used); 2505 fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); 2506 fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); 2507 fp->fa3_fsid.nfsuquad[0] = 0; 2508 fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); 2509 txdr_hyper(vap->va_fileid, &fp->fa3_fileid); 2510 txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); 2511 txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); 2512 txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); 2513 } else { 2514 fp->fa_type = vtonfsv2_type(vap->va_type); 2515 fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 2516 fp->fa2_size = txdr_unsigned(vap->va_size); 2517 fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); 2518 if (vap->va_type == VFIFO) 2519 fp->fa2_rdev = 0xffffffff; 2520 else 2521 fp->fa2_rdev = txdr_unsigned(vap->va_rdev); 2522 fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); 2523 fp->fa2_fsid = txdr_unsigned(vap->va_fsid); 2524 fp->fa2_fileid = txdr_unsigned(vap->va_fileid); 2525 txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); 2526 txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); 2527 txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); 2528 } 2529 } 2530 2531 #ifdef NFSSERVER 2532 /* 2533 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) 2534 * - look up fsid in mount list (if not found ret error) 2535 * - get vp and export rights by calling VFS_FHTOVP() 2536 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon 2537 * - if not lockflag unlock it with VOP_UNLOCK() 2538 */ 2539 int 2540 nfsrv_fhtovp( 2541 nfsrvfh_t *nsfh, 2542 int lockflag, 2543 struct vnode **vpp, 2544 kauth_cred_t cred, 2545 struct nfssvc_sock *slp __unused, 2546 struct mbuf *nam, 2547 int *rdonlyp, 2548 int kerbflag, 2549 int pubflag 2550 ) 2551 { 2552 struct mount *mp; 2553 kauth_cred_t credanon; 2554 int error, exflags; 2555 struct sockaddr_in *saddr; 2556 fhandle_t *fhp; 2557 2558 fhp = NFSRVFH_FHANDLE(nsfh); 2559 *vpp = (struct vnode *)0; 2560 2561 if (nfs_ispublicfh(nsfh)) { 2562 if (!pubflag || !nfs_pub.np_valid) 2563 return (ESTALE); 2564 fhp = nfs_pub.np_handle; 2565 } 2566 2567 error = netexport_check(&fhp->fh_fsid, nam, &mp, &exflags, &credanon); 2568 if (error) { 2569 return error; 2570 } 2571 2572 error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); 2573 if (error) 2574 return (error); 2575 2576 if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { 2577 saddr = mtod(nam, struct sockaddr_in *); 2578 if ((saddr->sin_family == AF_INET) && 2579 ntohs(saddr->sin_port) >= IPPORT_RESERVED) { 2580 vput(*vpp); 2581 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2582 } 2583 #ifdef INET6 2584 if ((saddr->sin_family == AF_INET6) && 2585 ntohs(saddr->sin_port) >= IPV6PORT_RESERVED) { 2586 vput(*vpp); 2587 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2588 } 2589 #endif 2590 } 2591 /* 2592 * Check/setup credentials. 2593 */ 2594 if (exflags & MNT_EXKERB) { 2595 if (!kerbflag) { 2596 vput(*vpp); 2597 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2598 } 2599 } else if (kerbflag) { 2600 vput(*vpp); 2601 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2602 } else if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 2603 NULL) == 0 || (exflags & MNT_EXPORTANON)) { 2604 kauth_cred_clone(credanon, cred); 2605 } 2606 if (exflags & MNT_EXRDONLY) 2607 *rdonlyp = 1; 2608 else 2609 *rdonlyp = 0; 2610 if (!lockflag) 2611 VOP_UNLOCK(*vpp, 0); 2612 return (0); 2613 } 2614 2615 /* 2616 * WebNFS: check if a filehandle is a public filehandle. For v3, this 2617 * means a length of 0, for v2 it means all zeroes. 2618 */ 2619 int 2620 nfs_ispublicfh(const nfsrvfh_t *nsfh) 2621 { 2622 const char *cp = (const void *)(NFSRVFH_DATA(nsfh)); 2623 int i; 2624 2625 if (NFSRVFH_SIZE(nsfh) == 0) { 2626 return TRUE; 2627 } 2628 if (NFSRVFH_SIZE(nsfh) != NFSX_V2FH) { 2629 return FALSE; 2630 } 2631 for (i = 0; i < NFSX_V2FH; i++) 2632 if (*cp++ != 0) 2633 return FALSE; 2634 return TRUE; 2635 } 2636 #endif /* NFSSERVER */ 2637 2638 /* 2639 * This function compares two net addresses by family and returns TRUE 2640 * if they are the same host. 2641 * If there is any doubt, return FALSE. 2642 * The AF_INET family is handled as a special case so that address mbufs 2643 * don't need to be saved to store "struct in_addr", which is only 4 bytes. 2644 */ 2645 int 2646 netaddr_match(family, haddr, nam) 2647 int family; 2648 union nethostaddr *haddr; 2649 struct mbuf *nam; 2650 { 2651 struct sockaddr_in *inetaddr; 2652 2653 switch (family) { 2654 case AF_INET: 2655 inetaddr = mtod(nam, struct sockaddr_in *); 2656 if (inetaddr->sin_family == AF_INET && 2657 inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 2658 return (1); 2659 break; 2660 #ifdef INET6 2661 case AF_INET6: 2662 { 2663 struct sockaddr_in6 *sin6_1, *sin6_2; 2664 2665 sin6_1 = mtod(nam, struct sockaddr_in6 *); 2666 sin6_2 = mtod(haddr->had_nam, struct sockaddr_in6 *); 2667 if (sin6_1->sin6_family == AF_INET6 && 2668 IN6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr, &sin6_2->sin6_addr)) 2669 return 1; 2670 } 2671 #endif 2672 #ifdef ISO 2673 case AF_ISO: 2674 { 2675 struct sockaddr_iso *isoaddr1, *isoaddr2; 2676 2677 isoaddr1 = mtod(nam, struct sockaddr_iso *); 2678 isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 2679 if (isoaddr1->siso_family == AF_ISO && 2680 isoaddr1->siso_nlen > 0 && 2681 isoaddr1->siso_nlen == isoaddr2->siso_nlen && 2682 SAME_ISOADDR(isoaddr1, isoaddr2)) 2683 return (1); 2684 break; 2685 } 2686 #endif /* ISO */ 2687 default: 2688 break; 2689 }; 2690 return (0); 2691 } 2692 2693 /* 2694 * The write verifier has changed (probably due to a server reboot), so all 2695 * PG_NEEDCOMMIT pages will have to be written again. Since they are marked 2696 * as dirty or are being written out just now, all this takes is clearing 2697 * the PG_NEEDCOMMIT flag. Once done the new write verifier can be set for 2698 * the mount point. 2699 */ 2700 void 2701 nfs_clearcommit(mp) 2702 struct mount *mp; 2703 { 2704 struct vnode *vp; 2705 struct nfsnode *np; 2706 struct vm_page *pg; 2707 struct nfsmount *nmp = VFSTONFS(mp); 2708 2709 lockmgr(&nmp->nm_writeverflock, LK_EXCLUSIVE, NULL); 2710 2711 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2712 KASSERT(vp->v_mount == mp); 2713 if (vp->v_type != VREG) 2714 continue; 2715 np = VTONFS(vp); 2716 np->n_pushlo = np->n_pushhi = np->n_pushedlo = 2717 np->n_pushedhi = 0; 2718 np->n_commitflags &= 2719 ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); 2720 simple_lock(&vp->v_uobj.vmobjlock); 2721 TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { 2722 pg->flags &= ~PG_NEEDCOMMIT; 2723 } 2724 simple_unlock(&vp->v_uobj.vmobjlock); 2725 } 2726 simple_lock(&nmp->nm_slock); 2727 nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF; 2728 simple_unlock(&nmp->nm_slock); 2729 lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL); 2730 } 2731 2732 void 2733 nfs_merge_commit_ranges(vp) 2734 struct vnode *vp; 2735 { 2736 struct nfsnode *np = VTONFS(vp); 2737 2738 KASSERT(np->n_commitflags & NFS_COMMIT_PUSH_VALID); 2739 2740 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { 2741 np->n_pushedlo = np->n_pushlo; 2742 np->n_pushedhi = np->n_pushhi; 2743 np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; 2744 } else { 2745 if (np->n_pushlo < np->n_pushedlo) 2746 np->n_pushedlo = np->n_pushlo; 2747 if (np->n_pushhi > np->n_pushedhi) 2748 np->n_pushedhi = np->n_pushhi; 2749 } 2750 2751 np->n_pushlo = np->n_pushhi = 0; 2752 np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID; 2753 2754 #ifdef NFS_DEBUG_COMMIT 2755 printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2756 (unsigned)np->n_pushedhi); 2757 #endif 2758 } 2759 2760 int 2761 nfs_in_committed_range(vp, off, len) 2762 struct vnode *vp; 2763 off_t off, len; 2764 { 2765 struct nfsnode *np = VTONFS(vp); 2766 off_t lo, hi; 2767 2768 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) 2769 return 0; 2770 lo = off; 2771 hi = lo + len; 2772 2773 return (lo >= np->n_pushedlo && hi <= np->n_pushedhi); 2774 } 2775 2776 int 2777 nfs_in_tobecommitted_range(vp, off, len) 2778 struct vnode *vp; 2779 off_t off, len; 2780 { 2781 struct nfsnode *np = VTONFS(vp); 2782 off_t lo, hi; 2783 2784 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) 2785 return 0; 2786 lo = off; 2787 hi = lo + len; 2788 2789 return (lo >= np->n_pushlo && hi <= np->n_pushhi); 2790 } 2791 2792 void 2793 nfs_add_committed_range(vp, off, len) 2794 struct vnode *vp; 2795 off_t off, len; 2796 { 2797 struct nfsnode *np = VTONFS(vp); 2798 off_t lo, hi; 2799 2800 lo = off; 2801 hi = lo + len; 2802 2803 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { 2804 np->n_pushedlo = lo; 2805 np->n_pushedhi = hi; 2806 np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; 2807 } else { 2808 if (hi > np->n_pushedhi) 2809 np->n_pushedhi = hi; 2810 if (lo < np->n_pushedlo) 2811 np->n_pushedlo = lo; 2812 } 2813 #ifdef NFS_DEBUG_COMMIT 2814 printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2815 (unsigned)np->n_pushedhi); 2816 #endif 2817 } 2818 2819 void 2820 nfs_del_committed_range(vp, off, len) 2821 struct vnode *vp; 2822 off_t off, len; 2823 { 2824 struct nfsnode *np = VTONFS(vp); 2825 off_t lo, hi; 2826 2827 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) 2828 return; 2829 2830 lo = off; 2831 hi = lo + len; 2832 2833 if (lo > np->n_pushedhi || hi < np->n_pushedlo) 2834 return; 2835 if (lo <= np->n_pushedlo) 2836 np->n_pushedlo = hi; 2837 else if (hi >= np->n_pushedhi) 2838 np->n_pushedhi = lo; 2839 else { 2840 /* 2841 * XXX There's only one range. If the deleted range 2842 * is in the middle, pick the largest of the 2843 * contiguous ranges that it leaves. 2844 */ 2845 if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi)) 2846 np->n_pushedhi = lo; 2847 else 2848 np->n_pushedlo = hi; 2849 } 2850 #ifdef NFS_DEBUG_COMMIT 2851 printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2852 (unsigned)np->n_pushedhi); 2853 #endif 2854 } 2855 2856 void 2857 nfs_add_tobecommitted_range(vp, off, len) 2858 struct vnode *vp; 2859 off_t off, len; 2860 { 2861 struct nfsnode *np = VTONFS(vp); 2862 off_t lo, hi; 2863 2864 lo = off; 2865 hi = lo + len; 2866 2867 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) { 2868 np->n_pushlo = lo; 2869 np->n_pushhi = hi; 2870 np->n_commitflags |= NFS_COMMIT_PUSH_VALID; 2871 } else { 2872 if (lo < np->n_pushlo) 2873 np->n_pushlo = lo; 2874 if (hi > np->n_pushhi) 2875 np->n_pushhi = hi; 2876 } 2877 #ifdef NFS_DEBUG_COMMIT 2878 printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, 2879 (unsigned)np->n_pushhi); 2880 #endif 2881 } 2882 2883 void 2884 nfs_del_tobecommitted_range(vp, off, len) 2885 struct vnode *vp; 2886 off_t off, len; 2887 { 2888 struct nfsnode *np = VTONFS(vp); 2889 off_t lo, hi; 2890 2891 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) 2892 return; 2893 2894 lo = off; 2895 hi = lo + len; 2896 2897 if (lo > np->n_pushhi || hi < np->n_pushlo) 2898 return; 2899 2900 if (lo <= np->n_pushlo) 2901 np->n_pushlo = hi; 2902 else if (hi >= np->n_pushhi) 2903 np->n_pushhi = lo; 2904 else { 2905 /* 2906 * XXX There's only one range. If the deleted range 2907 * is in the middle, pick the largest of the 2908 * contiguous ranges that it leaves. 2909 */ 2910 if ((np->n_pushlo - lo) > (hi - np->n_pushhi)) 2911 np->n_pushhi = lo; 2912 else 2913 np->n_pushlo = hi; 2914 } 2915 #ifdef NFS_DEBUG_COMMIT 2916 printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, 2917 (unsigned)np->n_pushhi); 2918 #endif 2919 } 2920 2921 /* 2922 * Map errnos to NFS error numbers. For Version 3 also filter out error 2923 * numbers not specified for the associated procedure. 2924 */ 2925 int 2926 nfsrv_errmap(nd, err) 2927 struct nfsrv_descript *nd; 2928 int err; 2929 { 2930 const short *defaulterrp, *errp; 2931 2932 if (nd->nd_flag & ND_NFSV3) { 2933 if (nd->nd_procnum <= NFSPROC_COMMIT) { 2934 errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; 2935 while (*++errp) { 2936 if (*errp == err) 2937 return (err); 2938 else if (*errp > err) 2939 break; 2940 } 2941 return ((int)*defaulterrp); 2942 } else 2943 return (err & 0xffff); 2944 } 2945 if (err <= ELAST) 2946 return ((int)nfsrv_v2errmap[err - 1]); 2947 return (NFSERR_IO); 2948 } 2949 2950 u_int32_t 2951 nfs_getxid() 2952 { 2953 static u_int32_t base; 2954 static u_int32_t nfs_xid = 0; 2955 static struct simplelock nfs_xidlock = SIMPLELOCK_INITIALIZER; 2956 u_int32_t newxid; 2957 2958 simple_lock(&nfs_xidlock); 2959 /* 2960 * derive initial xid from system time 2961 * XXX time is invalid if root not yet mounted 2962 */ 2963 if (__predict_false(!base && (rootvp))) { 2964 struct timeval tv; 2965 2966 microtime(&tv); 2967 base = tv.tv_sec << 12; 2968 nfs_xid = base; 2969 } 2970 2971 /* 2972 * Skip zero xid if it should ever happen. 2973 */ 2974 if (__predict_false(++nfs_xid == 0)) 2975 nfs_xid++; 2976 newxid = nfs_xid; 2977 simple_unlock(&nfs_xidlock); 2978 2979 return txdr_unsigned(newxid); 2980 } 2981 2982 /* 2983 * assign a new xid for existing request. 2984 * used for NFSERR_JUKEBOX handling. 2985 */ 2986 void 2987 nfs_renewxid(struct nfsreq *req) 2988 { 2989 u_int32_t xid; 2990 int off; 2991 2992 xid = nfs_getxid(); 2993 if (req->r_nmp->nm_sotype == SOCK_STREAM) 2994 off = sizeof(u_int32_t); /* RPC record mark */ 2995 else 2996 off = 0; 2997 2998 m_copyback(req->r_mreq, off, sizeof(xid), (void *)&xid); 2999 req->r_xid = xid; 3000 } 3001 3002 #if defined(NFSSERVER) 3003 int 3004 nfsrv_composefh(struct vnode *vp, nfsrvfh_t *nsfh, boolean_t v3) 3005 { 3006 int error; 3007 size_t fhsize; 3008 3009 fhsize = NFSD_MAXFHSIZE; 3010 error = vfs_composefh(vp, (void *)NFSRVFH_DATA(nsfh), &fhsize); 3011 if (NFSX_FHTOOBIG_P(fhsize, v3)) { 3012 error = EOPNOTSUPP; 3013 } 3014 if (error != 0) { 3015 return error; 3016 } 3017 if (!v3 && fhsize < NFSX_V2FH) { 3018 memset((char *)NFSRVFH_DATA(nsfh) + fhsize, 0, 3019 NFSX_V2FH - fhsize); 3020 fhsize = NFSX_V2FH; 3021 } 3022 if ((fhsize % NFSX_UNSIGNED) != 0) { 3023 return EOPNOTSUPP; 3024 } 3025 nsfh->nsfh_size = fhsize; 3026 return 0; 3027 } 3028 3029 int 3030 nfsrv_comparefh(const nfsrvfh_t *fh1, const nfsrvfh_t *fh2) 3031 { 3032 3033 if (NFSRVFH_SIZE(fh1) != NFSRVFH_SIZE(fh2)) { 3034 return NFSRVFH_SIZE(fh2) - NFSRVFH_SIZE(fh1); 3035 } 3036 return memcmp(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), NFSRVFH_SIZE(fh1)); 3037 } 3038 3039 void 3040 nfsrv_copyfh(nfsrvfh_t *fh1, const nfsrvfh_t *fh2) 3041 { 3042 size_t size; 3043 3044 fh1->nsfh_size = size = NFSRVFH_SIZE(fh2); 3045 memcpy(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), size); 3046 } 3047 #endif /* defined(NFSSERVER) */ 3048