1 /* $NetBSD: nfs_subs.c,v 1.199 2008/02/13 09:51:37 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 35 */ 36 37 /* 38 * Copyright 2000 Wasabi Systems, Inc. 39 * All rights reserved. 
40 * 41 * Written by Frank van der Linden for Wasabi Systems, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed for the NetBSD Project by 54 * Wasabi Systems, Inc. 55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 56 * or promote products derived from this software without specific prior 57 * written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 69 * POSSIBILITY OF SUCH DAMAGE. 
70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.199 2008/02/13 09:51:37 yamt Exp $"); 74 75 #include "fs_nfs.h" 76 #include "opt_nfs.h" 77 #include "opt_nfsserver.h" 78 #include "opt_iso.h" 79 #include "opt_inet.h" 80 81 /* 82 * These functions support the macros and help fiddle mbuf chains for 83 * the nfs op functions. They do things like create the rpc header and 84 * copy data between mbuf chains and uio lists. 85 */ 86 #include <sys/param.h> 87 #include <sys/proc.h> 88 #include <sys/systm.h> 89 #include <sys/kernel.h> 90 #include <sys/kmem.h> 91 #include <sys/mount.h> 92 #include <sys/vnode.h> 93 #include <sys/namei.h> 94 #include <sys/mbuf.h> 95 #include <sys/socket.h> 96 #include <sys/stat.h> 97 #include <sys/malloc.h> 98 #include <sys/filedesc.h> 99 #include <sys/time.h> 100 #include <sys/dirent.h> 101 #include <sys/once.h> 102 #include <sys/kauth.h> 103 104 #include <uvm/uvm_extern.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfsnode.h> 109 #include <nfs/nfs.h> 110 #include <nfs/xdr_subs.h> 111 #include <nfs/nfsm_subs.h> 112 #include <nfs/nfsmount.h> 113 #include <nfs/nfsrtt.h> 114 #include <nfs/nfs_var.h> 115 116 #include <miscfs/specfs/specdev.h> 117 118 #include <netinet/in.h> 119 #ifdef ISO 120 #include <netiso/iso.h> 121 #endif 122 123 /* 124 * Data items converted to xdr at startup, since they are constant 125 * This is kinda hokey, but may save a little time doing byte swaps 126 */ 127 u_int32_t nfs_xdrneg1; 128 u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, 129 rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, 130 rpc_auth_kerb; 131 u_int32_t nfs_prog, nfs_true, nfs_false; 132 133 /* And other global data */ 134 const nfstype nfsv2_type[9] = 135 { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; 136 const nfstype nfsv3_type[9] = 137 { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; 138 const enum vtype nv2tov_type[8] = 139 { 
VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; 140 const enum vtype nv3tov_type[8] = 141 { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; 142 int nfs_ticks; 143 int nfs_commitsize; 144 145 MALLOC_DEFINE(M_NFSDIROFF, "NFS diroff", "NFS directory cookies"); 146 147 /* NFS client/server stats. */ 148 struct nfsstats nfsstats; 149 150 /* 151 * Mapping of old NFS Version 2 RPC numbers to generic numbers. 152 */ 153 const int nfsv3_procid[NFS_NPROCS] = { 154 NFSPROC_NULL, 155 NFSPROC_GETATTR, 156 NFSPROC_SETATTR, 157 NFSPROC_NOOP, 158 NFSPROC_LOOKUP, 159 NFSPROC_READLINK, 160 NFSPROC_READ, 161 NFSPROC_NOOP, 162 NFSPROC_WRITE, 163 NFSPROC_CREATE, 164 NFSPROC_REMOVE, 165 NFSPROC_RENAME, 166 NFSPROC_LINK, 167 NFSPROC_SYMLINK, 168 NFSPROC_MKDIR, 169 NFSPROC_RMDIR, 170 NFSPROC_READDIR, 171 NFSPROC_FSSTAT, 172 NFSPROC_NOOP, 173 NFSPROC_NOOP, 174 NFSPROC_NOOP, 175 NFSPROC_NOOP, 176 NFSPROC_NOOP 177 }; 178 179 /* 180 * and the reverse mapping from generic to Version 2 procedure numbers 181 */ 182 const int nfsv2_procid[NFS_NPROCS] = { 183 NFSV2PROC_NULL, 184 NFSV2PROC_GETATTR, 185 NFSV2PROC_SETATTR, 186 NFSV2PROC_LOOKUP, 187 NFSV2PROC_NOOP, 188 NFSV2PROC_READLINK, 189 NFSV2PROC_READ, 190 NFSV2PROC_WRITE, 191 NFSV2PROC_CREATE, 192 NFSV2PROC_MKDIR, 193 NFSV2PROC_SYMLINK, 194 NFSV2PROC_CREATE, 195 NFSV2PROC_REMOVE, 196 NFSV2PROC_RMDIR, 197 NFSV2PROC_RENAME, 198 NFSV2PROC_LINK, 199 NFSV2PROC_READDIR, 200 NFSV2PROC_NOOP, 201 NFSV2PROC_STATFS, 202 NFSV2PROC_NOOP, 203 NFSV2PROC_NOOP, 204 NFSV2PROC_NOOP, 205 NFSV2PROC_NOOP, 206 }; 207 208 /* 209 * Maps errno values to nfs error numbers. 210 * Use NFSERR_IO as the catch all for ones not specifically defined in 211 * RFC 1094. 
212 */ 213 static const u_char nfsrv_v2errmap[ELAST] = { 214 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 215 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 216 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 217 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 218 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 219 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 220 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 221 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 222 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 223 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 224 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 225 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 226 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 227 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 228 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 229 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 230 NFSERR_IO, NFSERR_IO, 231 }; 232 233 /* 234 * Maps errno values to nfs error numbers. 235 * Although it is not obvious whether or not NFS clients really care if 236 * a returned error value is in the specified list for the procedure, the 237 * safest thing to do is filter them appropriately. For Version 2, the 238 * X/Open XNFS document is the only specification that defines error values 239 * for each RPC (The RFC simply lists all possible error values for all RPCs), 240 * so I have decided to not do this for Version 2. 241 * The first entry is the default error return and the rest are the valid 242 * errors for that RPC in increasing numeric order. 
243 */ 244 static const short nfsv3err_null[] = { 245 0, 246 0, 247 }; 248 249 static const short nfsv3err_getattr[] = { 250 NFSERR_IO, 251 NFSERR_IO, 252 NFSERR_STALE, 253 NFSERR_BADHANDLE, 254 NFSERR_SERVERFAULT, 255 0, 256 }; 257 258 static const short nfsv3err_setattr[] = { 259 NFSERR_IO, 260 NFSERR_PERM, 261 NFSERR_IO, 262 NFSERR_ACCES, 263 NFSERR_INVAL, 264 NFSERR_NOSPC, 265 NFSERR_ROFS, 266 NFSERR_DQUOT, 267 NFSERR_STALE, 268 NFSERR_BADHANDLE, 269 NFSERR_NOT_SYNC, 270 NFSERR_SERVERFAULT, 271 0, 272 }; 273 274 static const short nfsv3err_lookup[] = { 275 NFSERR_IO, 276 NFSERR_NOENT, 277 NFSERR_IO, 278 NFSERR_ACCES, 279 NFSERR_NOTDIR, 280 NFSERR_NAMETOL, 281 NFSERR_STALE, 282 NFSERR_BADHANDLE, 283 NFSERR_SERVERFAULT, 284 0, 285 }; 286 287 static const short nfsv3err_access[] = { 288 NFSERR_IO, 289 NFSERR_IO, 290 NFSERR_STALE, 291 NFSERR_BADHANDLE, 292 NFSERR_SERVERFAULT, 293 0, 294 }; 295 296 static const short nfsv3err_readlink[] = { 297 NFSERR_IO, 298 NFSERR_IO, 299 NFSERR_ACCES, 300 NFSERR_INVAL, 301 NFSERR_STALE, 302 NFSERR_BADHANDLE, 303 NFSERR_NOTSUPP, 304 NFSERR_SERVERFAULT, 305 0, 306 }; 307 308 static const short nfsv3err_read[] = { 309 NFSERR_IO, 310 NFSERR_IO, 311 NFSERR_NXIO, 312 NFSERR_ACCES, 313 NFSERR_INVAL, 314 NFSERR_STALE, 315 NFSERR_BADHANDLE, 316 NFSERR_SERVERFAULT, 317 NFSERR_JUKEBOX, 318 0, 319 }; 320 321 static const short nfsv3err_write[] = { 322 NFSERR_IO, 323 NFSERR_IO, 324 NFSERR_ACCES, 325 NFSERR_INVAL, 326 NFSERR_FBIG, 327 NFSERR_NOSPC, 328 NFSERR_ROFS, 329 NFSERR_DQUOT, 330 NFSERR_STALE, 331 NFSERR_BADHANDLE, 332 NFSERR_SERVERFAULT, 333 NFSERR_JUKEBOX, 334 0, 335 }; 336 337 static const short nfsv3err_create[] = { 338 NFSERR_IO, 339 NFSERR_IO, 340 NFSERR_ACCES, 341 NFSERR_EXIST, 342 NFSERR_NOTDIR, 343 NFSERR_NOSPC, 344 NFSERR_ROFS, 345 NFSERR_NAMETOL, 346 NFSERR_DQUOT, 347 NFSERR_STALE, 348 NFSERR_BADHANDLE, 349 NFSERR_NOTSUPP, 350 NFSERR_SERVERFAULT, 351 0, 352 }; 353 354 static const short nfsv3err_mkdir[] = { 355 NFSERR_IO, 356 
NFSERR_IO, 357 NFSERR_ACCES, 358 NFSERR_EXIST, 359 NFSERR_NOTDIR, 360 NFSERR_NOSPC, 361 NFSERR_ROFS, 362 NFSERR_NAMETOL, 363 NFSERR_DQUOT, 364 NFSERR_STALE, 365 NFSERR_BADHANDLE, 366 NFSERR_NOTSUPP, 367 NFSERR_SERVERFAULT, 368 0, 369 }; 370 371 static const short nfsv3err_symlink[] = { 372 NFSERR_IO, 373 NFSERR_IO, 374 NFSERR_ACCES, 375 NFSERR_EXIST, 376 NFSERR_NOTDIR, 377 NFSERR_NOSPC, 378 NFSERR_ROFS, 379 NFSERR_NAMETOL, 380 NFSERR_DQUOT, 381 NFSERR_STALE, 382 NFSERR_BADHANDLE, 383 NFSERR_NOTSUPP, 384 NFSERR_SERVERFAULT, 385 0, 386 }; 387 388 static const short nfsv3err_mknod[] = { 389 NFSERR_IO, 390 NFSERR_IO, 391 NFSERR_ACCES, 392 NFSERR_EXIST, 393 NFSERR_NOTDIR, 394 NFSERR_NOSPC, 395 NFSERR_ROFS, 396 NFSERR_NAMETOL, 397 NFSERR_DQUOT, 398 NFSERR_STALE, 399 NFSERR_BADHANDLE, 400 NFSERR_NOTSUPP, 401 NFSERR_SERVERFAULT, 402 NFSERR_BADTYPE, 403 0, 404 }; 405 406 static const short nfsv3err_remove[] = { 407 NFSERR_IO, 408 NFSERR_NOENT, 409 NFSERR_IO, 410 NFSERR_ACCES, 411 NFSERR_NOTDIR, 412 NFSERR_ROFS, 413 NFSERR_NAMETOL, 414 NFSERR_STALE, 415 NFSERR_BADHANDLE, 416 NFSERR_SERVERFAULT, 417 0, 418 }; 419 420 static const short nfsv3err_rmdir[] = { 421 NFSERR_IO, 422 NFSERR_NOENT, 423 NFSERR_IO, 424 NFSERR_ACCES, 425 NFSERR_EXIST, 426 NFSERR_NOTDIR, 427 NFSERR_INVAL, 428 NFSERR_ROFS, 429 NFSERR_NAMETOL, 430 NFSERR_NOTEMPTY, 431 NFSERR_STALE, 432 NFSERR_BADHANDLE, 433 NFSERR_NOTSUPP, 434 NFSERR_SERVERFAULT, 435 0, 436 }; 437 438 static const short nfsv3err_rename[] = { 439 NFSERR_IO, 440 NFSERR_NOENT, 441 NFSERR_IO, 442 NFSERR_ACCES, 443 NFSERR_EXIST, 444 NFSERR_XDEV, 445 NFSERR_NOTDIR, 446 NFSERR_ISDIR, 447 NFSERR_INVAL, 448 NFSERR_NOSPC, 449 NFSERR_ROFS, 450 NFSERR_MLINK, 451 NFSERR_NAMETOL, 452 NFSERR_NOTEMPTY, 453 NFSERR_DQUOT, 454 NFSERR_STALE, 455 NFSERR_BADHANDLE, 456 NFSERR_NOTSUPP, 457 NFSERR_SERVERFAULT, 458 0, 459 }; 460 461 static const short nfsv3err_link[] = { 462 NFSERR_IO, 463 NFSERR_IO, 464 NFSERR_ACCES, 465 NFSERR_EXIST, 466 NFSERR_XDEV, 467 
NFSERR_NOTDIR, 468 NFSERR_INVAL, 469 NFSERR_NOSPC, 470 NFSERR_ROFS, 471 NFSERR_MLINK, 472 NFSERR_NAMETOL, 473 NFSERR_DQUOT, 474 NFSERR_STALE, 475 NFSERR_BADHANDLE, 476 NFSERR_NOTSUPP, 477 NFSERR_SERVERFAULT, 478 0, 479 }; 480 481 static const short nfsv3err_readdir[] = { 482 NFSERR_IO, 483 NFSERR_IO, 484 NFSERR_ACCES, 485 NFSERR_NOTDIR, 486 NFSERR_STALE, 487 NFSERR_BADHANDLE, 488 NFSERR_BAD_COOKIE, 489 NFSERR_TOOSMALL, 490 NFSERR_SERVERFAULT, 491 0, 492 }; 493 494 static const short nfsv3err_readdirplus[] = { 495 NFSERR_IO, 496 NFSERR_IO, 497 NFSERR_ACCES, 498 NFSERR_NOTDIR, 499 NFSERR_STALE, 500 NFSERR_BADHANDLE, 501 NFSERR_BAD_COOKIE, 502 NFSERR_NOTSUPP, 503 NFSERR_TOOSMALL, 504 NFSERR_SERVERFAULT, 505 0, 506 }; 507 508 static const short nfsv3err_fsstat[] = { 509 NFSERR_IO, 510 NFSERR_IO, 511 NFSERR_STALE, 512 NFSERR_BADHANDLE, 513 NFSERR_SERVERFAULT, 514 0, 515 }; 516 517 static const short nfsv3err_fsinfo[] = { 518 NFSERR_STALE, 519 NFSERR_STALE, 520 NFSERR_BADHANDLE, 521 NFSERR_SERVERFAULT, 522 0, 523 }; 524 525 static const short nfsv3err_pathconf[] = { 526 NFSERR_STALE, 527 NFSERR_STALE, 528 NFSERR_BADHANDLE, 529 NFSERR_SERVERFAULT, 530 0, 531 }; 532 533 static const short nfsv3err_commit[] = { 534 NFSERR_IO, 535 NFSERR_IO, 536 NFSERR_STALE, 537 NFSERR_BADHANDLE, 538 NFSERR_SERVERFAULT, 539 0, 540 }; 541 542 static const short * const nfsrv_v3errmap[] = { 543 nfsv3err_null, 544 nfsv3err_getattr, 545 nfsv3err_setattr, 546 nfsv3err_lookup, 547 nfsv3err_access, 548 nfsv3err_readlink, 549 nfsv3err_read, 550 nfsv3err_write, 551 nfsv3err_create, 552 nfsv3err_mkdir, 553 nfsv3err_symlink, 554 nfsv3err_mknod, 555 nfsv3err_remove, 556 nfsv3err_rmdir, 557 nfsv3err_rename, 558 nfsv3err_link, 559 nfsv3err_readdir, 560 nfsv3err_readdirplus, 561 nfsv3err_fsstat, 562 nfsv3err_fsinfo, 563 nfsv3err_pathconf, 564 nfsv3err_commit, 565 }; 566 567 extern struct nfsrtt nfsrtt; 568 extern struct nfsnodehashhead *nfsnodehashtbl; 569 extern u_long nfsnodehash; 570 571 u_long 
nfsdirhashmask; 572 573 int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); 574 575 /* 576 * Create the header for an rpc request packet 577 * The hsiz is the size of the rest of the nfs request header. 578 * (just used to decide if a cluster is a good idea) 579 */ 580 struct mbuf * 581 nfsm_reqh(struct nfsnode *np, u_long procid, int hsiz, char **bposp) 582 { 583 struct mbuf *mb; 584 char *bpos; 585 586 mb = m_get(M_WAIT, MT_DATA); 587 MCLAIM(mb, &nfs_mowner); 588 if (hsiz >= MINCLSIZE) 589 m_clget(mb, M_WAIT); 590 mb->m_len = 0; 591 bpos = mtod(mb, void *); 592 593 /* Finally, return values */ 594 *bposp = bpos; 595 return (mb); 596 } 597 598 /* 599 * Build the RPC header and fill in the authorization info. 600 * The authorization string argument is only used when the credentials 601 * come from outside of the kernel. 602 * Returns the head of the mbuf list. 603 */ 604 struct mbuf * 605 nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, 606 verf_str, mrest, mrest_len, mbp, xidp) 607 kauth_cred_t cr; 608 int nmflag; 609 int procid; 610 int auth_type; 611 int auth_len; 612 char *auth_str; 613 int verf_len; 614 char *verf_str; 615 struct mbuf *mrest; 616 int mrest_len; 617 struct mbuf **mbp; 618 u_int32_t *xidp; 619 { 620 struct mbuf *mb; 621 u_int32_t *tl; 622 char *bpos; 623 int i; 624 struct mbuf *mreq; 625 int siz, grpsiz, authsiz; 626 627 authsiz = nfsm_rndup(auth_len); 628 mb = m_gethdr(M_WAIT, MT_DATA); 629 MCLAIM(mb, &nfs_mowner); 630 if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { 631 m_clget(mb, M_WAIT); 632 } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { 633 MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); 634 } else { 635 MH_ALIGN(mb, 8 * NFSX_UNSIGNED); 636 } 637 mb->m_len = 0; 638 mreq = mb; 639 bpos = mtod(mb, void *); 640 641 /* 642 * First the RPC header. 
643 */ 644 nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); 645 646 *tl++ = *xidp = nfs_getxid(); 647 *tl++ = rpc_call; 648 *tl++ = rpc_vers; 649 *tl++ = txdr_unsigned(NFS_PROG); 650 if (nmflag & NFSMNT_NFSV3) 651 *tl++ = txdr_unsigned(NFS_VER3); 652 else 653 *tl++ = txdr_unsigned(NFS_VER2); 654 if (nmflag & NFSMNT_NFSV3) 655 *tl++ = txdr_unsigned(procid); 656 else 657 *tl++ = txdr_unsigned(nfsv2_procid[procid]); 658 659 /* 660 * And then the authorization cred. 661 */ 662 *tl++ = txdr_unsigned(auth_type); 663 *tl = txdr_unsigned(authsiz); 664 switch (auth_type) { 665 case RPCAUTH_UNIX: 666 nfsm_build(tl, u_int32_t *, auth_len); 667 *tl++ = 0; /* stamp ?? */ 668 *tl++ = 0; /* NULL hostname */ 669 *tl++ = txdr_unsigned(kauth_cred_geteuid(cr)); 670 *tl++ = txdr_unsigned(kauth_cred_getegid(cr)); 671 grpsiz = (auth_len >> 2) - 5; 672 *tl++ = txdr_unsigned(grpsiz); 673 for (i = 0; i < grpsiz; i++) 674 *tl++ = txdr_unsigned(kauth_cred_group(cr, i)); /* XXX elad review */ 675 break; 676 case RPCAUTH_KERB4: 677 siz = auth_len; 678 while (siz > 0) { 679 if (M_TRAILINGSPACE(mb) == 0) { 680 struct mbuf *mb2; 681 mb2 = m_get(M_WAIT, MT_DATA); 682 MCLAIM(mb2, &nfs_mowner); 683 if (siz >= MINCLSIZE) 684 m_clget(mb2, M_WAIT); 685 mb->m_next = mb2; 686 mb = mb2; 687 mb->m_len = 0; 688 bpos = mtod(mb, void *); 689 } 690 i = min(siz, M_TRAILINGSPACE(mb)); 691 memcpy(bpos, auth_str, i); 692 mb->m_len += i; 693 auth_str += i; 694 bpos += i; 695 siz -= i; 696 } 697 if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { 698 for (i = 0; i < siz; i++) 699 *bpos++ = '\0'; 700 mb->m_len += siz; 701 } 702 break; 703 }; 704 705 /* 706 * And the verifier... 
707 */ 708 nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 709 if (verf_str) { 710 *tl++ = txdr_unsigned(RPCAUTH_KERB4); 711 *tl = txdr_unsigned(verf_len); 712 siz = verf_len; 713 while (siz > 0) { 714 if (M_TRAILINGSPACE(mb) == 0) { 715 struct mbuf *mb2; 716 mb2 = m_get(M_WAIT, MT_DATA); 717 MCLAIM(mb2, &nfs_mowner); 718 if (siz >= MINCLSIZE) 719 m_clget(mb2, M_WAIT); 720 mb->m_next = mb2; 721 mb = mb2; 722 mb->m_len = 0; 723 bpos = mtod(mb, void *); 724 } 725 i = min(siz, M_TRAILINGSPACE(mb)); 726 memcpy(bpos, verf_str, i); 727 mb->m_len += i; 728 verf_str += i; 729 bpos += i; 730 siz -= i; 731 } 732 if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { 733 for (i = 0; i < siz; i++) 734 *bpos++ = '\0'; 735 mb->m_len += siz; 736 } 737 } else { 738 *tl++ = txdr_unsigned(RPCAUTH_NULL); 739 *tl = 0; 740 } 741 mb->m_next = mrest; 742 mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; 743 mreq->m_pkthdr.rcvif = (struct ifnet *)0; 744 *mbp = mb; 745 return (mreq); 746 } 747 748 /* 749 * copies mbuf chain to the uio scatter/gather list 750 */ 751 int 752 nfsm_mbuftouio(mrep, uiop, siz, dpos) 753 struct mbuf **mrep; 754 struct uio *uiop; 755 int siz; 756 char **dpos; 757 { 758 char *mbufcp, *uiocp; 759 int xfer, left, len; 760 struct mbuf *mp; 761 long uiosiz, rem; 762 int error = 0; 763 764 mp = *mrep; 765 mbufcp = *dpos; 766 len = mtod(mp, char *) + mp->m_len - mbufcp; 767 rem = nfsm_rndup(siz)-siz; 768 while (siz > 0) { 769 if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) 770 return (EFBIG); 771 left = uiop->uio_iov->iov_len; 772 uiocp = uiop->uio_iov->iov_base; 773 if (left > siz) 774 left = siz; 775 uiosiz = left; 776 while (left > 0) { 777 while (len == 0) { 778 mp = mp->m_next; 779 if (mp == NULL) 780 return (EBADRPC); 781 mbufcp = mtod(mp, void *); 782 len = mp->m_len; 783 } 784 xfer = (left > len) ? 
len : left; 785 error = copyout_vmspace(uiop->uio_vmspace, mbufcp, 786 uiocp, xfer); 787 if (error) { 788 return error; 789 } 790 left -= xfer; 791 len -= xfer; 792 mbufcp += xfer; 793 uiocp += xfer; 794 uiop->uio_offset += xfer; 795 uiop->uio_resid -= xfer; 796 } 797 if (uiop->uio_iov->iov_len <= siz) { 798 uiop->uio_iovcnt--; 799 uiop->uio_iov++; 800 } else { 801 uiop->uio_iov->iov_base = 802 (char *)uiop->uio_iov->iov_base + uiosiz; 803 uiop->uio_iov->iov_len -= uiosiz; 804 } 805 siz -= uiosiz; 806 } 807 *dpos = mbufcp; 808 *mrep = mp; 809 if (rem > 0) { 810 if (len < rem) 811 error = nfs_adv(mrep, dpos, rem, len); 812 else 813 *dpos += rem; 814 } 815 return (error); 816 } 817 818 /* 819 * copies a uio scatter/gather list to an mbuf chain. 820 * NOTE: can ony handle iovcnt == 1 821 */ 822 int 823 nfsm_uiotombuf(uiop, mq, siz, bpos) 824 struct uio *uiop; 825 struct mbuf **mq; 826 int siz; 827 char **bpos; 828 { 829 char *uiocp; 830 struct mbuf *mp, *mp2; 831 int xfer, left, mlen; 832 int uiosiz, clflg, rem; 833 char *cp; 834 int error; 835 836 #ifdef DIAGNOSTIC 837 if (uiop->uio_iovcnt != 1) 838 panic("nfsm_uiotombuf: iovcnt != 1"); 839 #endif 840 841 if (siz > MLEN) /* or should it >= MCLBYTES ?? */ 842 clflg = 1; 843 else 844 clflg = 0; 845 rem = nfsm_rndup(siz)-siz; 846 mp = mp2 = *mq; 847 while (siz > 0) { 848 left = uiop->uio_iov->iov_len; 849 uiocp = uiop->uio_iov->iov_base; 850 if (left > siz) 851 left = siz; 852 uiosiz = left; 853 while (left > 0) { 854 mlen = M_TRAILINGSPACE(mp); 855 if (mlen == 0) { 856 mp = m_get(M_WAIT, MT_DATA); 857 MCLAIM(mp, &nfs_mowner); 858 if (clflg) 859 m_clget(mp, M_WAIT); 860 mp->m_len = 0; 861 mp2->m_next = mp; 862 mp2 = mp; 863 mlen = M_TRAILINGSPACE(mp); 864 } 865 xfer = (left > mlen) ? 
mlen : left; 866 cp = mtod(mp, char *) + mp->m_len; 867 error = copyin_vmspace(uiop->uio_vmspace, uiocp, cp, 868 xfer); 869 if (error) { 870 /* XXX */ 871 } 872 mp->m_len += xfer; 873 left -= xfer; 874 uiocp += xfer; 875 uiop->uio_offset += xfer; 876 uiop->uio_resid -= xfer; 877 } 878 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + 879 uiosiz; 880 uiop->uio_iov->iov_len -= uiosiz; 881 siz -= uiosiz; 882 } 883 if (rem > 0) { 884 if (rem > M_TRAILINGSPACE(mp)) { 885 mp = m_get(M_WAIT, MT_DATA); 886 MCLAIM(mp, &nfs_mowner); 887 mp->m_len = 0; 888 mp2->m_next = mp; 889 } 890 cp = mtod(mp, char *) + mp->m_len; 891 for (left = 0; left < rem; left++) 892 *cp++ = '\0'; 893 mp->m_len += rem; 894 *bpos = cp; 895 } else 896 *bpos = mtod(mp, char *) + mp->m_len; 897 *mq = mp; 898 return (0); 899 } 900 901 /* 902 * Get at least "siz" bytes of correctly aligned data. 903 * When called the mbuf pointers are not necessarily correct, 904 * dsosp points to what ought to be in m_data and left contains 905 * what ought to be in m_len. 906 * This is used by the macros nfsm_dissect and nfsm_dissecton for tough 907 * cases. (The macros use the vars. dpos and dpos2) 908 */ 909 int 910 nfsm_disct(mdp, dposp, siz, left, cp2) 911 struct mbuf **mdp; 912 char **dposp; 913 int siz; 914 int left; 915 char **cp2; 916 { 917 struct mbuf *m1, *m2; 918 struct mbuf *havebuf = NULL; 919 char *src = *dposp; 920 char *dst; 921 int len; 922 923 #ifdef DEBUG 924 if (left < 0) 925 panic("nfsm_disct: left < 0"); 926 #endif 927 m1 = *mdp; 928 /* 929 * Skip through the mbuf chain looking for an mbuf with 930 * some data. If the first mbuf found has enough data 931 * and it is correctly aligned return it. 932 */ 933 while (left == 0) { 934 havebuf = m1; 935 *mdp = m1 = m1->m_next; 936 if (m1 == NULL) 937 return (EBADRPC); 938 src = mtod(m1, void *); 939 left = m1->m_len; 940 /* 941 * If we start a new mbuf and it is big enough 942 * and correctly aligned just return it, don't 943 * do any pull up. 
944 */ 945 if (left >= siz && nfsm_aligned(src)) { 946 *cp2 = src; 947 *dposp = src + siz; 948 return (0); 949 } 950 } 951 if (m1->m_flags & M_EXT) { 952 if (havebuf) { 953 /* If the first mbuf with data has external data 954 * and there is a previous empty mbuf use it 955 * to move the data into. 956 */ 957 m2 = m1; 958 *mdp = m1 = havebuf; 959 if (m1->m_flags & M_EXT) { 960 MEXTREMOVE(m1); 961 } 962 } else { 963 /* 964 * If the first mbuf has a external data 965 * and there is no previous empty mbuf 966 * allocate a new mbuf and move the external 967 * data to the new mbuf. Also make the first 968 * mbuf look empty. 969 */ 970 m2 = m_get(M_WAIT, MT_DATA); 971 m2->m_ext = m1->m_ext; 972 m2->m_data = src; 973 m2->m_len = left; 974 MCLADDREFERENCE(m1, m2); 975 MEXTREMOVE(m1); 976 m2->m_next = m1->m_next; 977 m1->m_next = m2; 978 } 979 m1->m_len = 0; 980 if (m1->m_flags & M_PKTHDR) 981 dst = m1->m_pktdat; 982 else 983 dst = m1->m_dat; 984 m1->m_data = dst; 985 } else { 986 /* 987 * If the first mbuf has no external data 988 * move the data to the front of the mbuf. 989 */ 990 if (m1->m_flags & M_PKTHDR) 991 dst = m1->m_pktdat; 992 else 993 dst = m1->m_dat; 994 m1->m_data = dst; 995 if (dst != src) 996 memmove(dst, src, left); 997 dst += left; 998 m1->m_len = left; 999 m2 = m1->m_next; 1000 } 1001 *cp2 = m1->m_data; 1002 *dposp = mtod(m1, char *) + siz; 1003 /* 1004 * Loop through mbufs pulling data up into first mbuf until 1005 * the first mbuf is full or there is no more data to 1006 * pullup. 1007 */ 1008 while ((len = M_TRAILINGSPACE(m1)) != 0 && m2) { 1009 if ((len = min(len, m2->m_len)) != 0) 1010 memcpy(dst, m2->m_data, len); 1011 m1->m_len += len; 1012 dst += len; 1013 m2->m_data += len; 1014 m2->m_len -= len; 1015 m2 = m2->m_next; 1016 } 1017 if (m1->m_len < siz) 1018 return (EBADRPC); 1019 return (0); 1020 } 1021 1022 /* 1023 * Advance the position in the mbuf chain. 
1024 */ 1025 int 1026 nfs_adv(mdp, dposp, offs, left) 1027 struct mbuf **mdp; 1028 char **dposp; 1029 int offs; 1030 int left; 1031 { 1032 struct mbuf *m; 1033 int s; 1034 1035 m = *mdp; 1036 s = left; 1037 while (s < offs) { 1038 offs -= s; 1039 m = m->m_next; 1040 if (m == NULL) 1041 return (EBADRPC); 1042 s = m->m_len; 1043 } 1044 *mdp = m; 1045 *dposp = mtod(m, char *) + offs; 1046 return (0); 1047 } 1048 1049 /* 1050 * Copy a string into mbufs for the hard cases... 1051 */ 1052 int 1053 nfsm_strtmbuf(mb, bpos, cp, siz) 1054 struct mbuf **mb; 1055 char **bpos; 1056 const char *cp; 1057 long siz; 1058 { 1059 struct mbuf *m1 = NULL, *m2; 1060 long left, xfer, len, tlen; 1061 u_int32_t *tl; 1062 int putsize; 1063 1064 putsize = 1; 1065 m2 = *mb; 1066 left = M_TRAILINGSPACE(m2); 1067 if (left > 0) { 1068 tl = ((u_int32_t *)(*bpos)); 1069 *tl++ = txdr_unsigned(siz); 1070 putsize = 0; 1071 left -= NFSX_UNSIGNED; 1072 m2->m_len += NFSX_UNSIGNED; 1073 if (left > 0) { 1074 memcpy((void *) tl, cp, left); 1075 siz -= left; 1076 cp += left; 1077 m2->m_len += left; 1078 left = 0; 1079 } 1080 } 1081 /* Loop around adding mbufs */ 1082 while (siz > 0) { 1083 m1 = m_get(M_WAIT, MT_DATA); 1084 MCLAIM(m1, &nfs_mowner); 1085 if (siz > MLEN) 1086 m_clget(m1, M_WAIT); 1087 m1->m_len = NFSMSIZ(m1); 1088 m2->m_next = m1; 1089 m2 = m1; 1090 tl = mtod(m1, u_int32_t *); 1091 tlen = 0; 1092 if (putsize) { 1093 *tl++ = txdr_unsigned(siz); 1094 m1->m_len -= NFSX_UNSIGNED; 1095 tlen = NFSX_UNSIGNED; 1096 putsize = 0; 1097 } 1098 if (siz < m1->m_len) { 1099 len = nfsm_rndup(siz); 1100 xfer = siz; 1101 if (xfer < len) 1102 *(tl+(xfer>>2)) = 0; 1103 } else { 1104 xfer = len = m1->m_len; 1105 } 1106 memcpy((void *) tl, cp, xfer); 1107 m1->m_len = len+tlen; 1108 siz -= xfer; 1109 cp += xfer; 1110 } 1111 *mb = m1; 1112 *bpos = mtod(m1, char *) + m1->m_len; 1113 return (0); 1114 } 1115 1116 /* 1117 * Directory caching routines. They work as follows: 1118 * - a cache is maintained per VDIR nfsnode. 
 * - for each offset cookie that is exported to userspace, and can
 *   thus be thrown back at us as an offset to VOP_READDIR, store
 *   information in the cache.
 * - cached are:
 *   - cookie itself
 *   - blocknumber (essentially just a search key in the buffer cache)
 *   - entry number in block.
 *   - offset cookie of block in which this entry is stored
 *   - 32 bit cookie if NFSMNT_XLATECOOKIE is used.
 * - entries are looked up in a hash table
 * - also maintained is an LRU list of entries, used to determine
 *   which ones to delete if the cache grows too large.
 * - if 32 <-> 64 translation mode is requested for a filesystem,
 *   the cache also functions as a translation table
 * - in the translation case, invalidating the cache does not mean
 *   flushing it, but just marking entries as invalid, except for
 *   the <64bit cookie, 32bitcookie> pair which is still valid, to
 *   still be able to use the cache as a translation table.
 * - 32 bit cookies are uniquely created by combining the hash table
 *   entry value, and one generation count per hash table entry,
 *   incremented each time an entry is appended to the chain.
 * - the cache is invalidated each time a directory is modified
 * - sanity checks are also done; if an entry in a block turns
 *   out not to have a matching cookie, the cache is invalidated
 *   and a new block starting from the wanted offset is fetched from
 *   the server.
 * - directory entries as read from the server are extended to contain
 *   the 64bit and, optionally, the 32bit cookies, for sanity checking
 *   the cache and exporting them to userspace through the cookie
 *   argument to VOP_READDIR.
1149 */ 1150 1151 u_long 1152 nfs_dirhash(off) 1153 off_t off; 1154 { 1155 int i; 1156 char *cp = (char *)&off; 1157 u_long sum = 0L; 1158 1159 for (i = 0 ; i < sizeof (off); i++) 1160 sum += *cp++; 1161 1162 return sum; 1163 } 1164 1165 #define _NFSDC_MTX(np) (&NFSTOV(np)->v_interlock) 1166 #define NFSDC_LOCK(np) mutex_enter(_NFSDC_MTX(np)) 1167 #define NFSDC_UNLOCK(np) mutex_exit(_NFSDC_MTX(np)) 1168 #define NFSDC_ASSERT_LOCKED(np) KASSERT(mutex_owned(_NFSDC_MTX(np))) 1169 1170 void 1171 nfs_initdircache(vp) 1172 struct vnode *vp; 1173 { 1174 struct nfsnode *np = VTONFS(vp); 1175 struct nfsdirhashhead *dircache; 1176 1177 dircache = hashinit(NFS_DIRHASHSIZ, HASH_LIST, M_NFSDIROFF, 1178 M_WAITOK, &nfsdirhashmask); 1179 1180 NFSDC_LOCK(np); 1181 if (np->n_dircache == NULL) { 1182 np->n_dircachesize = 0; 1183 np->n_dircache = dircache; 1184 dircache = NULL; 1185 TAILQ_INIT(&np->n_dirchain); 1186 } 1187 NFSDC_UNLOCK(np); 1188 if (dircache) 1189 hashdone(dircache, M_NFSDIROFF); 1190 } 1191 1192 void 1193 nfs_initdirxlatecookie(vp) 1194 struct vnode *vp; 1195 { 1196 struct nfsnode *np = VTONFS(vp); 1197 unsigned *dirgens; 1198 1199 KASSERT(VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_XLATECOOKIE); 1200 1201 dirgens = kmem_zalloc(NFS_DIRHASHSIZ * sizeof(unsigned), KM_SLEEP); 1202 NFSDC_LOCK(np); 1203 if (np->n_dirgens == NULL) { 1204 np->n_dirgens = dirgens; 1205 dirgens = NULL; 1206 } 1207 NFSDC_UNLOCK(np); 1208 if (dirgens) 1209 kmem_free(dirgens, NFS_DIRHASHSIZ * sizeof(unsigned)); 1210 } 1211 1212 static const struct nfsdircache dzero; 1213 1214 static void nfs_unlinkdircache __P((struct nfsnode *np, struct nfsdircache *)); 1215 static void nfs_putdircache_unlocked __P((struct nfsnode *, 1216 struct nfsdircache *)); 1217 1218 static void 1219 nfs_unlinkdircache(np, ndp) 1220 struct nfsnode *np; 1221 struct nfsdircache *ndp; 1222 { 1223 1224 NFSDC_ASSERT_LOCKED(np); 1225 KASSERT(ndp != &dzero); 1226 1227 if (LIST_NEXT(ndp, dc_hash) == (void *)-1) 1228 return; 1229 1230 
TAILQ_REMOVE(&np->n_dirchain, ndp, dc_chain); 1231 LIST_REMOVE(ndp, dc_hash); 1232 LIST_NEXT(ndp, dc_hash) = (void *)-1; /* mark as unlinked */ 1233 1234 nfs_putdircache_unlocked(np, ndp); 1235 } 1236 1237 void 1238 nfs_putdircache(np, ndp) 1239 struct nfsnode *np; 1240 struct nfsdircache *ndp; 1241 { 1242 int ref; 1243 1244 if (ndp == &dzero) 1245 return; 1246 1247 KASSERT(ndp->dc_refcnt > 0); 1248 NFSDC_LOCK(np); 1249 ref = --ndp->dc_refcnt; 1250 NFSDC_UNLOCK(np); 1251 1252 if (ref == 0) 1253 kmem_free(ndp, sizeof(*ndp)); 1254 } 1255 1256 static void 1257 nfs_putdircache_unlocked(struct nfsnode *np, struct nfsdircache *ndp) 1258 { 1259 int ref; 1260 1261 NFSDC_ASSERT_LOCKED(np); 1262 1263 if (ndp == &dzero) 1264 return; 1265 1266 KASSERT(ndp->dc_refcnt > 0); 1267 ref = --ndp->dc_refcnt; 1268 if (ref == 0) 1269 kmem_free(ndp, sizeof(*ndp)); 1270 } 1271 1272 struct nfsdircache * 1273 nfs_searchdircache(vp, off, do32, hashent) 1274 struct vnode *vp; 1275 off_t off; 1276 int do32; 1277 int *hashent; 1278 { 1279 struct nfsdirhashhead *ndhp; 1280 struct nfsdircache *ndp = NULL; 1281 struct nfsnode *np = VTONFS(vp); 1282 unsigned ent; 1283 1284 /* 1285 * Zero is always a valid cookie. 1286 */ 1287 if (off == 0) 1288 /* XXXUNCONST */ 1289 return (struct nfsdircache *)__UNCONST(&dzero); 1290 1291 if (!np->n_dircache) 1292 return NULL; 1293 1294 /* 1295 * We use a 32bit cookie as search key, directly reconstruct 1296 * the hashentry. Else use the hashfunction. 1297 */ 1298 if (do32) { 1299 ent = (u_int32_t)off >> 24; 1300 if (ent >= NFS_DIRHASHSIZ) 1301 return NULL; 1302 ndhp = &np->n_dircache[ent]; 1303 } else { 1304 ndhp = NFSDIRHASH(np, off); 1305 } 1306 1307 if (hashent) 1308 *hashent = (int)(ndhp - np->n_dircache); 1309 1310 NFSDC_LOCK(np); 1311 if (do32) { 1312 LIST_FOREACH(ndp, ndhp, dc_hash) { 1313 if (ndp->dc_cookie32 == (u_int32_t)off) { 1314 /* 1315 * An invalidated entry will become the 1316 * start of a new block fetched from 1317 * the server. 
1318 */ 1319 if (ndp->dc_flags & NFSDC_INVALID) { 1320 ndp->dc_blkcookie = ndp->dc_cookie; 1321 ndp->dc_entry = 0; 1322 ndp->dc_flags &= ~NFSDC_INVALID; 1323 } 1324 break; 1325 } 1326 } 1327 } else { 1328 LIST_FOREACH(ndp, ndhp, dc_hash) { 1329 if (ndp->dc_cookie == off) 1330 break; 1331 } 1332 } 1333 if (ndp != NULL) 1334 ndp->dc_refcnt++; 1335 NFSDC_UNLOCK(np); 1336 return ndp; 1337 } 1338 1339 1340 struct nfsdircache * 1341 nfs_enterdircache(struct vnode *vp, off_t off, off_t blkoff, int en, 1342 daddr_t blkno) 1343 { 1344 struct nfsnode *np = VTONFS(vp); 1345 struct nfsdirhashhead *ndhp; 1346 struct nfsdircache *ndp = NULL; 1347 struct nfsdircache *newndp = NULL; 1348 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1349 int hashent = 0, gen, overwrite; /* XXX: GCC */ 1350 1351 /* 1352 * XXX refuse entries for offset 0. amd(8) erroneously sets 1353 * cookie 0 for the '.' entry, making this necessary. This 1354 * isn't so bad, as 0 is a special case anyway. 1355 */ 1356 if (off == 0) 1357 /* XXXUNCONST */ 1358 return (struct nfsdircache *)__UNCONST(&dzero); 1359 1360 if (!np->n_dircache) 1361 /* 1362 * XXX would like to do this in nfs_nget but vtype 1363 * isn't known at that time. 1364 */ 1365 nfs_initdircache(vp); 1366 1367 if ((nmp->nm_flag & NFSMNT_XLATECOOKIE) && !np->n_dirgens) 1368 nfs_initdirxlatecookie(vp); 1369 1370 retry: 1371 ndp = nfs_searchdircache(vp, off, 0, &hashent); 1372 1373 NFSDC_LOCK(np); 1374 if (ndp && (ndp->dc_flags & NFSDC_INVALID) == 0) { 1375 /* 1376 * Overwriting an old entry. Check if it's the same. 1377 * If so, just return. If not, remove the old entry. 
1378 */ 1379 if (ndp->dc_blkcookie == blkoff && ndp->dc_entry == en) 1380 goto done; 1381 nfs_unlinkdircache(np, ndp); 1382 nfs_putdircache_unlocked(np, ndp); 1383 ndp = NULL; 1384 } 1385 1386 ndhp = &np->n_dircache[hashent]; 1387 1388 if (!ndp) { 1389 if (newndp == NULL) { 1390 NFSDC_UNLOCK(np); 1391 newndp = kmem_alloc(sizeof(*newndp), KM_SLEEP); 1392 newndp->dc_refcnt = 1; 1393 LIST_NEXT(newndp, dc_hash) = (void *)-1; 1394 goto retry; 1395 } 1396 ndp = newndp; 1397 newndp = NULL; 1398 overwrite = 0; 1399 if (nmp->nm_flag & NFSMNT_XLATECOOKIE) { 1400 /* 1401 * We're allocating a new entry, so bump the 1402 * generation number. 1403 */ 1404 KASSERT(np->n_dirgens); 1405 gen = ++np->n_dirgens[hashent]; 1406 if (gen == 0) { 1407 np->n_dirgens[hashent]++; 1408 gen++; 1409 } 1410 ndp->dc_cookie32 = (hashent << 24) | (gen & 0xffffff); 1411 } 1412 } else 1413 overwrite = 1; 1414 1415 ndp->dc_cookie = off; 1416 ndp->dc_blkcookie = blkoff; 1417 ndp->dc_entry = en; 1418 ndp->dc_flags = 0; 1419 1420 if (overwrite) 1421 goto done; 1422 1423 /* 1424 * If the maximum directory cookie cache size has been reached 1425 * for this node, take one off the front. The idea is that 1426 * directories are typically read front-to-back once, so that 1427 * the oldest entries can be thrown away without much performance 1428 * loss. 
1429 */ 1430 if (np->n_dircachesize == NFS_MAXDIRCACHE) { 1431 nfs_unlinkdircache(np, TAILQ_FIRST(&np->n_dirchain)); 1432 } else 1433 np->n_dircachesize++; 1434 1435 KASSERT(ndp->dc_refcnt == 1); 1436 LIST_INSERT_HEAD(ndhp, ndp, dc_hash); 1437 TAILQ_INSERT_TAIL(&np->n_dirchain, ndp, dc_chain); 1438 ndp->dc_refcnt++; 1439 done: 1440 KASSERT(ndp->dc_refcnt > 0); 1441 NFSDC_UNLOCK(np); 1442 if (newndp) 1443 nfs_putdircache(np, newndp); 1444 return ndp; 1445 } 1446 1447 void 1448 nfs_invaldircache(vp, flags) 1449 struct vnode *vp; 1450 int flags; 1451 { 1452 struct nfsnode *np = VTONFS(vp); 1453 struct nfsdircache *ndp = NULL; 1454 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1455 const bool forcefree = flags & NFS_INVALDIRCACHE_FORCE; 1456 1457 #ifdef DIAGNOSTIC 1458 if (vp->v_type != VDIR) 1459 panic("nfs: invaldircache: not dir"); 1460 #endif 1461 1462 if ((flags & NFS_INVALDIRCACHE_KEEPEOF) == 0) 1463 np->n_flag &= ~NEOFVALID; 1464 1465 if (!np->n_dircache) 1466 return; 1467 1468 NFSDC_LOCK(np); 1469 if (!(nmp->nm_flag & NFSMNT_XLATECOOKIE) || forcefree) { 1470 while ((ndp = TAILQ_FIRST(&np->n_dirchain)) != NULL) { 1471 KASSERT(!forcefree || ndp->dc_refcnt == 1); 1472 nfs_unlinkdircache(np, ndp); 1473 } 1474 np->n_dircachesize = 0; 1475 if (forcefree && np->n_dirgens) { 1476 kmem_free(np->n_dirgens, 1477 NFS_DIRHASHSIZ * sizeof(unsigned)); 1478 np->n_dirgens = NULL; 1479 } 1480 } else { 1481 TAILQ_FOREACH(ndp, &np->n_dirchain, dc_chain) 1482 ndp->dc_flags |= NFSDC_INVALID; 1483 } 1484 1485 NFSDC_UNLOCK(np); 1486 } 1487 1488 /* 1489 * Called once before VFS init to initialize shared and 1490 * server-specific data structures. 
1491 */ 1492 static int 1493 nfs_init0(void) 1494 { 1495 1496 nfsrtt.pos = 0; 1497 rpc_vers = txdr_unsigned(RPC_VER2); 1498 rpc_call = txdr_unsigned(RPC_CALL); 1499 rpc_reply = txdr_unsigned(RPC_REPLY); 1500 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); 1501 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); 1502 rpc_mismatch = txdr_unsigned(RPC_MISMATCH); 1503 rpc_autherr = txdr_unsigned(RPC_AUTHERR); 1504 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); 1505 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); 1506 nfs_prog = txdr_unsigned(NFS_PROG); 1507 nfs_true = txdr_unsigned(true); 1508 nfs_false = txdr_unsigned(false); 1509 nfs_xdrneg1 = txdr_unsigned(-1); 1510 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; 1511 if (nfs_ticks < 1) 1512 nfs_ticks = 1; 1513 #ifdef NFSSERVER 1514 nfsrv_init(0); /* Init server data structures */ 1515 nfsrv_initcache(); /* Init the server request cache */ 1516 { 1517 extern krwlock_t netexport_lock; /* XXX */ 1518 rw_init(&netexport_lock); 1519 } 1520 #endif /* NFSSERVER */ 1521 1522 #if defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) 1523 nfsdreq_init(); 1524 #endif /* defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) */ 1525 1526 /* 1527 * Initialize reply list and start timer 1528 */ 1529 TAILQ_INIT(&nfs_reqq); 1530 nfs_timer_init(); 1531 MOWNER_ATTACH(&nfs_mowner); 1532 1533 #ifdef NFS 1534 /* Initialize the kqueue structures */ 1535 nfs_kqinit(); 1536 /* Initialize the iod structures */ 1537 nfs_iodinit(); 1538 #endif 1539 return 0; 1540 } 1541 1542 void 1543 nfs_init(void) 1544 { 1545 static ONCE_DECL(nfs_init_once); 1546 1547 RUN_ONCE(&nfs_init_once, nfs_init0); 1548 } 1549 1550 #ifdef NFS 1551 /* 1552 * Called once at VFS init to initialize client-specific data structures. 1553 */ 1554 void 1555 nfs_vfs_init() 1556 { 1557 /* Initialize NFS server / client shared data. 
*/ 1558 nfs_init(); 1559 1560 nfs_nhinit(); /* Init the nfsnode table */ 1561 nfs_commitsize = uvmexp.npages << (PAGE_SHIFT - 4); 1562 } 1563 1564 void 1565 nfs_vfs_reinit() 1566 { 1567 nfs_nhreinit(); 1568 } 1569 1570 void 1571 nfs_vfs_done() 1572 { 1573 nfs_nhdone(); 1574 } 1575 1576 /* 1577 * Attribute cache routines. 1578 * nfs_loadattrcache() - loads or updates the cache contents from attributes 1579 * that are on the mbuf list 1580 * nfs_getattrcache() - returns valid attributes if found in cache, returns 1581 * error otherwise 1582 */ 1583 1584 /* 1585 * Load the attribute cache (that lives in the nfsnode entry) with 1586 * the values on the mbuf list and 1587 * Iff vap not NULL 1588 * copy the attributes to *vaper 1589 */ 1590 int 1591 nfsm_loadattrcache(vpp, mdp, dposp, vaper, flags) 1592 struct vnode **vpp; 1593 struct mbuf **mdp; 1594 char **dposp; 1595 struct vattr *vaper; 1596 int flags; 1597 { 1598 int32_t t1; 1599 char *cp2; 1600 int error = 0; 1601 struct mbuf *md; 1602 int v3 = NFS_ISV3(*vpp); 1603 1604 md = *mdp; 1605 t1 = (mtod(md, char *) + md->m_len) - *dposp; 1606 error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2); 1607 if (error) 1608 return (error); 1609 return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags); 1610 } 1611 1612 int 1613 nfs_loadattrcache(vpp, fp, vaper, flags) 1614 struct vnode **vpp; 1615 struct nfs_fattr *fp; 1616 struct vattr *vaper; 1617 int flags; 1618 { 1619 struct vnode *vp = *vpp; 1620 struct vattr *vap; 1621 int v3 = NFS_ISV3(vp); 1622 enum vtype vtyp; 1623 u_short vmode; 1624 struct timespec mtime; 1625 struct timespec ctime; 1626 int32_t rdev; 1627 struct nfsnode *np; 1628 extern int (**spec_nfsv2nodeop_p) __P((void *)); 1629 uid_t uid; 1630 gid_t gid; 1631 1632 if (v3) { 1633 vtyp = nfsv3tov_type(fp->fa_type); 1634 vmode = fxdr_unsigned(u_short, fp->fa_mode); 1635 rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1), 1636 fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2)); 1637 
fxdr_nfsv3time(&fp->fa3_mtime, &mtime); 1638 fxdr_nfsv3time(&fp->fa3_ctime, &ctime); 1639 } else { 1640 vtyp = nfsv2tov_type(fp->fa_type); 1641 vmode = fxdr_unsigned(u_short, fp->fa_mode); 1642 if (vtyp == VNON || vtyp == VREG) 1643 vtyp = IFTOVT(vmode); 1644 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); 1645 fxdr_nfsv2time(&fp->fa2_mtime, &mtime); 1646 ctime.tv_sec = fxdr_unsigned(u_int32_t, 1647 fp->fa2_ctime.nfsv2_sec); 1648 ctime.tv_nsec = 0; 1649 1650 /* 1651 * Really ugly NFSv2 kludge. 1652 */ 1653 if (vtyp == VCHR && rdev == 0xffffffff) 1654 vtyp = VFIFO; 1655 } 1656 1657 vmode &= ALLPERMS; 1658 1659 /* 1660 * If v_type == VNON it is a new node, so fill in the v_type, 1661 * n_mtime fields. Check to see if it represents a special 1662 * device, and if so, check for a possible alias. Once the 1663 * correct vnode has been obtained, fill in the rest of the 1664 * information. 1665 */ 1666 np = VTONFS(vp); 1667 if (vp->v_type == VNON) { 1668 vp->v_type = vtyp; 1669 if (vp->v_type == VFIFO) { 1670 extern int (**fifo_nfsv2nodeop_p) __P((void *)); 1671 vp->v_op = fifo_nfsv2nodeop_p; 1672 } else if (vp->v_type == VREG) { 1673 mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE); 1674 } else if (vp->v_type == VCHR || vp->v_type == VBLK) { 1675 vp->v_op = spec_nfsv2nodeop_p; 1676 spec_node_init(vp, (dev_t)rdev); 1677 } 1678 np->n_mtime = mtime; 1679 } 1680 uid = fxdr_unsigned(uid_t, fp->fa_uid); 1681 gid = fxdr_unsigned(gid_t, fp->fa_gid); 1682 vap = np->n_vattr; 1683 1684 /* 1685 * Invalidate access cache if uid, gid, mode or ctime changed. 
1686 */ 1687 if (np->n_accstamp != -1 && 1688 (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode 1689 || timespeccmp(&ctime, &vap->va_ctime, !=))) 1690 np->n_accstamp = -1; 1691 1692 vap->va_type = vtyp; 1693 vap->va_mode = vmode; 1694 vap->va_rdev = (dev_t)rdev; 1695 vap->va_mtime = mtime; 1696 vap->va_ctime = ctime; 1697 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; 1698 switch (vtyp) { 1699 case VDIR: 1700 vap->va_blocksize = NFS_DIRFRAGSIZ; 1701 break; 1702 case VBLK: 1703 vap->va_blocksize = BLKDEV_IOSIZE; 1704 break; 1705 case VCHR: 1706 vap->va_blocksize = MAXBSIZE; 1707 break; 1708 default: 1709 vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize : 1710 fxdr_unsigned(int32_t, fp->fa2_blocksize); 1711 break; 1712 } 1713 if (v3) { 1714 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 1715 vap->va_uid = uid; 1716 vap->va_gid = gid; 1717 vap->va_size = fxdr_hyper(&fp->fa3_size); 1718 vap->va_bytes = fxdr_hyper(&fp->fa3_used); 1719 vap->va_fileid = fxdr_hyper(&fp->fa3_fileid); 1720 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); 1721 vap->va_flags = 0; 1722 vap->va_filerev = 0; 1723 } else { 1724 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 1725 vap->va_uid = uid; 1726 vap->va_gid = gid; 1727 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); 1728 vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks) 1729 * NFS_FABLKSIZE; 1730 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); 1731 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); 1732 vap->va_flags = 0; 1733 vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); 1734 vap->va_filerev = 0; 1735 } 1736 if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) { 1737 return EFBIG; 1738 } 1739 if (vap->va_size != np->n_size) { 1740 if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) { 1741 vap->va_size = np->n_size; 1742 } else { 1743 np->n_size = vap->va_size; 1744 if (vap->va_type == VREG) { 1745 /* 1746 * we can't free pages if NAC_NOTRUNC 
because 1747 * the pages can be owned by ourselves. 1748 */ 1749 if (flags & NAC_NOTRUNC) { 1750 np->n_flag |= NTRUNCDELAYED; 1751 } else { 1752 genfs_node_wrlock(vp); 1753 mutex_enter(&vp->v_interlock); 1754 (void)VOP_PUTPAGES(vp, 0, 1755 0, PGO_SYNCIO | PGO_CLEANIT | 1756 PGO_FREE | PGO_ALLPAGES); 1757 uvm_vnp_setsize(vp, np->n_size); 1758 genfs_node_unlock(vp); 1759 } 1760 } 1761 } 1762 } 1763 np->n_attrstamp = time_second; 1764 if (vaper != NULL) { 1765 memcpy((void *)vaper, (void *)vap, sizeof(*vap)); 1766 if (np->n_flag & NCHG) { 1767 if (np->n_flag & NACC) 1768 vaper->va_atime = np->n_atim; 1769 if (np->n_flag & NUPD) 1770 vaper->va_mtime = np->n_mtim; 1771 } 1772 } 1773 return (0); 1774 } 1775 1776 /* 1777 * Check the time stamp 1778 * If the cache is valid, copy contents to *vap and return 0 1779 * otherwise return an error 1780 */ 1781 int 1782 nfs_getattrcache(vp, vaper) 1783 struct vnode *vp; 1784 struct vattr *vaper; 1785 { 1786 struct nfsnode *np = VTONFS(vp); 1787 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1788 struct vattr *vap; 1789 1790 if (np->n_attrstamp == 0 || 1791 (time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) { 1792 nfsstats.attrcache_misses++; 1793 return (ENOENT); 1794 } 1795 nfsstats.attrcache_hits++; 1796 vap = np->n_vattr; 1797 if (vap->va_size != np->n_size) { 1798 if (vap->va_type == VREG) { 1799 if ((np->n_flag & NMODIFIED) != 0 && 1800 vap->va_size < np->n_size) { 1801 vap->va_size = np->n_size; 1802 } else { 1803 np->n_size = vap->va_size; 1804 } 1805 genfs_node_wrlock(vp); 1806 uvm_vnp_setsize(vp, np->n_size); 1807 genfs_node_unlock(vp); 1808 } else 1809 np->n_size = vap->va_size; 1810 } 1811 memcpy((void *)vaper, (void *)vap, sizeof(struct vattr)); 1812 if (np->n_flag & NCHG) { 1813 if (np->n_flag & NACC) 1814 vaper->va_atime = np->n_atim; 1815 if (np->n_flag & NUPD) 1816 vaper->va_mtime = np->n_mtim; 1817 } 1818 return (0); 1819 } 1820 1821 void 1822 nfs_delayedtruncate(vp) 1823 struct vnode *vp; 1824 { 1825 
struct nfsnode *np = VTONFS(vp); 1826 1827 if (np->n_flag & NTRUNCDELAYED) { 1828 np->n_flag &= ~NTRUNCDELAYED; 1829 genfs_node_wrlock(vp); 1830 mutex_enter(&vp->v_interlock); 1831 (void)VOP_PUTPAGES(vp, 0, 1832 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); 1833 uvm_vnp_setsize(vp, np->n_size); 1834 genfs_node_unlock(vp); 1835 } 1836 } 1837 1838 #define NFS_WCCKLUDGE_TIMEOUT (24 * 60 * 60) /* 1 day */ 1839 #define NFS_WCCKLUDGE(nmp, now) \ 1840 (((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \ 1841 ((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0) 1842 1843 /* 1844 * nfs_check_wccdata: check inaccurate wcc_data 1845 * 1846 * => return non-zero if we shouldn't trust the wcc_data. 1847 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed". 1848 */ 1849 1850 int 1851 nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime, 1852 struct timespec *mtime, bool docheck) 1853 { 1854 int error = 0; 1855 1856 #if !defined(NFS_V2_ONLY) 1857 1858 if (docheck) { 1859 struct vnode *vp = NFSTOV(np); 1860 struct nfsmount *nmp; 1861 long now = time_second; 1862 const struct timespec *omtime = &np->n_vattr->va_mtime; 1863 const struct timespec *octime = &np->n_vattr->va_ctime; 1864 const char *reason = NULL; /* XXX: gcc */ 1865 1866 if (timespeccmp(omtime, mtime, <=)) { 1867 reason = "mtime"; 1868 error = EINVAL; 1869 } 1870 1871 if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) { 1872 reason = "ctime"; 1873 error = EINVAL; 1874 } 1875 1876 nmp = VFSTONFS(vp->v_mount); 1877 if (error) { 1878 1879 /* 1880 * despite of the fact that we've updated the file, 1881 * timestamps of the file were not updated as we 1882 * expected. 1883 * it means that the server has incompatible 1884 * semantics of timestamps or (more likely) 1885 * the server time is not precise enough to 1886 * track each modifications. 1887 * in that case, we disable wcc processing. 1888 * 1889 * yes, strictly speaking, we should disable all 1890 * caching. 
it's a compromise. 1891 */ 1892 1893 mutex_enter(&nmp->nm_lock); 1894 if (!NFS_WCCKLUDGE(nmp, now)) { 1895 printf("%s: inaccurate wcc data (%s) detected," 1896 " disabling wcc" 1897 " (ctime %u.%09u %u.%09u," 1898 " mtime %u.%09u %u.%09u)\n", 1899 vp->v_mount->mnt_stat.f_mntfromname, 1900 reason, 1901 (unsigned int)octime->tv_sec, 1902 (unsigned int)octime->tv_nsec, 1903 (unsigned int)ctime->tv_sec, 1904 (unsigned int)ctime->tv_nsec, 1905 (unsigned int)omtime->tv_sec, 1906 (unsigned int)omtime->tv_nsec, 1907 (unsigned int)mtime->tv_sec, 1908 (unsigned int)mtime->tv_nsec); 1909 } 1910 nmp->nm_iflag |= NFSMNT_WCCKLUDGE; 1911 nmp->nm_wcckludgetime = now; 1912 mutex_exit(&nmp->nm_lock); 1913 } else if (NFS_WCCKLUDGE(nmp, now)) { 1914 error = EPERM; /* XXX */ 1915 } else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 1916 mutex_enter(&nmp->nm_lock); 1917 if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 1918 printf("%s: re-enabling wcc\n", 1919 vp->v_mount->mnt_stat.f_mntfromname); 1920 nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE; 1921 } 1922 mutex_exit(&nmp->nm_lock); 1923 } 1924 } 1925 1926 #endif /* !defined(NFS_V2_ONLY) */ 1927 1928 return error; 1929 } 1930 1931 /* 1932 * Heuristic to see if the server XDR encodes directory cookies or not. 1933 * it is not supposed to, but a lot of servers may do this. Also, since 1934 * most/all servers will implement V2 as well, it is expected that they 1935 * may return just 32 bits worth of cookie information, so we need to 1936 * find out in which 32 bits this information is available. We do this 1937 * to avoid trouble with emulated binaries that can't handle 64 bit 1938 * directory offsets. 
1939 */ 1940 1941 void 1942 nfs_cookieheuristic(vp, flagp, l, cred) 1943 struct vnode *vp; 1944 int *flagp; 1945 struct lwp *l; 1946 kauth_cred_t cred; 1947 { 1948 struct uio auio; 1949 struct iovec aiov; 1950 char *tbuf, *cp; 1951 struct dirent *dp; 1952 off_t *cookies = NULL, *cop; 1953 int error, eof, nc, len; 1954 1955 MALLOC(tbuf, void *, NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK); 1956 1957 aiov.iov_base = tbuf; 1958 aiov.iov_len = NFS_DIRFRAGSIZ; 1959 auio.uio_iov = &aiov; 1960 auio.uio_iovcnt = 1; 1961 auio.uio_rw = UIO_READ; 1962 auio.uio_resid = NFS_DIRFRAGSIZ; 1963 auio.uio_offset = 0; 1964 UIO_SETUP_SYSSPACE(&auio); 1965 1966 error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc); 1967 1968 len = NFS_DIRFRAGSIZ - auio.uio_resid; 1969 if (error || len == 0) { 1970 FREE(tbuf, M_TEMP); 1971 if (cookies) 1972 free(cookies, M_TEMP); 1973 return; 1974 } 1975 1976 /* 1977 * Find the first valid entry and look at its offset cookie. 1978 */ 1979 1980 cp = tbuf; 1981 for (cop = cookies; len > 0; len -= dp->d_reclen) { 1982 dp = (struct dirent *)cp; 1983 if (dp->d_fileno != 0 && len >= dp->d_reclen) { 1984 if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) { 1985 *flagp |= NFSMNT_SWAPCOOKIE; 1986 nfs_invaldircache(vp, 0); 1987 nfs_vinvalbuf(vp, 0, cred, l, 1); 1988 } 1989 break; 1990 } 1991 cop++; 1992 cp += dp->d_reclen; 1993 } 1994 1995 FREE(tbuf, M_TEMP); 1996 free(cookies, M_TEMP); 1997 } 1998 #endif /* NFS */ 1999 2000 #ifdef NFSSERVER 2001 /* 2002 * Set up nameidata for a lookup() call and do it. 2003 * 2004 * If pubflag is set, this call is done for a lookup operation on the 2005 * public filehandle. In that case we allow crossing mountpoints and 2006 * absolute pathnames. However, the caller is expected to check that 2007 * the lookup result is within the public fs, and deny access if 2008 * it is not. 
2009 */ 2010 int 2011 nfs_namei(ndp, nsfh, len, slp, nam, mdp, dposp, retdirp, l, kerbflag, pubflag) 2012 struct nameidata *ndp; 2013 nfsrvfh_t *nsfh; 2014 uint32_t len; 2015 struct nfssvc_sock *slp; 2016 struct mbuf *nam; 2017 struct mbuf **mdp; 2018 char **dposp; 2019 struct vnode **retdirp; 2020 struct lwp *l; 2021 int kerbflag, pubflag; 2022 { 2023 int i, rem; 2024 struct mbuf *md; 2025 char *fromcp, *tocp, *cp; 2026 struct iovec aiov; 2027 struct uio auio; 2028 struct vnode *dp; 2029 int error, rdonly, linklen; 2030 struct componentname *cnp = &ndp->ni_cnd; 2031 2032 *retdirp = NULL; 2033 2034 if ((len + 1) > MAXPATHLEN) 2035 return (ENAMETOOLONG); 2036 if (len == 0) 2037 return (EACCES); 2038 cnp->cn_pnbuf = PNBUF_GET(); 2039 2040 /* 2041 * Copy the name from the mbuf list to ndp->ni_pnbuf 2042 * and set the various ndp fields appropriately. 2043 */ 2044 fromcp = *dposp; 2045 tocp = cnp->cn_pnbuf; 2046 md = *mdp; 2047 rem = mtod(md, char *) + md->m_len - fromcp; 2048 for (i = 0; i < len; i++) { 2049 while (rem == 0) { 2050 md = md->m_next; 2051 if (md == NULL) { 2052 error = EBADRPC; 2053 goto out; 2054 } 2055 fromcp = mtod(md, void *); 2056 rem = md->m_len; 2057 } 2058 if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { 2059 error = EACCES; 2060 goto out; 2061 } 2062 *tocp++ = *fromcp++; 2063 rem--; 2064 } 2065 *tocp = '\0'; 2066 *mdp = md; 2067 *dposp = fromcp; 2068 len = nfsm_rndup(len)-len; 2069 if (len > 0) { 2070 if (rem >= len) 2071 *dposp += len; 2072 else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) 2073 goto out; 2074 } 2075 2076 /* 2077 * Extract and set starting directory. 2078 */ 2079 error = nfsrv_fhtovp(nsfh, false, &dp, ndp->ni_cnd.cn_cred, slp, 2080 nam, &rdonly, kerbflag, pubflag); 2081 if (error) 2082 goto out; 2083 if (dp->v_type != VDIR) { 2084 vrele(dp); 2085 error = ENOTDIR; 2086 goto out; 2087 } 2088 2089 if (rdonly) 2090 cnp->cn_flags |= RDONLY; 2091 2092 *retdirp = dp; 2093 2094 if (pubflag) { 2095 /* 2096 * Oh joy. 
For WebNFS, handle those pesky '%' escapes, 2097 * and the 'native path' indicator. 2098 */ 2099 cp = PNBUF_GET(); 2100 fromcp = cnp->cn_pnbuf; 2101 tocp = cp; 2102 if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { 2103 switch ((unsigned char)*fromcp) { 2104 case WEBNFS_NATIVE_CHAR: 2105 /* 2106 * 'Native' path for us is the same 2107 * as a path according to the NFS spec, 2108 * just skip the escape char. 2109 */ 2110 fromcp++; 2111 break; 2112 /* 2113 * More may be added in the future, range 0x80-0xff 2114 */ 2115 default: 2116 error = EIO; 2117 vrele(dp); 2118 PNBUF_PUT(cp); 2119 goto out; 2120 } 2121 } 2122 /* 2123 * Translate the '%' escapes, URL-style. 2124 */ 2125 while (*fromcp != '\0') { 2126 if (*fromcp == WEBNFS_ESC_CHAR) { 2127 if (fromcp[1] != '\0' && fromcp[2] != '\0') { 2128 fromcp++; 2129 *tocp++ = HEXSTRTOI(fromcp); 2130 fromcp += 2; 2131 continue; 2132 } else { 2133 error = ENOENT; 2134 vrele(dp); 2135 PNBUF_PUT(cp); 2136 goto out; 2137 } 2138 } else 2139 *tocp++ = *fromcp++; 2140 } 2141 *tocp = '\0'; 2142 PNBUF_PUT(cnp->cn_pnbuf); 2143 cnp->cn_pnbuf = cp; 2144 } 2145 2146 ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; 2147 ndp->ni_segflg = UIO_SYSSPACE; 2148 ndp->ni_rootdir = rootvnode; 2149 ndp->ni_erootdir = NULL; 2150 2151 if (pubflag) { 2152 ndp->ni_loopcnt = 0; 2153 if (cnp->cn_pnbuf[0] == '/') 2154 dp = rootvnode; 2155 } else { 2156 cnp->cn_flags |= NOCROSSMOUNT; 2157 } 2158 2159 VREF(dp); 2160 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 2161 2162 for (;;) { 2163 cnp->cn_nameptr = cnp->cn_pnbuf; 2164 ndp->ni_startdir = dp; 2165 2166 /* 2167 * And call lookup() to do the real work 2168 */ 2169 error = lookup(ndp); 2170 if (error) { 2171 if (ndp->ni_dvp) { 2172 vput(ndp->ni_dvp); 2173 } 2174 PNBUF_PUT(cnp->cn_pnbuf); 2175 return (error); 2176 } 2177 2178 /* 2179 * Check for encountering a symbolic link 2180 */ 2181 if ((cnp->cn_flags & ISSYMLINK) == 0) { 2182 if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp) { 2183 if (ndp->ni_dvp == 
ndp->ni_vp) { 2184 vrele(ndp->ni_dvp); 2185 } else { 2186 vput(ndp->ni_dvp); 2187 } 2188 } 2189 if (cnp->cn_flags & (SAVENAME | SAVESTART)) 2190 cnp->cn_flags |= HASBUF; 2191 else 2192 PNBUF_PUT(cnp->cn_pnbuf); 2193 return (0); 2194 } else { 2195 if (!pubflag) { 2196 error = EINVAL; 2197 break; 2198 } 2199 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 2200 error = ELOOP; 2201 break; 2202 } 2203 if (ndp->ni_vp->v_mount->mnt_flag & MNT_SYMPERM) { 2204 error = VOP_ACCESS(ndp->ni_vp, VEXEC, cnp->cn_cred); 2205 if (error != 0) 2206 break; 2207 } 2208 if (ndp->ni_pathlen > 1) 2209 cp = PNBUF_GET(); 2210 else 2211 cp = cnp->cn_pnbuf; 2212 aiov.iov_base = cp; 2213 aiov.iov_len = MAXPATHLEN; 2214 auio.uio_iov = &aiov; 2215 auio.uio_iovcnt = 1; 2216 auio.uio_offset = 0; 2217 auio.uio_rw = UIO_READ; 2218 auio.uio_resid = MAXPATHLEN; 2219 UIO_SETUP_SYSSPACE(&auio); 2220 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 2221 if (error) { 2222 badlink: 2223 if (ndp->ni_pathlen > 1) 2224 PNBUF_PUT(cp); 2225 break; 2226 } 2227 linklen = MAXPATHLEN - auio.uio_resid; 2228 if (linklen == 0) { 2229 error = ENOENT; 2230 goto badlink; 2231 } 2232 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 2233 error = ENAMETOOLONG; 2234 goto badlink; 2235 } 2236 if (ndp->ni_pathlen > 1) { 2237 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); 2238 PNBUF_PUT(cnp->cn_pnbuf); 2239 cnp->cn_pnbuf = cp; 2240 } else 2241 cnp->cn_pnbuf[linklen] = '\0'; 2242 ndp->ni_pathlen += linklen; 2243 vput(ndp->ni_vp); 2244 dp = ndp->ni_dvp; 2245 2246 /* 2247 * Check if root directory should replace current directory. 
2248 */ 2249 if (cnp->cn_pnbuf[0] == '/') { 2250 vput(dp); 2251 dp = ndp->ni_rootdir; 2252 VREF(dp); 2253 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 2254 } 2255 } 2256 } 2257 vput(ndp->ni_dvp); 2258 vput(ndp->ni_vp); 2259 ndp->ni_vp = NULL; 2260 out: 2261 PNBUF_PUT(cnp->cn_pnbuf); 2262 return (error); 2263 } 2264 #endif /* NFSSERVER */ 2265 2266 /* 2267 * A fiddled version of m_adj() that ensures null fill to a 32-bit 2268 * boundary and only trims off the back end 2269 * 2270 * 1. trim off 'len' bytes as m_adj(mp, -len). 2271 * 2. add zero-padding 'nul' bytes at the end of the mbuf chain. 2272 */ 2273 void 2274 nfs_zeropad(mp, len, nul) 2275 struct mbuf *mp; 2276 int len; 2277 int nul; 2278 { 2279 struct mbuf *m; 2280 int count; 2281 2282 /* 2283 * Trim from tail. Scan the mbuf chain, 2284 * calculating its length and finding the last mbuf. 2285 * If the adjustment only affects this mbuf, then just 2286 * adjust and return. Otherwise, rescan and truncate 2287 * after the remaining size. 2288 */ 2289 count = 0; 2290 m = mp; 2291 for (;;) { 2292 count += m->m_len; 2293 if (m->m_next == NULL) 2294 break; 2295 m = m->m_next; 2296 } 2297 2298 KDASSERT(count >= len); 2299 2300 if (m->m_len >= len) { 2301 m->m_len -= len; 2302 } else { 2303 count -= len; 2304 /* 2305 * Correct length for chain is "count". 2306 * Find the mbuf with last data, adjust its length, 2307 * and toss data from remaining mbufs on chain. 2308 */ 2309 for (m = mp; m; m = m->m_next) { 2310 if (m->m_len >= count) { 2311 m->m_len = count; 2312 break; 2313 } 2314 count -= m->m_len; 2315 } 2316 KASSERT(m && m->m_next); 2317 m_freem(m->m_next); 2318 m->m_next = NULL; 2319 } 2320 2321 KDASSERT(m->m_next == NULL); 2322 2323 /* 2324 * zero-padding. 
2325 */ 2326 if (nul > 0) { 2327 char *cp; 2328 int i; 2329 2330 if (M_ROMAP(m) || M_TRAILINGSPACE(m) < nul) { 2331 struct mbuf *n; 2332 2333 KDASSERT(MLEN >= nul); 2334 n = m_get(M_WAIT, MT_DATA); 2335 MCLAIM(n, &nfs_mowner); 2336 n->m_len = nul; 2337 n->m_next = NULL; 2338 m->m_next = n; 2339 cp = mtod(n, void *); 2340 } else { 2341 cp = mtod(m, char *) + m->m_len; 2342 m->m_len += nul; 2343 } 2344 for (i = 0; i < nul; i++) 2345 *cp++ = '\0'; 2346 } 2347 return; 2348 } 2349 2350 /* 2351 * Make these functions instead of macros, so that the kernel text size 2352 * doesn't get too big... 2353 */ 2354 void 2355 nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) 2356 struct nfsrv_descript *nfsd; 2357 int before_ret; 2358 struct vattr *before_vap; 2359 int after_ret; 2360 struct vattr *after_vap; 2361 struct mbuf **mbp; 2362 char **bposp; 2363 { 2364 struct mbuf *mb = *mbp; 2365 char *bpos = *bposp; 2366 u_int32_t *tl; 2367 2368 if (before_ret) { 2369 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 2370 *tl = nfs_false; 2371 } else { 2372 nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); 2373 *tl++ = nfs_true; 2374 txdr_hyper(before_vap->va_size, tl); 2375 tl += 2; 2376 txdr_nfsv3time(&(before_vap->va_mtime), tl); 2377 tl += 2; 2378 txdr_nfsv3time(&(before_vap->va_ctime), tl); 2379 } 2380 *bposp = bpos; 2381 *mbp = mb; 2382 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); 2383 } 2384 2385 void 2386 nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) 2387 struct nfsrv_descript *nfsd; 2388 int after_ret; 2389 struct vattr *after_vap; 2390 struct mbuf **mbp; 2391 char **bposp; 2392 { 2393 struct mbuf *mb = *mbp; 2394 char *bpos = *bposp; 2395 u_int32_t *tl; 2396 struct nfs_fattr *fp; 2397 2398 if (after_ret) { 2399 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 2400 *tl = nfs_false; 2401 } else { 2402 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); 2403 *tl++ = nfs_true; 2404 fp = (struct nfs_fattr *)tl; 2405 
nfsm_srvfattr(nfsd, after_vap, fp); 2406 } 2407 *mbp = mb; 2408 *bposp = bpos; 2409 } 2410 2411 void 2412 nfsm_srvfattr(nfsd, vap, fp) 2413 struct nfsrv_descript *nfsd; 2414 struct vattr *vap; 2415 struct nfs_fattr *fp; 2416 { 2417 2418 fp->fa_nlink = txdr_unsigned(vap->va_nlink); 2419 fp->fa_uid = txdr_unsigned(vap->va_uid); 2420 fp->fa_gid = txdr_unsigned(vap->va_gid); 2421 if (nfsd->nd_flag & ND_NFSV3) { 2422 fp->fa_type = vtonfsv3_type(vap->va_type); 2423 fp->fa_mode = vtonfsv3_mode(vap->va_mode); 2424 txdr_hyper(vap->va_size, &fp->fa3_size); 2425 txdr_hyper(vap->va_bytes, &fp->fa3_used); 2426 fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); 2427 fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); 2428 fp->fa3_fsid.nfsuquad[0] = 0; 2429 fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); 2430 txdr_hyper(vap->va_fileid, &fp->fa3_fileid); 2431 txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); 2432 txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); 2433 txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); 2434 } else { 2435 fp->fa_type = vtonfsv2_type(vap->va_type); 2436 fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 2437 fp->fa2_size = txdr_unsigned(vap->va_size); 2438 fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); 2439 if (vap->va_type == VFIFO) 2440 fp->fa2_rdev = 0xffffffff; 2441 else 2442 fp->fa2_rdev = txdr_unsigned(vap->va_rdev); 2443 fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); 2444 fp->fa2_fsid = txdr_unsigned(vap->va_fsid); 2445 fp->fa2_fileid = txdr_unsigned(vap->va_fileid); 2446 txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); 2447 txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); 2448 txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); 2449 } 2450 } 2451 2452 #ifdef NFSSERVER 2453 /* 2454 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) 2455 * - look up fsid in mount list (if not found ret error) 2456 * - get vp and export rights by calling VFS_FHTOVP() 2457 * - if cred->cr_uid == 0 or 
MNT_EXPORTANON set it to credanon 2458 * - if not lockflag unlock it with VOP_UNLOCK() 2459 */ 2460 int 2461 nfsrv_fhtovp(nfsrvfh_t *nsfh, int lockflag, struct vnode **vpp, 2462 kauth_cred_t cred, struct nfssvc_sock *slp, struct mbuf *nam, int *rdonlyp, 2463 int kerbflag, int pubflag) 2464 { 2465 struct mount *mp; 2466 kauth_cred_t credanon; 2467 int error, exflags; 2468 struct sockaddr_in *saddr; 2469 fhandle_t *fhp; 2470 2471 fhp = NFSRVFH_FHANDLE(nsfh); 2472 *vpp = (struct vnode *)0; 2473 2474 if (nfs_ispublicfh(nsfh)) { 2475 if (!pubflag || !nfs_pub.np_valid) 2476 return (ESTALE); 2477 fhp = nfs_pub.np_handle; 2478 } 2479 2480 error = netexport_check(&fhp->fh_fsid, nam, &mp, &exflags, &credanon); 2481 if (error) { 2482 return error; 2483 } 2484 2485 error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); 2486 if (error) 2487 return (error); 2488 2489 if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { 2490 saddr = mtod(nam, struct sockaddr_in *); 2491 if ((saddr->sin_family == AF_INET) && 2492 ntohs(saddr->sin_port) >= IPPORT_RESERVED) { 2493 vput(*vpp); 2494 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2495 } 2496 #ifdef INET6 2497 if ((saddr->sin_family == AF_INET6) && 2498 ntohs(saddr->sin_port) >= IPV6PORT_RESERVED) { 2499 vput(*vpp); 2500 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2501 } 2502 #endif 2503 } 2504 /* 2505 * Check/setup credentials. 2506 */ 2507 if (exflags & MNT_EXKERB) { 2508 if (!kerbflag) { 2509 vput(*vpp); 2510 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2511 } 2512 } else if (kerbflag) { 2513 vput(*vpp); 2514 return (NFSERR_AUTHERR | AUTH_TOOWEAK); 2515 } else if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 2516 NULL) == 0 || (exflags & MNT_EXPORTANON)) { 2517 kauth_cred_clone(credanon, cred); 2518 } 2519 if (exflags & MNT_EXRDONLY) 2520 *rdonlyp = 1; 2521 else 2522 *rdonlyp = 0; 2523 if (!lockflag) 2524 VOP_UNLOCK(*vpp, 0); 2525 return (0); 2526 } 2527 2528 /* 2529 * WebNFS: check if a filehandle is a public filehandle. 
For v3, this 2530 * means a length of 0, for v2 it means all zeroes. 2531 */ 2532 int 2533 nfs_ispublicfh(const nfsrvfh_t *nsfh) 2534 { 2535 const char *cp = (const void *)(NFSRVFH_DATA(nsfh)); 2536 int i; 2537 2538 if (NFSRVFH_SIZE(nsfh) == 0) { 2539 return true; 2540 } 2541 if (NFSRVFH_SIZE(nsfh) != NFSX_V2FH) { 2542 return false; 2543 } 2544 for (i = 0; i < NFSX_V2FH; i++) 2545 if (*cp++ != 0) 2546 return false; 2547 return true; 2548 } 2549 #endif /* NFSSERVER */ 2550 2551 /* 2552 * This function compares two net addresses by family and returns true 2553 * if they are the same host. 2554 * If there is any doubt, return false. 2555 * The AF_INET family is handled as a special case so that address mbufs 2556 * don't need to be saved to store "struct in_addr", which is only 4 bytes. 2557 */ 2558 int 2559 netaddr_match(family, haddr, nam) 2560 int family; 2561 union nethostaddr *haddr; 2562 struct mbuf *nam; 2563 { 2564 struct sockaddr_in *inetaddr; 2565 2566 switch (family) { 2567 case AF_INET: 2568 inetaddr = mtod(nam, struct sockaddr_in *); 2569 if (inetaddr->sin_family == AF_INET && 2570 inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 2571 return (1); 2572 break; 2573 #ifdef INET6 2574 case AF_INET6: 2575 { 2576 struct sockaddr_in6 *sin6_1, *sin6_2; 2577 2578 sin6_1 = mtod(nam, struct sockaddr_in6 *); 2579 sin6_2 = mtod(haddr->had_nam, struct sockaddr_in6 *); 2580 if (sin6_1->sin6_family == AF_INET6 && 2581 IN6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr, &sin6_2->sin6_addr)) 2582 return 1; 2583 } 2584 #endif 2585 #ifdef ISO 2586 case AF_ISO: 2587 { 2588 struct sockaddr_iso *isoaddr1, *isoaddr2; 2589 2590 isoaddr1 = mtod(nam, struct sockaddr_iso *); 2591 isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 2592 if (isoaddr1->siso_family == AF_ISO && 2593 isoaddr1->siso_nlen > 0 && 2594 isoaddr1->siso_nlen == isoaddr2->siso_nlen && 2595 SAME_ISOADDR(isoaddr1, isoaddr2)) 2596 return (1); 2597 break; 2598 } 2599 #endif /* ISO */ 2600 default: 2601 break; 2602 }; 2603 
return (0); 2604 } 2605 2606 /* 2607 * The write verifier has changed (probably due to a server reboot), so all 2608 * PG_NEEDCOMMIT pages will have to be written again. Since they are marked 2609 * as dirty or are being written out just now, all this takes is clearing 2610 * the PG_NEEDCOMMIT flag. Once done the new write verifier can be set for 2611 * the mount point. 2612 */ 2613 void 2614 nfs_clearcommit(mp) 2615 struct mount *mp; 2616 { 2617 struct vnode *vp; 2618 struct nfsnode *np; 2619 struct vm_page *pg; 2620 struct nfsmount *nmp = VFSTONFS(mp); 2621 2622 rw_enter(&nmp->nm_writeverflock, RW_WRITER); 2623 mutex_enter(&mntvnode_lock); 2624 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2625 KASSERT(vp->v_mount == mp); 2626 if (vp->v_type != VREG) 2627 continue; 2628 np = VTONFS(vp); 2629 np->n_pushlo = np->n_pushhi = np->n_pushedlo = 2630 np->n_pushedhi = 0; 2631 np->n_commitflags &= 2632 ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); 2633 mutex_enter(&vp->v_uobj.vmobjlock); 2634 TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { 2635 pg->flags &= ~PG_NEEDCOMMIT; 2636 } 2637 mutex_exit(&vp->v_uobj.vmobjlock); 2638 } 2639 mutex_exit(&mntvnode_lock); 2640 mutex_enter(&nmp->nm_lock); 2641 nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF; 2642 mutex_exit(&nmp->nm_lock); 2643 rw_exit(&nmp->nm_writeverflock); 2644 } 2645 2646 void 2647 nfs_merge_commit_ranges(vp) 2648 struct vnode *vp; 2649 { 2650 struct nfsnode *np = VTONFS(vp); 2651 2652 KASSERT(np->n_commitflags & NFS_COMMIT_PUSH_VALID); 2653 2654 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { 2655 np->n_pushedlo = np->n_pushlo; 2656 np->n_pushedhi = np->n_pushhi; 2657 np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; 2658 } else { 2659 if (np->n_pushlo < np->n_pushedlo) 2660 np->n_pushedlo = np->n_pushlo; 2661 if (np->n_pushhi > np->n_pushedhi) 2662 np->n_pushedhi = np->n_pushhi; 2663 } 2664 2665 np->n_pushlo = np->n_pushhi = 0; 2666 np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID; 2667 2668 #ifdef NFS_DEBUG_COMMIT 
2669 printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2670 (unsigned)np->n_pushedhi); 2671 #endif 2672 } 2673 2674 int 2675 nfs_in_committed_range(vp, off, len) 2676 struct vnode *vp; 2677 off_t off, len; 2678 { 2679 struct nfsnode *np = VTONFS(vp); 2680 off_t lo, hi; 2681 2682 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) 2683 return 0; 2684 lo = off; 2685 hi = lo + len; 2686 2687 return (lo >= np->n_pushedlo && hi <= np->n_pushedhi); 2688 } 2689 2690 int 2691 nfs_in_tobecommitted_range(vp, off, len) 2692 struct vnode *vp; 2693 off_t off, len; 2694 { 2695 struct nfsnode *np = VTONFS(vp); 2696 off_t lo, hi; 2697 2698 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) 2699 return 0; 2700 lo = off; 2701 hi = lo + len; 2702 2703 return (lo >= np->n_pushlo && hi <= np->n_pushhi); 2704 } 2705 2706 void 2707 nfs_add_committed_range(vp, off, len) 2708 struct vnode *vp; 2709 off_t off, len; 2710 { 2711 struct nfsnode *np = VTONFS(vp); 2712 off_t lo, hi; 2713 2714 lo = off; 2715 hi = lo + len; 2716 2717 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) { 2718 np->n_pushedlo = lo; 2719 np->n_pushedhi = hi; 2720 np->n_commitflags |= NFS_COMMIT_PUSHED_VALID; 2721 } else { 2722 if (hi > np->n_pushedhi) 2723 np->n_pushedhi = hi; 2724 if (lo < np->n_pushedlo) 2725 np->n_pushedlo = lo; 2726 } 2727 #ifdef NFS_DEBUG_COMMIT 2728 printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2729 (unsigned)np->n_pushedhi); 2730 #endif 2731 } 2732 2733 void 2734 nfs_del_committed_range(vp, off, len) 2735 struct vnode *vp; 2736 off_t off, len; 2737 { 2738 struct nfsnode *np = VTONFS(vp); 2739 off_t lo, hi; 2740 2741 if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) 2742 return; 2743 2744 lo = off; 2745 hi = lo + len; 2746 2747 if (lo > np->n_pushedhi || hi < np->n_pushedlo) 2748 return; 2749 if (lo <= np->n_pushedlo) 2750 np->n_pushedlo = hi; 2751 else if (hi >= np->n_pushedhi) 2752 np->n_pushedhi = lo; 2753 else { 2754 /* 2755 * XXX There's only one range. 
If the deleted range 2756 * is in the middle, pick the largest of the 2757 * contiguous ranges that it leaves. 2758 */ 2759 if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi)) 2760 np->n_pushedhi = lo; 2761 else 2762 np->n_pushedlo = hi; 2763 } 2764 #ifdef NFS_DEBUG_COMMIT 2765 printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo, 2766 (unsigned)np->n_pushedhi); 2767 #endif 2768 } 2769 2770 void 2771 nfs_add_tobecommitted_range(vp, off, len) 2772 struct vnode *vp; 2773 off_t off, len; 2774 { 2775 struct nfsnode *np = VTONFS(vp); 2776 off_t lo, hi; 2777 2778 lo = off; 2779 hi = lo + len; 2780 2781 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) { 2782 np->n_pushlo = lo; 2783 np->n_pushhi = hi; 2784 np->n_commitflags |= NFS_COMMIT_PUSH_VALID; 2785 } else { 2786 if (lo < np->n_pushlo) 2787 np->n_pushlo = lo; 2788 if (hi > np->n_pushhi) 2789 np->n_pushhi = hi; 2790 } 2791 #ifdef NFS_DEBUG_COMMIT 2792 printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, 2793 (unsigned)np->n_pushhi); 2794 #endif 2795 } 2796 2797 void 2798 nfs_del_tobecommitted_range(vp, off, len) 2799 struct vnode *vp; 2800 off_t off, len; 2801 { 2802 struct nfsnode *np = VTONFS(vp); 2803 off_t lo, hi; 2804 2805 if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) 2806 return; 2807 2808 lo = off; 2809 hi = lo + len; 2810 2811 if (lo > np->n_pushhi || hi < np->n_pushlo) 2812 return; 2813 2814 if (lo <= np->n_pushlo) 2815 np->n_pushlo = hi; 2816 else if (hi >= np->n_pushhi) 2817 np->n_pushhi = lo; 2818 else { 2819 /* 2820 * XXX There's only one range. If the deleted range 2821 * is in the middle, pick the largest of the 2822 * contiguous ranges that it leaves. 2823 */ 2824 if ((np->n_pushlo - lo) > (hi - np->n_pushhi)) 2825 np->n_pushhi = lo; 2826 else 2827 np->n_pushlo = hi; 2828 } 2829 #ifdef NFS_DEBUG_COMMIT 2830 printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo, 2831 (unsigned)np->n_pushhi); 2832 #endif 2833 } 2834 2835 /* 2836 * Map errnos to NFS error numbers. 
For Version 3 also filter out error 2837 * numbers not specified for the associated procedure. 2838 */ 2839 int 2840 nfsrv_errmap(nd, err) 2841 struct nfsrv_descript *nd; 2842 int err; 2843 { 2844 const short *defaulterrp, *errp; 2845 2846 if (nd->nd_flag & ND_NFSV3) { 2847 if (nd->nd_procnum <= NFSPROC_COMMIT) { 2848 errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; 2849 while (*++errp) { 2850 if (*errp == err) 2851 return (err); 2852 else if (*errp > err) 2853 break; 2854 } 2855 return ((int)*defaulterrp); 2856 } else 2857 return (err & 0xffff); 2858 } 2859 if (err <= ELAST) 2860 return ((int)nfsrv_v2errmap[err - 1]); 2861 return (NFSERR_IO); 2862 } 2863 2864 u_int32_t 2865 nfs_getxid() 2866 { 2867 static u_int32_t base; 2868 static u_int32_t nfs_xid = 0; 2869 static struct simplelock nfs_xidlock = SIMPLELOCK_INITIALIZER; 2870 u_int32_t newxid; 2871 2872 simple_lock(&nfs_xidlock); 2873 /* 2874 * derive initial xid from system time 2875 * XXX time is invalid if root not yet mounted 2876 */ 2877 if (__predict_false(!base && (rootvp))) { 2878 struct timeval tv; 2879 2880 microtime(&tv); 2881 base = tv.tv_sec << 12; 2882 nfs_xid = base; 2883 } 2884 2885 /* 2886 * Skip zero xid if it should ever happen. 2887 */ 2888 if (__predict_false(++nfs_xid == 0)) 2889 nfs_xid++; 2890 newxid = nfs_xid; 2891 simple_unlock(&nfs_xidlock); 2892 2893 return txdr_unsigned(newxid); 2894 } 2895 2896 /* 2897 * assign a new xid for existing request. 2898 * used for NFSERR_JUKEBOX handling. 
2899 */ 2900 void 2901 nfs_renewxid(struct nfsreq *req) 2902 { 2903 u_int32_t xid; 2904 int off; 2905 2906 xid = nfs_getxid(); 2907 if (req->r_nmp->nm_sotype == SOCK_STREAM) 2908 off = sizeof(u_int32_t); /* RPC record mark */ 2909 else 2910 off = 0; 2911 2912 m_copyback(req->r_mreq, off, sizeof(xid), (void *)&xid); 2913 req->r_xid = xid; 2914 } 2915 2916 #if defined(NFSSERVER) 2917 int 2918 nfsrv_composefh(struct vnode *vp, nfsrvfh_t *nsfh, bool v3) 2919 { 2920 int error; 2921 size_t fhsize; 2922 2923 fhsize = NFSD_MAXFHSIZE; 2924 error = vfs_composefh(vp, (void *)NFSRVFH_DATA(nsfh), &fhsize); 2925 if (NFSX_FHTOOBIG_P(fhsize, v3)) { 2926 error = EOPNOTSUPP; 2927 } 2928 if (error != 0) { 2929 return error; 2930 } 2931 if (!v3 && fhsize < NFSX_V2FH) { 2932 memset((char *)NFSRVFH_DATA(nsfh) + fhsize, 0, 2933 NFSX_V2FH - fhsize); 2934 fhsize = NFSX_V2FH; 2935 } 2936 if ((fhsize % NFSX_UNSIGNED) != 0) { 2937 return EOPNOTSUPP; 2938 } 2939 nsfh->nsfh_size = fhsize; 2940 return 0; 2941 } 2942 2943 int 2944 nfsrv_comparefh(const nfsrvfh_t *fh1, const nfsrvfh_t *fh2) 2945 { 2946 2947 if (NFSRVFH_SIZE(fh1) != NFSRVFH_SIZE(fh2)) { 2948 return NFSRVFH_SIZE(fh2) - NFSRVFH_SIZE(fh1); 2949 } 2950 return memcmp(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), NFSRVFH_SIZE(fh1)); 2951 } 2952 2953 void 2954 nfsrv_copyfh(nfsrvfh_t *fh1, const nfsrvfh_t *fh2) 2955 { 2956 size_t size; 2957 2958 fh1->nsfh_size = size = NFSRVFH_SIZE(fh2); 2959 memcpy(NFSRVFH_DATA(fh1), NFSRVFH_DATA(fh2), size); 2960 } 2961 #endif /* defined(NFSSERVER) */ 2962 2963 #if defined(NFS) 2964 /* 2965 * Set the attribute timeout based on how recently the file has been modified. 
2966 */ 2967 2968 time_t 2969 nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np) 2970 { 2971 time_t timeo; 2972 2973 if ((nmp->nm_flag & NFSMNT_NOAC) != 0) 2974 return 0; 2975 2976 if (((np)->n_flag & NMODIFIED) != 0) 2977 return NFS_MINATTRTIMO; 2978 2979 timeo = (time_second - np->n_mtime.tv_sec) / 10; 2980 timeo = max(timeo, NFS_MINATTRTIMO); 2981 timeo = min(timeo, NFS_MAXATTRTIMO); 2982 return timeo; 2983 } 2984 #endif /* defined(NFS) */ 2985