1 /* $NetBSD: vfs_lookup.c,v 1.229 2021/06/29 22:39:21 dholland Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.229 2021/06/29 22:39:21 dholland Exp $"); 41 42 #ifdef _KERNEL_OPT 43 #include "opt_magiclinks.h" 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/syslimits.h> 50 #include <sys/time.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/vnode_impl.h> 54 #include <sys/mount.h> 55 #include <sys/errno.h> 56 #include <sys/filedesc.h> 57 #include <sys/hash.h> 58 #include <sys/proc.h> 59 #include <sys/syslog.h> 60 #include <sys/kauth.h> 61 #include <sys/ktrace.h> 62 #include <sys/dirent.h> 63 64 #ifndef MAGICLINKS 65 #define MAGICLINKS 0 66 #endif 67 68 int vfs_magiclinks = MAGICLINKS; 69 70 __CTASSERT(MAXNAMLEN == NAME_MAX); 71 72 /* 73 * Substitute replacement text for 'magic' strings in symlinks. 74 * Returns 0 if successful, and returns non-zero if an error 75 * occurs. (Currently, the only possible error is running out 76 * of temporary pathname space.) 77 * 78 * Looks for "@<string>" and "@<string>/", where <string> is a 79 * recognized 'magic' string. Replaces the "@<string>" with the 80 * appropriate replacement text. (Note that in some cases the 81 * replacement text may have zero length.) 
82 * 83 * This would have been table driven, but the variance in 84 * replacement strings (and replacement string lengths) made 85 * that impractical. 86 */ 87 #define VNL(x) \ 88 (sizeof(x) - 1) 89 90 #define VO '{' 91 #define VC '}' 92 93 #define MATCH(str) \ 94 ((termchar == '/' && i + VNL(str) == *len) || \ 95 (i + VNL(str) < *len && \ 96 cp[i + VNL(str)] == termchar)) && \ 97 !strncmp((str), &cp[i], VNL(str)) 98 99 #define SUBSTITUTE(m, s, sl) \ 100 if ((newlen + (sl)) >= MAXPATHLEN) \ 101 return 1; \ 102 i += VNL(m); \ 103 if (termchar != '/') \ 104 i++; \ 105 (void)memcpy(&tmp[newlen], (s), (sl)); \ 106 newlen += (sl); \ 107 change = 1; \ 108 termchar = '/'; 109 110 static int 111 symlink_magic(struct proc *p, char *cp, size_t *len) 112 { 113 char *tmp; 114 size_t change, i, newlen, slen; 115 char termchar = '/'; 116 char idtmp[11]; /* enough for 32 bit *unsigned* integer */ 117 118 119 tmp = PNBUF_GET(); 120 for (change = i = newlen = 0; i < *len; ) { 121 if (cp[i] != '@') { 122 tmp[newlen++] = cp[i++]; 123 continue; 124 } 125 126 i++; 127 128 /* Check for @{var} syntax. */ 129 if (cp[i] == VO) { 130 termchar = VC; 131 i++; 132 } 133 134 /* 135 * The following checks should be ordered according 136 * to frequency of use. 
137 */ 138 if (MATCH("machine_arch")) { 139 slen = VNL(MACHINE_ARCH); 140 SUBSTITUTE("machine_arch", MACHINE_ARCH, slen); 141 } else if (MATCH("machine")) { 142 slen = VNL(MACHINE); 143 SUBSTITUTE("machine", MACHINE, slen); 144 } else if (MATCH("hostname")) { 145 SUBSTITUTE("hostname", hostname, hostnamelen); 146 } else if (MATCH("osrelease")) { 147 slen = strlen(osrelease); 148 SUBSTITUTE("osrelease", osrelease, slen); 149 } else if (MATCH("emul")) { 150 slen = strlen(p->p_emul->e_name); 151 SUBSTITUTE("emul", p->p_emul->e_name, slen); 152 } else if (MATCH("kernel_ident")) { 153 slen = strlen(kernel_ident); 154 SUBSTITUTE("kernel_ident", kernel_ident, slen); 155 } else if (MATCH("domainname")) { 156 SUBSTITUTE("domainname", domainname, domainnamelen); 157 } else if (MATCH("ostype")) { 158 slen = strlen(ostype); 159 SUBSTITUTE("ostype", ostype, slen); 160 } else if (MATCH("uid")) { 161 slen = snprintf(idtmp, sizeof(idtmp), "%u", 162 kauth_cred_geteuid(kauth_cred_get())); 163 SUBSTITUTE("uid", idtmp, slen); 164 } else if (MATCH("ruid")) { 165 slen = snprintf(idtmp, sizeof(idtmp), "%u", 166 kauth_cred_getuid(kauth_cred_get())); 167 SUBSTITUTE("ruid", idtmp, slen); 168 } else if (MATCH("gid")) { 169 slen = snprintf(idtmp, sizeof(idtmp), "%u", 170 kauth_cred_getegid(kauth_cred_get())); 171 SUBSTITUTE("gid", idtmp, slen); 172 } else if (MATCH("rgid")) { 173 slen = snprintf(idtmp, sizeof(idtmp), "%u", 174 kauth_cred_getgid(kauth_cred_get())); 175 SUBSTITUTE("rgid", idtmp, slen); 176 } else { 177 tmp[newlen++] = '@'; 178 if (termchar == VC) 179 tmp[newlen++] = VO; 180 } 181 } 182 183 if (change) { 184 (void)memcpy(cp, tmp, newlen); 185 *len = newlen; 186 } 187 PNBUF_PUT(tmp); 188 189 return 0; 190 } 191 192 #undef VNL 193 #undef VO 194 #undef VC 195 #undef MATCH 196 #undef SUBSTITUTE 197 198 //////////////////////////////////////////////////////////// 199 200 /* 201 * Determine the namei hash (for the namecache) for name. 202 * If *ep != NULL, hash from name to ep-1. 
203 * If *ep == NULL, hash from name until the first NUL or '/', and 204 * return the location of this termination character in *ep. 205 * 206 * This function returns an equivalent hash to the MI hash32_strn(). 207 * The latter isn't used because in the *ep == NULL case, determining 208 * the length of the string to the first NUL or `/' and then calling 209 * hash32_strn() involves unnecessary double-handling of the data. 210 */ 211 uint32_t 212 namei_hash(const char *name, const char **ep) 213 { 214 uint32_t hash; 215 216 hash = HASH32_STR_INIT; 217 if (*ep != NULL) { 218 for (; name < *ep; name++) 219 hash = hash * 33 + *(const uint8_t *)name; 220 } else { 221 for (; *name != '\0' && *name != '/'; name++) 222 hash = hash * 33 + *(const uint8_t *)name; 223 *ep = name; 224 } 225 return (hash + (hash >> 5)); 226 } 227 228 //////////////////////////////////////////////////////////// 229 230 /* 231 * Sealed abstraction for pathnames. 232 * 233 * System-call-layer level code that is going to call namei should 234 * first create a pathbuf and adjust all the bells and whistles on it 235 * as needed by context. 
236 */ 237 238 struct pathbuf { 239 char *pb_path; 240 char *pb_pathcopy; 241 unsigned pb_pathcopyuses; 242 }; 243 244 static struct pathbuf * 245 pathbuf_create_raw(void) 246 { 247 struct pathbuf *pb; 248 249 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 250 pb->pb_path = PNBUF_GET(); 251 if (pb->pb_path == NULL) { 252 kmem_free(pb, sizeof(*pb)); 253 return NULL; 254 } 255 pb->pb_pathcopy = NULL; 256 pb->pb_pathcopyuses = 0; 257 return pb; 258 } 259 260 void 261 pathbuf_destroy(struct pathbuf *pb) 262 { 263 KASSERT(pb->pb_pathcopyuses == 0); 264 KASSERT(pb->pb_pathcopy == NULL); 265 PNBUF_PUT(pb->pb_path); 266 kmem_free(pb, sizeof(*pb)); 267 } 268 269 struct pathbuf * 270 pathbuf_assimilate(char *pnbuf) 271 { 272 struct pathbuf *pb; 273 274 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 275 pb->pb_path = pnbuf; 276 pb->pb_pathcopy = NULL; 277 pb->pb_pathcopyuses = 0; 278 return pb; 279 } 280 281 struct pathbuf * 282 pathbuf_create(const char *path) 283 { 284 struct pathbuf *pb; 285 int error; 286 287 pb = pathbuf_create_raw(); 288 if (pb == NULL) { 289 return NULL; 290 } 291 error = copystr(path, pb->pb_path, PATH_MAX, NULL); 292 if (error != 0) { 293 KASSERT(!"kernel path too long in pathbuf_create"); 294 /* make sure it's null-terminated, just in case */ 295 pb->pb_path[PATH_MAX-1] = '\0'; 296 } 297 return pb; 298 } 299 300 int 301 pathbuf_copyin(const char *userpath, struct pathbuf **ret) 302 { 303 struct pathbuf *pb; 304 int error; 305 306 pb = pathbuf_create_raw(); 307 if (pb == NULL) { 308 return ENOMEM; 309 } 310 error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL); 311 if (error) { 312 pathbuf_destroy(pb); 313 return error; 314 } 315 *ret = pb; 316 return 0; 317 } 318 319 /* 320 * XXX should not exist: 321 * 1. whether a pointer is kernel or user should be statically checkable. 322 * 2. copyin should be handled by the upper part of the syscall layer, 323 * not in here. 
324 */ 325 int 326 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret) 327 { 328 if (seg == UIO_USERSPACE) { 329 return pathbuf_copyin(path, ret); 330 } else { 331 *ret = pathbuf_create(path); 332 if (*ret == NULL) { 333 return ENOMEM; 334 } 335 return 0; 336 } 337 } 338 339 /* 340 * Get a copy of the path buffer as it currently exists. If this is 341 * called after namei starts the results may be arbitrary. 342 */ 343 void 344 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen) 345 { 346 strlcpy(buf, pb->pb_path, maxlen); 347 } 348 349 /* 350 * These two functions allow access to a saved copy of the original 351 * path string. The first copy should be gotten before namei is 352 * called. Each copy that is gotten should be put back. 353 */ 354 355 const char * 356 pathbuf_stringcopy_get(struct pathbuf *pb) 357 { 358 if (pb->pb_pathcopyuses == 0) { 359 pb->pb_pathcopy = PNBUF_GET(); 360 strcpy(pb->pb_pathcopy, pb->pb_path); 361 } 362 pb->pb_pathcopyuses++; 363 return pb->pb_pathcopy; 364 } 365 366 void 367 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str) 368 { 369 KASSERT(str == pb->pb_pathcopy); 370 KASSERT(pb->pb_pathcopyuses > 0); 371 pb->pb_pathcopyuses--; 372 if (pb->pb_pathcopyuses == 0) { 373 PNBUF_PUT(pb->pb_pathcopy); 374 pb->pb_pathcopy = NULL; 375 } 376 } 377 378 379 //////////////////////////////////////////////////////////// 380 381 /* 382 * namei: convert a pathname into a pointer to a (maybe-locked) vnode, 383 * and maybe also its parent directory vnode, and assorted other guff. 384 * See namei(9) for the interface documentation. 385 * 386 * 387 * The FOLLOW flag is set when symbolic links are to be followed 388 * when they occur at the end of the name translation process. 389 * Symbolic links are always followed for all other pathname 390 * components other than the last. 391 * 392 * The segflg defines whether the name is to be copied from user 393 * space or kernel space. 
394 * 395 * Overall outline of namei: 396 * 397 * copy in name 398 * get starting directory 399 * while (!done && !error) { 400 * call lookup to search path. 401 * if symbolic link, massage name in buffer and continue 402 * } 403 */ 404 405 /* 406 * Search a pathname. 407 * This is a very central and rather complicated routine. 408 * 409 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 410 * The starting directory is passed in. The pathname is descended 411 * until done, or a symbolic link is encountered. The variable ni_more 412 * is clear if the path is completed; it is set to one if a symbolic 413 * link needing interpretation is encountered. 414 * 415 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 416 * whether the name is to be looked up, created, renamed, or deleted. 417 * When CREATE, RENAME, or DELETE is specified, information usable in 418 * creating, renaming, or deleting a directory entry may be calculated. 419 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 420 * locked. Otherwise the parent directory is not returned. If the target 421 * of the pathname exists and LOCKLEAF is or'ed into the flag the target 422 * is returned locked, otherwise it is returned unlocked. When creating 423 * or renaming and LOCKPARENT is specified, the target may not be ".". 424 * When deleting and LOCKPARENT is specified, the target may be ".". 425 * 426 * Overall outline of lookup: 427 * 428 * dirloop: 429 * identify next component of name at ndp->ni_ptr 430 * handle degenerate case where name is null string 431 * if .. and crossing mount points and on mounted filesys, find parent 432 * call VOP_LOOKUP routine for next component name 433 * directory vnode returned in ni_dvp, locked. 434 * component vnode returned in ni_vp (if it exists), locked. 
435 * if result vnode is mounted on and crossing mount points, 436 * find mounted on vnode 437 * if more components of name, do next level at dirloop 438 * return the answer in ni_vp, locked if LOCKLEAF set 439 * if LOCKPARENT set, return locked parent in ni_dvp 440 */ 441 442 443 /* 444 * Internal state for a namei operation. 445 * 446 * cnp is always equal to &ndp->ni_cnp. 447 */ 448 struct namei_state { 449 struct nameidata *ndp; 450 struct componentname *cnp; 451 452 int docache; /* == 0 do not cache last component */ 453 int rdonly; /* lookup read-only flag bit */ 454 int slashes; 455 456 unsigned attempt_retry:1; /* true if error allows emul retry */ 457 unsigned root_referenced:1; /* true if ndp->ni_rootdir and 458 ndp->ni_erootdir were referenced */ 459 }; 460 461 462 /* 463 * Initialize the namei working state. 464 */ 465 static void 466 namei_init(struct namei_state *state, struct nameidata *ndp) 467 { 468 469 state->ndp = ndp; 470 state->cnp = &ndp->ni_cnd; 471 472 state->docache = 0; 473 state->rdonly = 0; 474 state->slashes = 0; 475 476 state->root_referenced = 0; 477 478 KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc"); 479 KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0), 480 "namei: nameiop contaminated with flags: %08"PRIx32, 481 state->cnp->cn_nameiop); 482 KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0), 483 "name: flags contaminated with nameiops: %08"PRIx32, 484 state->cnp->cn_flags); 485 486 /* 487 * The buffer for name translation shall be the one inside the 488 * pathbuf. 489 */ 490 state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path; 491 } 492 493 /* 494 * Clean up the working namei state, leaving things ready for return 495 * from namei. 
496 */ 497 static void 498 namei_cleanup(struct namei_state *state) 499 { 500 KASSERT(state->cnp == &state->ndp->ni_cnd); 501 502 if (state->root_referenced) { 503 if (state->ndp->ni_rootdir != NULL) 504 vrele(state->ndp->ni_rootdir); 505 if (state->ndp->ni_erootdir != NULL) 506 vrele(state->ndp->ni_erootdir); 507 } 508 } 509 510 ////////////////////////////// 511 512 /* 513 * Get the directory context. 514 * Initializes the rootdir and erootdir state and returns a reference 515 * to the starting dir. 516 */ 517 static struct vnode * 518 namei_getstartdir(struct namei_state *state) 519 { 520 struct nameidata *ndp = state->ndp; 521 struct componentname *cnp = state->cnp; 522 struct cwdinfo *cwdi; /* pointer to cwd state */ 523 struct lwp *self = curlwp; /* thread doing namei() */ 524 struct vnode *rootdir, *erootdir, *curdir, *startdir; 525 526 if (state->root_referenced) { 527 if (state->ndp->ni_rootdir != NULL) 528 vrele(state->ndp->ni_rootdir); 529 if (state->ndp->ni_erootdir != NULL) 530 vrele(state->ndp->ni_erootdir); 531 state->root_referenced = 0; 532 } 533 534 cwdi = self->l_proc->p_cwdi; 535 rw_enter(&cwdi->cwdi_lock, RW_READER); 536 537 /* root dir */ 538 if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { 539 rootdir = rootvnode; 540 } else { 541 rootdir = cwdi->cwdi_rdir; 542 } 543 544 /* emulation root dir, if any */ 545 if ((cnp->cn_flags & TRYEMULROOT) == 0) { 546 /* if we don't want it, don't fetch it */ 547 erootdir = NULL; 548 } else if (cnp->cn_flags & EMULROOTSET) { 549 /* explicitly set emulroot; "/../" doesn't override this */ 550 erootdir = ndp->ni_erootdir; 551 } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) { 552 /* explicit reference to real rootdir */ 553 erootdir = NULL; 554 } else { 555 /* may be null */ 556 erootdir = cwdi->cwdi_edir; 557 } 558 559 /* current dir */ 560 curdir = cwdi->cwdi_cdir; 561 562 if (ndp->ni_pnbuf[0] != '/') { 563 if (ndp->ni_atdir != NULL) { 564 startdir = ndp->ni_atdir; 565 } else { 566 startdir = 
curdir; 567 } 568 erootdir = NULL; 569 } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) { 570 startdir = erootdir; 571 } else { 572 startdir = rootdir; 573 erootdir = NULL; 574 } 575 576 state->ndp->ni_rootdir = rootdir; 577 state->ndp->ni_erootdir = erootdir; 578 579 /* 580 * Get a reference to the start dir so we can safely unlock cwdi. 581 * 582 * Must hold references to rootdir and erootdir while we're running. 583 * A multithreaded process may chroot during namei. 584 */ 585 if (startdir != NULL) 586 vref(startdir); 587 if (state->ndp->ni_rootdir != NULL) 588 vref(state->ndp->ni_rootdir); 589 if (state->ndp->ni_erootdir != NULL) 590 vref(state->ndp->ni_erootdir); 591 state->root_referenced = 1; 592 593 rw_exit(&cwdi->cwdi_lock); 594 return startdir; 595 } 596 597 /* 598 * Get the directory context for the nfsd case, in parallel to 599 * getstartdir. Initializes the rootdir and erootdir state and 600 * returns a reference to the passed-in starting dir. 601 */ 602 static struct vnode * 603 namei_getstartdir_for_nfsd(struct namei_state *state) 604 { 605 KASSERT(state->ndp->ni_atdir != NULL); 606 607 /* always use the real root, and never set an emulation root */ 608 if (rootvnode == NULL) { 609 return NULL; 610 } 611 state->ndp->ni_rootdir = rootvnode; 612 state->ndp->ni_erootdir = NULL; 613 614 vref(state->ndp->ni_atdir); 615 KASSERT(! state->root_referenced); 616 vref(state->ndp->ni_rootdir); 617 state->root_referenced = 1; 618 return state->ndp->ni_atdir; 619 } 620 621 622 /* 623 * Ktrace the namei operation. 624 */ 625 static void 626 namei_ktrace(struct namei_state *state) 627 { 628 struct nameidata *ndp = state->ndp; 629 struct componentname *cnp = state->cnp; 630 struct lwp *self = curlwp; /* thread doing namei() */ 631 const char *emul_path; 632 633 if (ktrpoint(KTR_NAMEI)) { 634 if (ndp->ni_erootdir != NULL) { 635 /* 636 * To make any sense, the trace entry need to have the 637 * text of the emulation path prepended. 
638 * Usually we can get this from the current process, 639 * but when called from emul_find_interp() it is only 640 * in the exec_package - so we get it passed in ni_next 641 * (this is a hack). 642 */ 643 if (cnp->cn_flags & EMULROOTSET) 644 emul_path = ndp->ni_next; 645 else 646 emul_path = self->l_proc->p_emul->e_path; 647 ktrnamei2(emul_path, strlen(emul_path), 648 ndp->ni_pnbuf, ndp->ni_pathlen); 649 } else 650 ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen); 651 } 652 } 653 654 /* 655 * Start up namei. Find the root dir and cwd, establish the starting 656 * directory for lookup, and lock it. Also calls ktrace when 657 * appropriate. 658 */ 659 static int 660 namei_start(struct namei_state *state, int isnfsd, 661 struct vnode **startdir_ret) 662 { 663 struct nameidata *ndp = state->ndp; 664 struct vnode *startdir; 665 666 /* length includes null terminator (was originally from copyinstr) */ 667 ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1; 668 669 /* 670 * POSIX.1 requirement: "" is not a valid file name. 671 */ 672 if (ndp->ni_pathlen == 1) { 673 ndp->ni_erootdir = NULL; 674 return ENOENT; 675 } 676 677 ndp->ni_loopcnt = 0; 678 679 /* Get starting directory, set up root, and ktrace. */ 680 if (isnfsd) { 681 startdir = namei_getstartdir_for_nfsd(state); 682 /* no ktrace */ 683 } else { 684 startdir = namei_getstartdir(state); 685 namei_ktrace(state); 686 } 687 688 if (startdir == NULL) { 689 return ENOENT; 690 } 691 692 /* NDAT may feed us with a non directory namei_getstartdir */ 693 if (startdir->v_type != VDIR) { 694 vrele(startdir); 695 return ENOTDIR; 696 } 697 698 *startdir_ret = startdir; 699 return 0; 700 } 701 702 /* 703 * Check for being at a symlink that we're going to follow. 704 */ 705 static inline int 706 namei_atsymlink(struct namei_state *state, struct vnode *foundobj) 707 { 708 return (foundobj->v_type == VLNK) && 709 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR)); 710 } 711 712 /* 713 * Follow a symlink. 714 * 715 * Updates searchdir. 
inhibitmagic causes magic symlinks to not be 716 * interpreted; this is used by nfsd. 717 * 718 * Unlocks foundobj on success (ugh) 719 */ 720 static inline int 721 namei_follow(struct namei_state *state, int inhibitmagic, 722 struct vnode *searchdir, struct vnode *foundobj, 723 struct vnode **newsearchdir_ret) 724 { 725 struct nameidata *ndp = state->ndp; 726 struct componentname *cnp = state->cnp; 727 728 struct lwp *self = curlwp; /* thread doing namei() */ 729 struct iovec aiov; /* uio for reading symbolic links */ 730 struct uio auio; 731 char *cp; /* pointer into pathname argument */ 732 size_t linklen; 733 int error; 734 735 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 736 return ELOOP; 737 } 738 739 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 740 if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) { 741 error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred); 742 if (error != 0) { 743 VOP_UNLOCK(foundobj); 744 return error; 745 } 746 } 747 748 /* FUTURE: fix this to not use a second buffer */ 749 cp = PNBUF_GET(); 750 aiov.iov_base = cp; 751 aiov.iov_len = MAXPATHLEN; 752 auio.uio_iov = &aiov; 753 auio.uio_iovcnt = 1; 754 auio.uio_offset = 0; 755 auio.uio_rw = UIO_READ; 756 auio.uio_resid = MAXPATHLEN; 757 UIO_SETUP_SYSSPACE(&auio); 758 error = VOP_READLINK(foundobj, &auio, cnp->cn_cred); 759 VOP_UNLOCK(foundobj); 760 if (error) { 761 PNBUF_PUT(cp); 762 return error; 763 } 764 linklen = MAXPATHLEN - auio.uio_resid; 765 if (linklen == 0) { 766 PNBUF_PUT(cp); 767 return ENOENT; 768 } 769 770 /* 771 * Do symlink substitution, if appropriate, and 772 * check length for potential overflow. 773 * 774 * Inhibit symlink substitution for nfsd. 775 * XXX: This is how it was before; is that a bug or a feature? 
776 */ 777 if ((!inhibitmagic && vfs_magiclinks && 778 symlink_magic(self->l_proc, cp, &linklen)) || 779 (linklen + ndp->ni_pathlen >= MAXPATHLEN)) { 780 PNBUF_PUT(cp); 781 return ENAMETOOLONG; 782 } 783 if (ndp->ni_pathlen > 1) { 784 /* includes a null-terminator */ 785 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); 786 } else { 787 cp[linklen] = '\0'; 788 } 789 ndp->ni_pathlen += linklen; 790 memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen); 791 PNBUF_PUT(cp); 792 793 /* we're now starting from the beginning of the buffer again */ 794 cnp->cn_nameptr = ndp->ni_pnbuf; 795 796 /* 797 * Check if root directory should replace current directory. 798 */ 799 if (ndp->ni_pnbuf[0] == '/') { 800 vrele(searchdir); 801 /* Keep absolute symbolic links inside emulation root */ 802 searchdir = ndp->ni_erootdir; 803 if (searchdir == NULL || 804 (ndp->ni_pnbuf[1] == '.' 805 && ndp->ni_pnbuf[2] == '.' 806 && ndp->ni_pnbuf[3] == '/')) { 807 ndp->ni_erootdir = NULL; 808 searchdir = ndp->ni_rootdir; 809 } 810 vref(searchdir); 811 while (cnp->cn_nameptr[0] == '/') { 812 cnp->cn_nameptr++; 813 ndp->ni_pathlen--; 814 } 815 } 816 817 *newsearchdir_ret = searchdir; 818 return 0; 819 } 820 821 ////////////////////////////// 822 823 /* 824 * Inspect the leading path component and update the state accordingly. 825 */ 826 static int 827 lookup_parsepath(struct namei_state *state, struct vnode *searchdir) 828 { 829 const char *cp; /* pointer into pathname argument */ 830 int error; 831 832 struct componentname *cnp = state->cnp; 833 struct nameidata *ndp = state->ndp; 834 835 KASSERT(cnp == &ndp->ni_cnd); 836 837 /* 838 * Search a new directory. 839 * 840 * The last component of the filename is left accessible via 841 * cnp->cn_nameptr for callers that need the name. Callers needing 842 * the name set the SAVENAME flag. When done, they assume 843 * responsibility for freeing the pathname buffer. 844 * 845 * At this point, our only vnode state is that the search dir 846 * is held. 
847 */ 848 error = VOP_PARSEPATH(searchdir, cnp->cn_nameptr, &cnp->cn_namelen); 849 if (error) { 850 return error; 851 } 852 cp = cnp->cn_nameptr + cnp->cn_namelen; 853 if (cnp->cn_namelen > KERNEL_NAME_MAX) { 854 return ENAMETOOLONG; 855 } 856 #ifdef NAMEI_DIAGNOSTIC 857 { char c = *cp; 858 *(char *)cp = '\0'; 859 printf("{%s}: ", cnp->cn_nameptr); 860 *(char *)cp = c; } 861 #endif /* NAMEI_DIAGNOSTIC */ 862 ndp->ni_pathlen -= cnp->cn_namelen; 863 ndp->ni_next = cp; 864 /* 865 * If this component is followed by a slash, then move the pointer to 866 * the next component forward, and remember that this component must be 867 * a directory. 868 */ 869 if (*cp == '/') { 870 do { 871 cp++; 872 } while (*cp == '/'); 873 state->slashes = cp - ndp->ni_next; 874 ndp->ni_pathlen -= state->slashes; 875 ndp->ni_next = cp; 876 cnp->cn_flags |= REQUIREDIR; 877 } else { 878 state->slashes = 0; 879 cnp->cn_flags &= ~REQUIREDIR; 880 } 881 /* 882 * We do special processing on the last component, whether or not it's 883 * a directory. Cache all intervening lookups, but not the final one. 884 */ 885 if (*cp == '\0') { 886 if (state->docache) 887 cnp->cn_flags |= MAKEENTRY; 888 else 889 cnp->cn_flags &= ~MAKEENTRY; 890 cnp->cn_flags |= ISLASTCN; 891 } else { 892 cnp->cn_flags |= MAKEENTRY; 893 cnp->cn_flags &= ~ISLASTCN; 894 } 895 if (cnp->cn_namelen == 2 && 896 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 897 cnp->cn_flags |= ISDOTDOT; 898 else 899 cnp->cn_flags &= ~ISDOTDOT; 900 901 return 0; 902 } 903 904 /* 905 * Take care of crossing a mounted-on vnode. On error, foundobj_ret will be 906 * vrele'd, but searchdir is left alone. 
907 */ 908 static int 909 lookup_crossmount(struct namei_state *state, 910 struct vnode **searchdir_ret, 911 struct vnode **foundobj_ret, 912 bool *searchdir_locked) 913 { 914 struct componentname *cnp = state->cnp; 915 struct vnode *foundobj, *vp; 916 struct vnode *searchdir; 917 struct mount *mp; 918 int error, lktype; 919 920 searchdir = *searchdir_ret; 921 foundobj = *foundobj_ret; 922 error = 0; 923 924 KASSERT((cnp->cn_flags & NOCROSSMOUNT) == 0); 925 926 /* First, unlock searchdir (oof). */ 927 if (*searchdir_locked) { 928 KASSERT(searchdir != NULL); 929 lktype = VOP_ISLOCKED(searchdir); 930 VOP_UNLOCK(searchdir); 931 *searchdir_locked = false; 932 } else { 933 lktype = LK_NONE; 934 } 935 936 /* 937 * Do an unlocked check to see if the vnode has been mounted on; if 938 * so find the root of the mounted file system. 939 */ 940 while (foundobj->v_type == VDIR && 941 (mp = foundobj->v_mountedhere) != NULL && 942 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 943 KASSERTMSG(searchdir != foundobj, "same vn %p", searchdir); 944 945 /* 946 * Try the namecache first. If that doesn't work, do 947 * it the hard way. 948 */ 949 if (cache_lookup_mount(foundobj, &vp)) { 950 vrele(foundobj); 951 foundobj = vp; 952 } else { 953 /* First get the vnode stable. */ 954 error = vn_lock(foundobj, LK_SHARED); 955 if (error != 0) { 956 vrele(foundobj); 957 foundobj = NULL; 958 break; 959 } 960 961 /* 962 * Check to see if something is still mounted on it. 963 */ 964 if ((mp = foundobj->v_mountedhere) == NULL) { 965 VOP_UNLOCK(foundobj); 966 break; 967 } 968 969 /* 970 * Get a reference to the mountpoint, and unlock 971 * foundobj. 972 */ 973 error = vfs_busy(mp); 974 VOP_UNLOCK(foundobj); 975 if (error != 0) { 976 vrele(foundobj); 977 foundobj = NULL; 978 break; 979 } 980 981 /* 982 * Now get a reference on the root vnode. 983 * XXX Future - maybe allow only VDIR here. 
984 */ 985 error = VFS_ROOT(mp, LK_NONE, &vp); 986 987 /* 988 * If successful, enter it into the cache while 989 * holding the mount busy (competing with unmount). 990 */ 991 if (error == 0) { 992 cache_enter_mount(foundobj, vp); 993 } 994 995 /* Finally, drop references to foundobj & mountpoint. */ 996 vrele(foundobj); 997 vfs_unbusy(mp); 998 if (error) { 999 foundobj = NULL; 1000 break; 1001 } 1002 foundobj = vp; 1003 } 1004 1005 /* 1006 * Avoid locking vnodes from two filesystems because 1007 * it's prone to deadlock, e.g. when using puffs. 1008 * Also, it isn't a good idea to propagate slowness of 1009 * a filesystem up to the root directory. For now, 1010 * only handle the common case, where foundobj is 1011 * VDIR. 1012 * 1013 * In this case set searchdir to null to avoid using 1014 * it again. It is not correct to set searchdir == 1015 * foundobj here as that will confuse the caller. 1016 * (See PR 40740.) 1017 */ 1018 if (searchdir == NULL) { 1019 /* already been here once; do nothing further */ 1020 } else if (foundobj->v_type == VDIR) { 1021 vrele(searchdir); 1022 *searchdir_ret = searchdir = NULL; 1023 lktype = LK_NONE; 1024 } 1025 } 1026 1027 /* If searchdir is still around, re-lock it. */ 1028 if (error == 0 && lktype != LK_NONE) { 1029 vn_lock(searchdir, lktype | LK_RETRY); 1030 *searchdir_locked = true; 1031 } 1032 *foundobj_ret = foundobj; 1033 return error; 1034 } 1035 1036 /* 1037 * Determine the desired locking mode for the directory of a lookup. 1038 */ 1039 static int 1040 lookup_lktype(struct vnode *searchdir, struct componentname *cnp) 1041 { 1042 1043 /* 1044 * If the file system supports VOP_LOOKUP() with a shared lock, and 1045 * we are not making any modifications (nameiop LOOKUP) or this is 1046 * not the last component then get a shared lock. Where we can't do 1047 * fast-forwarded lookups (for example with layered file systems) 1048 * then this is the fallback for reducing lock contention. 
1049 */ 1050 if ((searchdir->v_mount->mnt_iflag & IMNT_SHRLOOKUP) != 0 && 1051 (cnp->cn_nameiop == LOOKUP || (cnp->cn_flags & ISLASTCN) == 0)) { 1052 return LK_SHARED; 1053 } else { 1054 return LK_EXCLUSIVE; 1055 } 1056 } 1057 1058 /* 1059 * Call VOP_LOOKUP for a single lookup; return a new search directory 1060 * (used when crossing mountpoints up or searching union mounts down) and 1061 * the found object, which for create operations may be NULL on success. 1062 * 1063 * Note that the new search directory may be null, which means the 1064 * searchdir was unlocked and released. This happens in the common case 1065 * when crossing a mount point downwards, in order to avoid coupling 1066 * locks between different file system volumes. Importantly, this can 1067 * happen even if the call fails. (XXX: this is gross and should be 1068 * tidied somehow.) 1069 */ 1070 static int 1071 lookup_once(struct namei_state *state, 1072 struct vnode *searchdir, 1073 struct vnode **newsearchdir_ret, 1074 struct vnode **foundobj_ret, 1075 bool *newsearchdir_locked_ret) 1076 { 1077 struct vnode *tmpvn; /* scratch vnode */ 1078 struct vnode *foundobj; /* result */ 1079 struct lwp *l = curlwp; 1080 bool searchdir_locked = false; 1081 int error, lktype; 1082 1083 struct componentname *cnp = state->cnp; 1084 struct nameidata *ndp = state->ndp; 1085 1086 KASSERT(cnp == &ndp->ni_cnd); 1087 *newsearchdir_ret = searchdir; 1088 1089 /* 1090 * Handle "..": two special cases. 1091 * 1. If at root directory (e.g. after chroot) 1092 * or at absolute root directory 1093 * then ignore it so can't get out. 1094 * 1a. If at the root of the emulation filesystem go to the real 1095 * root. So "/../<path>" is always absolute. 1096 * 1b. If we have somehow gotten out of a jail, warn 1097 * and also ignore it so we can't get farther out. 1098 * 2. If this vnode is the root of a mounted 1099 * filesystem, then replace it with the 1100 * vnode which was mounted on so we take the 1101 * .. 
in the other file system. 1102 */ 1103 if (cnp->cn_flags & ISDOTDOT) { 1104 struct proc *p = l->l_proc; 1105 1106 for (;;) { 1107 if (searchdir == ndp->ni_rootdir || 1108 searchdir == rootvnode) { 1109 foundobj = searchdir; 1110 vref(foundobj); 1111 *foundobj_ret = foundobj; 1112 if (cnp->cn_flags & LOCKPARENT) { 1113 lktype = lookup_lktype(searchdir, cnp); 1114 vn_lock(searchdir, lktype | LK_RETRY); 1115 searchdir_locked = true; 1116 } 1117 error = 0; 1118 goto done; 1119 } 1120 if (ndp->ni_rootdir != rootvnode) { 1121 int retval; 1122 1123 retval = vn_isunder(searchdir, ndp->ni_rootdir, l); 1124 if (!retval) { 1125 /* Oops! We got out of jail! */ 1126 log(LOG_WARNING, 1127 "chrooted pid %d uid %d (%s) " 1128 "detected outside of its chroot\n", 1129 p->p_pid, kauth_cred_geteuid(l->l_cred), 1130 p->p_comm); 1131 /* Put us at the jail root. */ 1132 vrele(searchdir); 1133 searchdir = NULL; 1134 foundobj = ndp->ni_rootdir; /* two references: one for *newsearchdir_ret, one for *foundobj_ret */ 1135 vref(foundobj); 1136 vref(foundobj); 1137 *newsearchdir_ret = foundobj; 1138 *foundobj_ret = foundobj; 1139 error = 0; 1140 goto done; 1141 } 1142 } 1143 if ((searchdir->v_vflag & VV_ROOT) == 0 || 1144 (cnp->cn_flags & NOCROSSMOUNT)) 1145 break; /* at the root of a mount: move to the vnode it covers and re-run the checks */ 1146 tmpvn = searchdir; 1147 searchdir = searchdir->v_mount->mnt_vnodecovered; 1148 vref(searchdir); 1149 vrele(tmpvn); 1150 *newsearchdir_ret = searchdir; 1151 } 1152 } 1153 1154 lktype = lookup_lktype(searchdir, cnp); 1155 1156 /* 1157 * We now have a segment name to search for, and a directory to search. 1158 * Our vnode state here is that "searchdir" is held.
1159 */ 1160 unionlookup: 1161 foundobj = NULL; 1162 if (!searchdir_locked) { 1163 vn_lock(searchdir, lktype | LK_RETRY); 1164 searchdir_locked = true; 1165 } 1166 error = VOP_LOOKUP(searchdir, &foundobj, cnp); 1167 1168 if (error != 0) { 1169 KASSERTMSG((foundobj == NULL), 1170 "leaf `%s' should be empty but is %p", 1171 cnp->cn_nameptr, foundobj); 1172 #ifdef NAMEI_DIAGNOSTIC 1173 printf("not found\n"); 1174 #endif /* NAMEI_DIAGNOSTIC */ 1175 1176 /* 1177 * If ENOLCK, the file system needs us to retry the lookup 1178 * with an exclusive lock. It's likely nothing was found in 1179 * cache and/or modifications need to be made. 1180 */ 1181 if (error == ENOLCK) { 1182 KASSERT(VOP_ISLOCKED(searchdir) == LK_SHARED); 1183 KASSERT(searchdir_locked); /* if the non-blocking upgrade fails, drop the lock so that unionlookup retakes it with the new lktype */ 1184 if (vn_lock(searchdir, LK_UPGRADE | LK_NOWAIT)) { 1185 VOP_UNLOCK(searchdir); 1186 searchdir_locked = false; 1187 } 1188 lktype = LK_EXCLUSIVE; 1189 goto unionlookup; 1190 } 1191 /* ENOENT at the root of a union mount: retry the lookup in the covered directory */ 1192 if ((error == ENOENT) && 1193 (searchdir->v_vflag & VV_ROOT) && 1194 (searchdir->v_mount->mnt_flag & MNT_UNION)) { 1195 tmpvn = searchdir; 1196 searchdir = searchdir->v_mount->mnt_vnodecovered; 1197 vref(searchdir); 1198 vput(tmpvn); 1199 searchdir_locked = false; 1200 *newsearchdir_ret = searchdir; 1201 goto unionlookup; 1202 } 1203 1204 if (error != EJUSTRETURN) 1205 goto done; 1206 1207 /* 1208 * If this was not the last component, or there were trailing 1209 * slashes, and we are not going to create a directory, 1210 * then the name must exist. 1211 */ 1212 if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) { 1213 error = ENOENT; 1214 goto done; 1215 } 1216 1217 /* 1218 * If creating and at end of pathname, then can consider 1219 * allowing file to be created.
1220 */ 1221 if (state->rdonly) { 1222 error = EROFS; 1223 goto done; 1224 } 1225 1226 /* 1227 * We return success and a NULL foundobj to indicate 1228 * that the entry doesn't currently exist, leaving a 1229 * pointer to the (normally, locked) directory vnode 1230 * as searchdir. 1231 */ 1232 *foundobj_ret = NULL; 1233 error = 0; 1234 goto done; 1235 } 1236 #ifdef NAMEI_DIAGNOSTIC 1237 printf("found\n"); 1238 #endif /* NAMEI_DIAGNOSTIC */ 1239 1240 /* Unlock, unless the caller needs the parent locked. */ 1241 if (searchdir != NULL) { 1242 KASSERT(searchdir_locked); 1243 if ((cnp->cn_flags & (ISLASTCN | LOCKPARENT)) != 1244 (ISLASTCN | LOCKPARENT)) { 1245 VOP_UNLOCK(searchdir); 1246 searchdir_locked = false; 1247 } 1248 } else { 1249 KASSERT(!searchdir_locked); 1250 } 1251 1252 *foundobj_ret = foundobj; 1253 error = 0; 1254 done: /* single exit point: always report the lock state to the caller */ 1255 *newsearchdir_locked_ret = searchdir_locked; 1256 return error; 1257 } 1258 1259 /* 1260 * Parse out the first path name component that we need to consider. 1261 * 1262 * While doing this, attempt to use the name cache to fast-forward through 1263 * as many "easy" to find components of the path as possible. 1264 * 1265 * We use the namecache's node locks to form a chain, and avoid as many 1266 * vnode references and locks as possible. In the ideal case, only the 1267 * final vnode will have its reference count adjusted and lock taken. * * Returns 0 with *foundobj_ret non-NULL, or an error with *foundobj_ret * NULL. EOPNOTSUPP is the code used here whenever the name cache could * not provide an answer. *searchdir_ret may be replaced, or set to NULL * when the parent is not needed. 1268 */ 1269 static int 1270 lookup_fastforward(struct namei_state *state, struct vnode **searchdir_ret, 1271 struct vnode **foundobj_ret) 1272 { 1273 struct componentname *cnp = state->cnp; 1274 struct nameidata *ndp = state->ndp; 1275 krwlock_t *plock; 1276 struct vnode *foundobj, *searchdir; 1277 int error, error2; 1278 size_t oldpathlen; 1279 const char *oldnameptr; 1280 bool terminal; 1281 1282 /* 1283 * Eat as many path name components as possible before giving up and 1284 * letting lookup_once() handle it.
Remember the starting point in 1285 * case we can't get vnode references and need to roll back. 1286 */ 1287 plock = NULL; 1288 searchdir = *searchdir_ret; 1289 oldnameptr = cnp->cn_nameptr; 1290 oldpathlen = ndp->ni_pathlen; 1291 terminal = false; 1292 for (;;) { 1293 foundobj = NULL; 1294 1295 /* 1296 * Get the next component name. There should be no slashes 1297 * here, and we shouldn't have looped around if we were 1298 * done. 1299 */ 1300 KASSERT(cnp->cn_nameptr[0] != '/'); 1301 KASSERT(cnp->cn_nameptr[0] != '\0'); 1302 if ((error = lookup_parsepath(state, searchdir)) != 0) { 1303 break; 1304 } 1305 1306 /* 1307 * Can't deal with DOTDOT lookups if NOCROSSMOUNT or the 1308 * lookup is chrooted. 1309 */ 1310 if ((cnp->cn_flags & ISDOTDOT) != 0) { 1311 if ((searchdir->v_vflag & VV_ROOT) != 0 && 1312 (cnp->cn_flags & NOCROSSMOUNT)) { 1313 error = EOPNOTSUPP; 1314 break; 1315 } 1316 if (ndp->ni_rootdir != rootvnode) { 1317 error = EOPNOTSUPP; 1318 break; 1319 } 1320 } 1321 1322 /* 1323 * Can't deal with last component when modifying; this needs 1324 * searchdir locked and VOP_LOOKUP() called (which can and 1325 * does modify state, despite the name). NB: this case means 1326 * terminal is never set true when LOCKPARENT. 1327 */ 1328 if ((cnp->cn_flags & ISLASTCN) != 0) { 1329 if (cnp->cn_nameiop != LOOKUP || 1330 (cnp->cn_flags & LOCKPARENT) != 0) { 1331 error = EOPNOTSUPP; 1332 break; 1333 } 1334 } 1335 1336 /* 1337 * Good, now look for it in cache. cache_lookup_linked() 1338 * will fail if there's nothing there, or if there's no 1339 * ownership info for the directory, or if the user doesn't 1340 * have permission to look up files in this directory. 1341 */ 1342 if (!cache_lookup_linked(searchdir, cnp->cn_nameptr, 1343 cnp->cn_namelen, &foundobj, &plock, cnp->cn_cred)) { 1344 error = EOPNOTSUPP; 1345 break; 1346 } 1347 KASSERT(plock != NULL && rw_lock_held(plock)); 1348 1349 /* 1350 * Scored a hit. Negative is good too (ENOENT).
If there's 1351 * a '-o union' mount here, punt and let lookup_once() deal 1352 * with it. 1353 */ 1354 if (foundobj == NULL) { 1355 if ((searchdir->v_vflag & VV_ROOT) != 0 && 1356 (searchdir->v_mount->mnt_flag & MNT_UNION) != 0) { 1357 error = EOPNOTSUPP; 1358 } else { 1359 error = ENOENT; 1360 terminal = ((cnp->cn_flags & ISLASTCN) != 0); 1361 } 1362 break; 1363 } 1364 1365 /* 1366 * Stop and get a hold on the vnode if we've encountered 1367 * something other than a directory. 1368 */ 1369 if (foundobj->v_type != VDIR) { 1370 error = vcache_tryvget(foundobj); 1371 if (error != 0) { 1372 foundobj = NULL; 1373 error = EOPNOTSUPP; 1374 } else { 1375 terminal = (foundobj->v_type != VLNK && 1376 (cnp->cn_flags & ISLASTCN) != 0); 1377 } 1378 break; 1379 } 1380 1381 /* 1382 * Try to cross mountpoints, bearing in mind that they can 1383 * be stacked. If at any point we can't go further, stop 1384 * and try to get a reference on the vnode. If we are able 1385 * to get a ref then lookup_crossmount() will take care of 1386 * it, otherwise we'll fall through to lookup_once(). 1387 */ 1388 if (foundobj->v_mountedhere != NULL) { 1389 while (foundobj->v_mountedhere != NULL && 1390 (cnp->cn_flags & NOCROSSMOUNT) == 0 && 1391 cache_cross_mount(&foundobj, &plock)) { 1392 KASSERT(foundobj != NULL); 1393 KASSERT(foundobj->v_type == VDIR); 1394 } 1395 if (foundobj->v_mountedhere != NULL) { 1396 error = vcache_tryvget(foundobj); 1397 if (error != 0) { 1398 foundobj = NULL; 1399 error = EOPNOTSUPP; 1400 } 1401 break; 1402 } else { 1403 searchdir = NULL; 1404 } 1405 } 1406 1407 /* 1408 * Time to stop if we found the last component & traversed 1409 * all mounts. 1410 */ 1411 if ((cnp->cn_flags & ISLASTCN) != 0) { 1412 error = vcache_tryvget(foundobj); 1413 if (error != 0) { 1414 foundobj = NULL; 1415 error = EOPNOTSUPP; 1416 } else { 1417 terminal = (foundobj->v_type != VLNK); 1418 } 1419 break; 1420 } 1421 1422 /* 1423 * Otherwise, we're still in business.
Set the found VDIR 1424 * vnode as the search dir for the next component and 1425 * continue on to it. 1426 */ 1427 cnp->cn_nameptr = ndp->ni_next; 1428 searchdir = foundobj; 1429 } 1430 1431 if (terminal) { 1432 /* 1433 * If we exited the loop above having successfully located 1434 * the last component with a zero error code, and it's not a 1435 * symbolic link, then the parent directory is not needed. 1436 * Release reference to the starting parent and make the 1437 * terminal parent disappear into thin air. 1438 */ 1439 KASSERT(plock != NULL); 1440 rw_exit(plock); 1441 vrele(*searchdir_ret); 1442 *searchdir_ret = NULL; 1443 } else if (searchdir != *searchdir_ret) { 1444 /* 1445 * Otherwise we need to return the parent. If we ended up 1446 * with a new search dir, ref it before dropping the 1447 * namecache's lock. The lock prevents both searchdir and 1448 * foundobj from disappearing. If we can't ref the new 1449 * searchdir, we have a bit of a problem. Roll back the 1450 * fastforward to the beginning and let lookup_once() take 1451 * care of it. 1452 */ 1453 if (searchdir == NULL) { 1454 /* 1455 * It's possible for searchdir to be NULL in the 1456 * case of a root vnode being reclaimed while 1457 * trying to cross a mount. 1458 */ 1459 error2 = EOPNOTSUPP; 1460 } else { 1461 error2 = vcache_tryvget(searchdir); 1462 } 1463 KASSERT(plock != NULL); 1464 rw_exit(plock); 1465 if (__predict_true(error2 == 0)) { 1466 /* Returning new searchdir, and maybe new foundobj. */ 1467 vrele(*searchdir_ret); 1468 *searchdir_ret = searchdir; 1469 } else { 1470 /* Returning nothing.
*/ 1471 if (foundobj != NULL) { 1472 vrele(foundobj); 1473 foundobj = NULL; 1474 } /* restore the parse position saved before the loop */ 1475 cnp->cn_nameptr = oldnameptr; 1476 ndp->ni_pathlen = oldpathlen; 1477 if (searchdir == NULL) { 1478 error = EOPNOTSUPP; 1479 } else { 1480 error = lookup_parsepath(state, searchdir); 1481 if (error == 0) { 1482 error = EOPNOTSUPP; 1483 } 1484 } 1485 } 1486 } else if (plock != NULL) { 1487 /* Drop any namecache lock still held. */ 1488 rw_exit(plock); 1489 } 1490 1491 KASSERT(error == 0 ? foundobj != NULL : foundobj == NULL); 1492 *foundobj_ret = foundobj; 1493 return error; 1494 } 1495 1496 ////////////////////////////// 1497 1498 /* 1499 * Do a complete path search from a single root directory. 1500 * (This is called up to twice if TRYEMULROOT is in effect.) * * neverfollow: when nonzero, reaching a symlink that would otherwise be * followed fails with EINVAL. * inhibitmagic: passed through to namei_follow(). * isnfsd: passed through to namei_start(). * * On success the results are stored in ndp->ni_dvp and ndp->ni_vp; on every * error return both are set to NULL. 1501 */ 1502 static int 1503 namei_oneroot(struct namei_state *state, 1504 int neverfollow, int inhibitmagic, int isnfsd) 1505 { 1506 struct nameidata *ndp = state->ndp; 1507 struct componentname *cnp = state->cnp; 1508 struct vnode *searchdir, *foundobj; 1509 bool searchdir_locked = false; 1510 int error; 1511 1512 error = namei_start(state, isnfsd, &searchdir); 1513 if (error) { 1514 ndp->ni_dvp = NULL; 1515 ndp->ni_vp = NULL; 1516 return error; 1517 } 1518 KASSERT(searchdir->v_type == VDIR); 1519 1520 /* 1521 * Setup: break out flag bits into variables. 1522 */ 1523 state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 1524 if (cnp->cn_nameiop == DELETE) 1525 state->docache = 0; 1526 state->rdonly = cnp->cn_flags & RDONLY; 1527 1528 /* 1529 * Keep going until we run out of path components. 1530 */ 1531 cnp->cn_nameptr = ndp->ni_pnbuf; 1532 1533 /* drop leading slashes (already used them to choose startdir) */ 1534 while (cnp->cn_nameptr[0] == '/') { 1535 cnp->cn_nameptr++; 1536 ndp->ni_pathlen--; 1537 } 1538 /* was it just "/"?
*/ 1539 if (cnp->cn_nameptr[0] == '\0') { 1540 foundobj = searchdir; 1541 searchdir = NULL; 1542 cnp->cn_flags |= ISLASTCN; 1543 1544 /* bleh */ 1545 goto skiploop; 1546 } 1547 1548 for (;;) { 1549 KASSERT(searchdir != NULL); 1550 KASSERT(!searchdir_locked); 1551 1552 /* 1553 * Parse out the first path name component that we need to 1554 * consider. While doing this, attempt to use the name 1555 * cache to fast-forward through as many "easy" to find 1556 * components of the path as possible. 1557 */ 1558 error = lookup_fastforward(state, &searchdir, &foundobj); 1559 1560 /* 1561 * If we didn't get a good answer from the namecache, then 1562 * go directly to the file system. 1563 */ 1564 if (error == EOPNOTSUPP) { 1565 error = lookup_once(state, searchdir, &searchdir, 1566 &foundobj, &searchdir_locked); 1567 } 1568 1569 /* 1570 * If the vnode we found is mounted on, then cross the mount 1571 * and get the root vnode in foundobj. If this encounters 1572 * an error, it will dispose of foundobj, but searchdir is 1573 * untouched. 1574 */ 1575 if (error == 0 && foundobj != NULL && 1576 foundobj->v_type == VDIR && 1577 foundobj->v_mountedhere != NULL && 1578 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 1579 error = lookup_crossmount(state, &searchdir, 1580 &foundobj, &searchdir_locked); 1581 } 1582 1583 if (error) { 1584 if (searchdir != NULL) { 1585 if (searchdir_locked) { 1586 searchdir_locked = false; 1587 vput(searchdir); 1588 } else { 1589 vrele(searchdir); 1590 } 1591 } 1592 ndp->ni_dvp = NULL; 1593 ndp->ni_vp = NULL; 1594 /* 1595 * Note that if we're doing TRYEMULROOT we can 1596 * retry with the normal root. Where this is 1597 * currently set matches previous practice, 1598 * but the previous practice didn't make much 1599 * sense and somebody should sit down and 1600 * figure out which cases should cause retry 1601 * and which shouldn't. XXX.
1602 */ 1603 state->attempt_retry = 1; 1604 return (error); 1605 } 1606 1607 if (foundobj == NULL) { 1608 /* 1609 * Success with no object returned means we're 1610 * creating something and it isn't already 1611 * there. Break out of the main loop now so 1612 * the code below doesn't have to test for 1613 * foundobj == NULL. 1614 */ 1615 /* lookup_once can't have dropped the searchdir */ 1616 KASSERT(searchdir != NULL || 1617 (cnp->cn_flags & ISLASTCN) != 0); 1618 break; 1619 } 1620 1621 /* 1622 * Check for symbolic link. If we've reached one, 1623 * follow it, unless we aren't supposed to. Back up 1624 * over any slashes that we skipped, as we will need 1625 * them again. 1626 */ 1627 if (namei_atsymlink(state, foundobj)) { 1628 /* Don't need searchdir locked any more. */ 1629 if (searchdir_locked) { 1630 searchdir_locked = false; 1631 VOP_UNLOCK(searchdir); 1632 } 1633 ndp->ni_pathlen += state->slashes; 1634 ndp->ni_next -= state->slashes; 1635 if (neverfollow) { 1636 error = EINVAL; 1637 } else if (searchdir == NULL) { 1638 /* 1639 * dholland 20160410: lookup_once only 1640 * drops searchdir if it crossed a 1641 * mount point. Therefore, if we get 1642 * here it means we crossed a mount 1643 * point to a mounted filesystem whose 1644 * root vnode is a symlink. In theory 1645 * we could continue at this point by 1646 * using the pre-crossing searchdir 1647 * (e.g. just take out an extra 1648 * reference on it before calling 1649 * lookup_once so we still have it), 1650 * but this will make an ugly mess and 1651 * it should never happen in practice 1652 * as only badly broken filesystems 1653 * have non-directory root vnodes. (I 1654 * have seen this sort of thing with 1655 * NFS occasionally but even then it 1656 * means something's badly wrong.)
1657 */ 1658 error = ENOTDIR; 1659 } else { 1660 /* 1661 * dholland 20110410: if we're at a 1662 * union mount it might make sense to 1663 * use the top of the union stack here 1664 * rather than the layer we found the 1665 * symlink in. (FUTURE) 1666 */ 1667 error = namei_follow(state, inhibitmagic, 1668 searchdir, foundobj, 1669 &searchdir); 1670 } 1671 if (error) { 1672 KASSERT(searchdir != foundobj); 1673 if (searchdir != NULL) { 1674 vrele(searchdir); 1675 } 1676 vrele(foundobj); 1677 ndp->ni_dvp = NULL; 1678 ndp->ni_vp = NULL; 1679 return error; 1680 } 1681 vrele(foundobj); 1682 foundobj = NULL; 1683 1684 /* 1685 * If we followed a symlink to `/' and there 1686 * are no more components after the symlink, 1687 * we're done with the loop and what we found 1688 * is the searchdir. 1689 */ 1690 if (cnp->cn_nameptr[0] == '\0') { 1691 KASSERT(searchdir != NULL); 1692 foundobj = searchdir; 1693 searchdir = NULL; 1694 cnp->cn_flags |= ISLASTCN; 1695 break; 1696 } 1697 1698 continue; 1699 } 1700 1701 /* 1702 * Not a symbolic link. 1703 * 1704 * Check for directory, if the component was 1705 * followed by a series of slashes. 1706 */ 1707 if ((foundobj->v_type != VDIR) && 1708 (cnp->cn_flags & REQUIREDIR)) { 1709 KASSERT(foundobj != searchdir); 1710 if (searchdir) { 1711 if (searchdir_locked) { 1712 searchdir_locked = false; 1713 vput(searchdir); 1714 } else { 1715 vrele(searchdir); 1716 } 1717 } else { 1718 KASSERT(!searchdir_locked); 1719 } 1720 vrele(foundobj); 1721 ndp->ni_dvp = NULL; 1722 ndp->ni_vp = NULL; 1723 state->attempt_retry = 1; 1724 return ENOTDIR; 1725 } 1726 1727 /* 1728 * Stop if we've reached the last component. 1729 */ 1730 if (cnp->cn_flags & ISLASTCN) { 1731 break; 1732 } 1733 1734 /* 1735 * Continue with the next component.
1736 */ 1737 cnp->cn_nameptr = ndp->ni_next; 1738 if (searchdir != NULL) { 1739 if (searchdir_locked) { 1740 searchdir_locked = false; 1741 vput(searchdir); 1742 } else { 1743 vrele(searchdir); 1744 } 1745 } /* the object just found becomes the directory searched on the next pass */ 1746 searchdir = foundobj; 1747 foundobj = NULL; 1748 } 1749 1750 KASSERT((cnp->cn_flags & LOCKPARENT) == 0 || searchdir == NULL || 1751 VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 1752 1753 skiploop: 1754 1755 if (foundobj != NULL) { 1756 if (foundobj == ndp->ni_erootdir) { 1757 /* 1758 * We are about to return the emulation root. 1759 * This isn't a good idea because code might 1760 * repeatedly lookup ".." until the file 1761 * matches that returned for "/" and loop 1762 * forever. So convert it to the real root. 1763 */ 1764 if (searchdir != NULL) { 1765 if (searchdir_locked) { 1766 vput(searchdir); 1767 searchdir_locked = false; 1768 } else { 1769 vrele(searchdir); 1770 } 1771 searchdir = NULL; 1772 } 1773 vrele(foundobj); 1774 foundobj = ndp->ni_rootdir; 1775 vref(foundobj); 1776 } 1777 1778 /* 1779 * If the caller requested the parent node (i.e. it's 1780 * a CREATE, DELETE, or RENAME), and we don't have one 1781 * (because this is the root directory, or we crossed 1782 * a mount point), then we must fail. 1783 * 1784 * 20210604 dholland when NONEXCLHACK is set (open 1785 * with O_CREAT but not O_EXCL) skip this logic. Since 1786 * we have a foundobj, open will not be creating, so 1787 * it doesn't actually need or use the searchdir, so 1788 * it's ok to return it even if it's on a different 1789 * volume, and it's also ok to return NULL; by setting 1790 * NONEXCLHACK the open code promises to cope with 1791 * those cases correctly. (That is, it should do what 1792 * it would do anyway, that is, just release the 1793 * searchdir, except not crash if it's null.) This is 1794 * needed because otherwise opening mountpoints with 1795 * O_CREAT but not O_EXCL fails... which is a silly 1796 * thing to do but ought to work.
(This whole issue 1797 * came to light because 3rd party code wanted to open 1798 * certain procfs nodes with O_CREAT for some 3rd 1799 * party reason, and it failed.) 1800 * 1801 * Note that NONEXCLHACK is properly a different 1802 * nameiop (it is partway between LOOKUP and CREATE) 1803 * but it was stuffed in as a flag instead to make the 1804 * resulting patch less invasive for pullup. Blah. 1805 */ 1806 if (cnp->cn_nameiop != LOOKUP && 1807 (searchdir == NULL || 1808 searchdir->v_mount != foundobj->v_mount) && 1809 (cnp->cn_flags & NONEXCLHACK) == 0) { 1810 if (searchdir) { 1811 if (searchdir_locked) { 1812 vput(searchdir); 1813 searchdir_locked = false; 1814 } else { 1815 vrele(searchdir); 1816 } 1817 searchdir = NULL; 1818 } 1819 vrele(foundobj); 1820 foundobj = NULL; 1821 ndp->ni_dvp = NULL; 1822 ndp->ni_vp = NULL; 1823 state->attempt_retry = 1; 1824 1825 switch (cnp->cn_nameiop) { 1826 case CREATE: 1827 return EEXIST; 1828 case DELETE: 1829 case RENAME: 1830 return EBUSY; 1831 default: 1832 break; 1833 } 1834 panic("Invalid nameiop\n"); 1835 } 1836 1837 /* 1838 * Disallow directory write attempts on read-only lookups. 1839 * Prefers EEXIST over EROFS for the CREATE case. 1840 */ 1841 if (state->rdonly && 1842 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1843 if (searchdir) { 1844 if (searchdir_locked) { 1845 vput(searchdir); 1846 searchdir_locked = false; 1847 } else { 1848 vrele(searchdir); 1849 } 1850 searchdir = NULL; 1851 } 1852 vrele(foundobj); 1853 foundobj = NULL; 1854 ndp->ni_dvp = NULL; 1855 ndp->ni_vp = NULL; 1856 state->attempt_retry = 1; 1857 return EROFS; 1858 } 1859 1860 /* Lock the leaf node if requested. */ 1861 if ((cnp->cn_flags & (LOCKLEAF | LOCKPARENT)) == LOCKPARENT && 1862 searchdir == foundobj) { 1863 /* 1864 * Note: if LOCKPARENT but not LOCKLEAF is 1865 * set, and searchdir == foundobj, this code 1866 * necessarily unlocks the parent as well as 1867 * the leaf.
That is, just because you specify 1868 * LOCKPARENT doesn't mean you necessarily get 1869 * a locked parent vnode. The code in 1870 * vfs_syscalls.c, and possibly elsewhere, 1871 * that uses this combination "knows" this, so 1872 * it can't be safely changed. Feh. XXX 1873 */ 1874 KASSERT(searchdir_locked); 1875 VOP_UNLOCK(searchdir); 1876 searchdir_locked = false; 1877 } else if ((cnp->cn_flags & LOCKLEAF) != 0 && 1878 (searchdir != foundobj || 1879 (cnp->cn_flags & LOCKPARENT) == 0)) { 1880 const int lktype = (cnp->cn_flags & LOCKSHARED) != 0 ? 1881 LK_SHARED : LK_EXCLUSIVE; 1882 vn_lock(foundobj, lktype | LK_RETRY); 1883 } 1884 } 1885 1886 /* 1887 * Done. 1888 */ 1889 1890 /* 1891 * If LOCKPARENT is not set, the parent directory isn't returned. 1892 */ 1893 if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) { 1894 vrele(searchdir); 1895 searchdir = NULL; 1896 } 1897 1898 ndp->ni_dvp = searchdir; 1899 ndp->ni_vp = foundobj; 1900 return 0; 1901 } 1902 1903 /* 1904 * Do namei; wrapper layer that handles TRYEMULROOT. * * If namei_oneroot() fails with state->attempt_retry set while an * emulation root is in use (ndp->ni_erootdir != NULL), the saved copy of * the path is restored, TRYEMULROOT is cleared and the lookup is retried. * Returns the error from the last namei_oneroot() call. 1905 */ 1906 static int 1907 namei_tryemulroot(struct namei_state *state, 1908 int neverfollow, int inhibitmagic, int isnfsd) 1909 { 1910 int error; 1911 1912 struct nameidata *ndp = state->ndp; 1913 struct componentname *cnp = state->cnp; 1914 const char *savepath = NULL; 1915 1916 KASSERT(cnp == &ndp->ni_cnd); 1917 /* keep a copy of the path string only when a retry is possible */ 1918 if (cnp->cn_flags & TRYEMULROOT) { 1919 savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf); 1920 } 1921 1922 emul_retry: 1923 state->attempt_retry = 0; 1924 1925 error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd); 1926 if (error) { 1927 /* 1928 * Once namei has started up, the existence of ni_erootdir 1929 * tells us whether we're working from an emulation root. 1930 * The TRYEMULROOT flag isn't necessarily authoritative.
1931 */ 1932 if (ndp->ni_erootdir != NULL && state->attempt_retry) { 1933 /* Retry the whole thing using the normal root */ 1934 cnp->cn_flags &= ~TRYEMULROOT; 1935 state->attempt_retry = 0; 1936 1937 /* kinda gross: copy the saved path string back into the pathbuf, then release the saved copy, before retrying */ 1938 strcpy(ndp->ni_pathbuf->pb_path, savepath); 1939 pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath); 1940 savepath = NULL; 1941 1942 goto emul_retry; 1943 } 1944 } /* release the saved copy if it was not consumed by a retry */ 1945 if (savepath != NULL) { 1946 pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath); 1947 } 1948 return error; 1949 } 1950 1951 /* 1952 * External interface. * * Runs the lookup described by ndp through namei_tryemulroot() with * neverfollow, inhibitmagic and isnfsd all zero. Returns the error code * from that call; on error ni_dvp and ni_vp are asserted to be NULL. 1953 */ 1954 int 1955 namei(struct nameidata *ndp) 1956 { 1957 struct namei_state state; 1958 int error; 1959 1960 namei_init(&state, ndp); 1961 error = namei_tryemulroot(&state, 1962 0/*!neverfollow*/, 0/*!inhibitmagic*/, 1963 0/*isnfsd*/); 1964 namei_cleanup(&state); 1965 1966 if (error) { 1967 /* make sure no stray refs leak out */ 1968 KASSERT(ndp->ni_dvp == NULL); 1969 KASSERT(ndp->ni_vp == NULL); 1970 } 1971 1972 return error; 1973 } 1974 1975 //////////////////////////////////////////////////////////// 1976 1977 /* 1978 * External interface used by nfsd. This is basically different from 1979 * namei only in that it has the ability to pass in the "current 1980 * directory", and uses an extra flag "neverfollow" for which there's 1981 * no physical flag defined in namei.h. (There used to be a cut&paste 1982 * copy of about half of namei in nfsd to allow these minor 1983 * adjustments to exist.) 1984 * 1985 * XXX: the namei interface should be adjusted so nfsd can just use 1986 * ordinary namei().
1987 */ 1988 int 1989 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow) 1990 { 1991 struct namei_state state; 1992 int error; 1993 /* the caller must not have set ni_atdir already; forcecwd is stored there */ 1994 KASSERT(ndp->ni_atdir == NULL); 1995 ndp->ni_atdir = forcecwd; 1996 1997 namei_init(&state, ndp); /* same path as namei(), but with inhibitmagic and isnfsd forced on */ 1998 error = namei_tryemulroot(&state, 1999 neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/); 2000 namei_cleanup(&state); 2001 2002 if (error) { 2003 /* make sure no stray refs leak out */ 2004 KASSERT(ndp->ni_dvp == NULL); 2005 KASSERT(ndp->ni_vp == NULL); 2006 } 2007 2008 return error; 2009 } 2010 2011 /* 2012 * A second external interface used by nfsd. This turns out to be a 2013 * single lookup used by the WebNFS code (ha!) to get "index.html" or 2014 * equivalent when asked for a directory. It should eventually evolve 2015 * into some kind of namei_once() call; for the time being it's kind 2016 * of a mess. XXX. 2017 * 2018 * dholland 20110109: I don't think it works, and I don't think it 2019 * worked before I started hacking and slashing either, and I doubt 2020 * anyone will ever notice. 2021 */ 2022 2023 /* 2024 * Internals. This calls lookup_once() after setting up the assorted 2025 * pieces of state the way they ought to be.
2026 */ 2027 static int 2028 do_lookup_for_nfsd_index(struct namei_state *state) 2029 { 2030 int error; 2031 2032 struct componentname *cnp = state->cnp; 2033 struct nameidata *ndp = state->ndp; 2034 struct vnode *startdir; 2035 struct vnode *foundobj; 2036 bool startdir_locked; 2037 const char *cp; /* pointer into pathname argument */ 2038 2039 KASSERT(cnp == &ndp->ni_cnd); 2040 2041 startdir = state->ndp->ni_atdir; 2042 /* NOTE(review): lookup_for_nfsd_index() sets ni_pnbuf to NULL before calling this function; unless namei_init() assigns it, cn_nameptr is NULL here -- confirm */ 2043 cnp->cn_nameptr = ndp->ni_pnbuf; 2044 state->docache = 1; 2045 state->rdonly = cnp->cn_flags & RDONLY; 2046 ndp->ni_dvp = NULL; 2047 2048 error = VOP_PARSEPATH(startdir, cnp->cn_nameptr, &cnp->cn_namelen); 2049 if (error) { 2050 return error; 2051 } 2052 2053 cp = cnp->cn_nameptr + cnp->cn_namelen; 2054 KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX); 2055 ndp->ni_pathlen -= cnp->cn_namelen; 2056 ndp->ni_next = cp; 2057 state->slashes = 0; /* single component: mark it as the last one and do not require a directory */ 2058 cnp->cn_flags &= ~REQUIREDIR; 2059 cnp->cn_flags |= MAKEENTRY|ISLASTCN; 2060 2061 if (cnp->cn_namelen == 2 && 2062 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 2063 cnp->cn_flags |= ISDOTDOT; 2064 else 2065 cnp->cn_flags &= ~ISDOTDOT; 2066 2067 /* 2068 * Because lookup_once can change the startdir, we need our 2069 * own reference to it to avoid consuming the caller's. 2070 */ 2071 vref(startdir); 2072 error = lookup_once(state, startdir, &startdir, &foundobj, 2073 &startdir_locked); 2074 2075 KASSERT((cnp->cn_flags & LOCKPARENT) == 0); 2076 if (startdir_locked) { 2077 VOP_UNLOCK(startdir); 2078 startdir_locked = false; 2079 } 2080 2081 /* 2082 * If the vnode we found is mounted on, then cross the mount and get 2083 * the root vnode in foundobj. If this encounters an error, it will 2084 * dispose of foundobj, but searchdir is untouched.
2085 */ 2086 if (error == 0 && foundobj != NULL && 2087 foundobj->v_type == VDIR && 2088 foundobj->v_mountedhere != NULL && 2089 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 2090 error = lookup_crossmount(state, &startdir, &foundobj, 2091 &startdir_locked); 2092 } 2093 2094 /* Now toss startdir and see if we have an error. */ 2095 if (startdir != NULL) 2096 vrele(startdir); 2097 if (error) 2098 foundobj = NULL; 2099 else if (foundobj != NULL && (cnp->cn_flags & LOCKLEAF) != 0) 2100 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 2101 /* result (possibly NULL) is handed back through ni_vp */ 2102 ndp->ni_vp = foundobj; 2103 return (error); 2104 } 2105 2106 /* 2107 * External interface. The partitioning between this function and the 2108 * above isn't very clear - the above function exists mostly so code 2109 * that uses "state->" can be shuffled around without having to change 2110 * it to "state.". * * Returns ENAMETOOLONG if the name is longer than KERNEL_NAME_MAX, EINVAL * if it contains a slash, and otherwise the result of * do_lookup_for_nfsd_index(). 2111 */ 2112 int 2113 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir) 2114 { 2115 struct namei_state state; 2116 int error; 2117 2118 KASSERT(ndp->ni_atdir == NULL); 2119 ndp->ni_atdir = startdir; 2120 2121 /* 2122 * Note: the name sent in here (is not|should not be) allowed 2123 * to contain a slash. 2124 */ 2125 if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) { 2126 return ENAMETOOLONG; 2127 } 2128 if (strchr(ndp->ni_pathbuf->pb_path, '/')) { 2129 return EINVAL; 2130 } 2131 /* path length includes the terminating NUL */ 2132 ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1; 2133 ndp->ni_pnbuf = NULL; 2134 ndp->ni_cnd.cn_nameptr = NULL; 2135 2136 namei_init(&state, ndp); 2137 error = do_lookup_for_nfsd_index(&state); 2138 namei_cleanup(&state); 2139 2140 return error; 2141 } 2142 2143 //////////////////////////////////////////////////////////// 2144 2145 /* 2146 * Reacquire a path name component. 2147 * dvp is locked on entry and exit. 2148 * *vpp is locked on exit unless it's NULL.
2149 */ 2150 int 2151 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy) 2152 { 2153 int rdonly; /* lookup read-only flag bit */ 2154 int error = 0; 2155 #ifdef DEBUG 2156 size_t newlen; /* DEBUG: check name len */ 2157 const char *cp; /* DEBUG: check name ptr */ 2158 #endif /* DEBUG */ 2159 /* the dummy parameter is intentionally unused */ 2160 (void)dummy; 2161 2162 /* 2163 * Setup: break out flag bits into variables. 2164 */ 2165 rdonly = cnp->cn_flags & RDONLY; 2166 2167 /* 2168 * Search a new directory. 2169 * 2170 * The cn_hash value is for use by vfs_cache. 2171 * The last component of the filename is left accessible via 2172 * cnp->cn_nameptr for callers that need the name. Callers needing 2173 * the name set the SAVENAME flag. When done, they assume 2174 * responsibility for freeing the pathname buffer. 2175 */ 2176 #ifdef DEBUG 2177 #if 0 2178 cp = NULL; 2179 newhash = namei_hash(cnp->cn_nameptr, &cp); 2180 if ((uint32_t)newhash != (uint32_t)cnp->cn_hash) 2181 panic("relookup: bad hash"); 2182 #endif /* DEBUG-only sanity checks: the stored name length must match a fresh parse, and only slashes may follow the name */ 2183 error = VOP_PARSEPATH(dvp, cnp->cn_nameptr, &newlen); 2184 if (error) { 2185 panic("relookup: parsepath failed with error %d", error); 2186 } 2187 if (cnp->cn_namelen != newlen) 2188 panic("relookup: bad len"); 2189 cp = cnp->cn_nameptr + cnp->cn_namelen; 2190 while (*cp == '/') 2191 cp++; 2192 if (*cp != 0) 2193 panic("relookup: not last component"); 2194 #endif /* DEBUG */ 2195 2196 /* 2197 * Check for degenerate name (e.g. / or "") 2198 * which is a way of talking about a directory, 2199 * e.g. like "/." or ".". 2200 */ 2201 if (cnp->cn_nameptr[0] == '\0') 2202 panic("relookup: null name"); 2203 2204 if (cnp->cn_flags & ISDOTDOT) 2205 panic("relookup: lookup on dot-dot"); 2206 2207 /* 2208 * We now have a segment name to search for, and a directory to search.
2209 */ 2210 *vpp = NULL; 2211 error = VOP_LOOKUP(dvp, vpp, cnp); 2212 if ((error) != 0) { 2213 KASSERTMSG((*vpp == NULL), 2214 "leaf `%s' should be empty but is %p", 2215 cnp->cn_nameptr, *vpp); /* EJUSTRETURN is not treated as a failure: continue with *vpp == NULL */ 2216 if (error != EJUSTRETURN) 2217 goto bad; 2218 } 2219 2220 /* 2221 * Check for symbolic link 2222 */ 2223 KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK || 2224 (cnp->cn_flags & FOLLOW) == 0), 2225 "relookup: symlink found"); 2226 2227 /* 2228 * Check for read-only lookups. 2229 */ 2230 if (rdonly && cnp->cn_nameiop != LOOKUP) { 2231 error = EROFS; 2232 if (*vpp) { 2233 vrele(*vpp); 2234 } 2235 goto bad; 2236 } 2237 /* 2238 * Lock result. Skipped when *vpp is NULL or is dvp itself; if the * lock attempt fails the reference is dropped and the error returned. 2239 */ 2240 if (*vpp && *vpp != dvp) { 2241 error = vn_lock(*vpp, LK_EXCLUSIVE); 2242 if (error != 0) { 2243 vrele(*vpp); 2244 goto bad; 2245 } 2246 } 2247 return (0); 2248 2249 bad: /* every failure path hands back a NULL *vpp */ 2250 *vpp = NULL; 2251 return (error); 2252 } 2253 2254 /* 2255 * namei_simple - simple forms of namei. 2256 * 2257 * These are wrappers to allow the simple case callers of namei to be 2258 * left alone while everything else changes under them.
2259 */ 2260 2261 /* Flags */ 2262 struct namei_simple_flags_type { 2263 int dummy; 2264 }; 2265 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft; 2266 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn; 2267 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt; 2268 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn; 2269 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft; 2270 2271 static 2272 int 2273 namei_simple_convert_flags(namei_simple_flags_t sflags) 2274 { 2275 if (sflags == NSM_NOFOLLOW_NOEMULROOT) 2276 return NOFOLLOW | 0; 2277 if (sflags == NSM_NOFOLLOW_TRYEMULROOT) 2278 return NOFOLLOW | TRYEMULROOT; 2279 if (sflags == NSM_FOLLOW_NOEMULROOT) 2280 return FOLLOW | 0; 2281 if (sflags == NSM_FOLLOW_TRYEMULROOT) 2282 return FOLLOW | TRYEMULROOT; 2283 panic("namei_simple_convert_flags: bogus sflags\n"); 2284 return 0; 2285 } 2286 2287 int 2288 namei_simple_kernel(const char *path, namei_simple_flags_t sflags, 2289 struct vnode **vp_ret) 2290 { 2291 return nameiat_simple_kernel(NULL, path, sflags, vp_ret); 2292 } 2293 2294 int 2295 nameiat_simple_kernel(struct vnode *dvp, const char *path, 2296 namei_simple_flags_t sflags, struct vnode **vp_ret) 2297 { 2298 struct nameidata nd; 2299 struct pathbuf *pb; 2300 int err; 2301 2302 pb = pathbuf_create(path); 2303 if (pb == NULL) { 2304 return ENOMEM; 2305 } 2306 2307 NDINIT(&nd, 2308 LOOKUP, 2309 namei_simple_convert_flags(sflags), 2310 pb); 2311 2312 if (dvp != NULL) 2313 NDAT(&nd, dvp); 2314 2315 err = namei(&nd); 2316 if (err != 0) { 2317 pathbuf_destroy(pb); 2318 return err; 2319 } 2320 *vp_ret = nd.ni_vp; 2321 pathbuf_destroy(pb); 2322 return 0; 2323 } 2324 2325 int 2326 namei_simple_user(const char *path, namei_simple_flags_t sflags, 2327 struct vnode **vp_ret) 2328 { 2329 return nameiat_simple_user(NULL, path, sflags, vp_ret); 2330 } 2331 2332 int 2333 nameiat_simple_user(struct vnode *dvp, const char *path, 2334 namei_simple_flags_t sflags, struct vnode 
**vp_ret) 2335 { 2336 struct pathbuf *pb; 2337 struct nameidata nd; 2338 int err; 2339 2340 err = pathbuf_copyin(path, &pb); 2341 if (err) { 2342 return err; 2343 } 2344 2345 NDINIT(&nd, 2346 LOOKUP, 2347 namei_simple_convert_flags(sflags), 2348 pb); 2349 2350 if (dvp != NULL) 2351 NDAT(&nd, dvp); 2352 2353 err = namei(&nd); 2354 if (err != 0) { 2355 pathbuf_destroy(pb); 2356 return err; 2357 } 2358 *vp_ret = nd.ni_vp; 2359 pathbuf_destroy(pb); 2360 return 0; 2361 } 2362