1 /* $NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $"); 41 42 #ifdef _KERNEL_OPT 43 #include "opt_magiclinks.h" 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/syslimits.h> 50 #include <sys/time.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/mount.h> 54 #include <sys/errno.h> 55 #include <sys/filedesc.h> 56 #include <sys/hash.h> 57 #include <sys/proc.h> 58 #include <sys/syslog.h> 59 #include <sys/kauth.h> 60 #include <sys/ktrace.h> 61 #include <sys/dirent.h> 62 63 #ifndef MAGICLINKS 64 #define MAGICLINKS 0 65 #endif 66 67 int vfs_magiclinks = MAGICLINKS; 68 69 __CTASSERT(MAXNAMLEN == NAME_MAX); 70 71 /* 72 * Substitute replacement text for 'magic' strings in symlinks. 73 * Returns 0 if successful, and returns non-zero if an error 74 * occurs. (Currently, the only possible error is running out 75 * of temporary pathname space.) 76 * 77 * Looks for "@<string>" and "@<string>/", where <string> is a 78 * recognized 'magic' string. Replaces the "@<string>" with the 79 * appropriate replacement text. (Note that in some cases the 80 * replacement text may have zero length.) 81 * 82 * This would have been table driven, but the variance in 83 * replacement strings (and replacement string lengths) made 84 * that impractical. 
85 */ 86 #define VNL(x) \ 87 (sizeof(x) - 1) 88 89 #define VO '{' 90 #define VC '}' 91 92 #define MATCH(str) \ 93 ((termchar == '/' && i + VNL(str) == *len) || \ 94 (i + VNL(str) < *len && \ 95 cp[i + VNL(str)] == termchar)) && \ 96 !strncmp((str), &cp[i], VNL(str)) 97 98 #define SUBSTITUTE(m, s, sl) \ 99 if ((newlen + (sl)) >= MAXPATHLEN) \ 100 return 1; \ 101 i += VNL(m); \ 102 if (termchar != '/') \ 103 i++; \ 104 (void)memcpy(&tmp[newlen], (s), (sl)); \ 105 newlen += (sl); \ 106 change = 1; \ 107 termchar = '/'; 108 109 static int 110 symlink_magic(struct proc *p, char *cp, size_t *len) 111 { 112 char *tmp; 113 size_t change, i, newlen, slen; 114 char termchar = '/'; 115 char idtmp[11]; /* enough for 32 bit *unsigned* integer */ 116 117 118 tmp = PNBUF_GET(); 119 for (change = i = newlen = 0; i < *len; ) { 120 if (cp[i] != '@') { 121 tmp[newlen++] = cp[i++]; 122 continue; 123 } 124 125 i++; 126 127 /* Check for @{var} syntax. */ 128 if (cp[i] == VO) { 129 termchar = VC; 130 i++; 131 } 132 133 /* 134 * The following checks should be ordered according 135 * to frequency of use. 
136 */ 137 if (MATCH("machine_arch")) { 138 slen = VNL(MACHINE_ARCH); 139 SUBSTITUTE("machine_arch", MACHINE_ARCH, slen); 140 } else if (MATCH("machine")) { 141 slen = VNL(MACHINE); 142 SUBSTITUTE("machine", MACHINE, slen); 143 } else if (MATCH("hostname")) { 144 SUBSTITUTE("hostname", hostname, hostnamelen); 145 } else if (MATCH("osrelease")) { 146 slen = strlen(osrelease); 147 SUBSTITUTE("osrelease", osrelease, slen); 148 } else if (MATCH("emul")) { 149 slen = strlen(p->p_emul->e_name); 150 SUBSTITUTE("emul", p->p_emul->e_name, slen); 151 } else if (MATCH("kernel_ident")) { 152 slen = strlen(kernel_ident); 153 SUBSTITUTE("kernel_ident", kernel_ident, slen); 154 } else if (MATCH("domainname")) { 155 SUBSTITUTE("domainname", domainname, domainnamelen); 156 } else if (MATCH("ostype")) { 157 slen = strlen(ostype); 158 SUBSTITUTE("ostype", ostype, slen); 159 } else if (MATCH("uid")) { 160 slen = snprintf(idtmp, sizeof(idtmp), "%u", 161 kauth_cred_geteuid(kauth_cred_get())); 162 SUBSTITUTE("uid", idtmp, slen); 163 } else if (MATCH("ruid")) { 164 slen = snprintf(idtmp, sizeof(idtmp), "%u", 165 kauth_cred_getuid(kauth_cred_get())); 166 SUBSTITUTE("ruid", idtmp, slen); 167 } else if (MATCH("gid")) { 168 slen = snprintf(idtmp, sizeof(idtmp), "%u", 169 kauth_cred_getegid(kauth_cred_get())); 170 SUBSTITUTE("gid", idtmp, slen); 171 } else if (MATCH("rgid")) { 172 slen = snprintf(idtmp, sizeof(idtmp), "%u", 173 kauth_cred_getgid(kauth_cred_get())); 174 SUBSTITUTE("rgid", idtmp, slen); 175 } else { 176 tmp[newlen++] = '@'; 177 if (termchar == VC) 178 tmp[newlen++] = VO; 179 } 180 } 181 182 if (change) { 183 (void)memcpy(cp, tmp, newlen); 184 *len = newlen; 185 } 186 PNBUF_PUT(tmp); 187 188 return 0; 189 } 190 191 #undef VNL 192 #undef VO 193 #undef VC 194 #undef MATCH 195 #undef SUBSTITUTE 196 197 //////////////////////////////////////////////////////////// 198 199 /* 200 * Determine the namei hash (for the namecache) for name. 201 * If *ep != NULL, hash from name to ep-1. 
202 * If *ep == NULL, hash from name until the first NUL or '/', and 203 * return the location of this termination character in *ep. 204 * 205 * This function returns an equivalent hash to the MI hash32_strn(). 206 * The latter isn't used because in the *ep == NULL case, determining 207 * the length of the string to the first NUL or `/' and then calling 208 * hash32_strn() involves unnecessary double-handling of the data. 209 */ 210 uint32_t 211 namei_hash(const char *name, const char **ep) 212 { 213 uint32_t hash; 214 215 hash = HASH32_STR_INIT; 216 if (*ep != NULL) { 217 for (; name < *ep; name++) 218 hash = hash * 33 + *(const uint8_t *)name; 219 } else { 220 for (; *name != '\0' && *name != '/'; name++) 221 hash = hash * 33 + *(const uint8_t *)name; 222 *ep = name; 223 } 224 return (hash + (hash >> 5)); 225 } 226 227 /* 228 * Find the end of the first path component in NAME and return its 229 * length. 230 */ 231 static size_t 232 namei_getcomponent(const char *name) 233 { 234 size_t pos; 235 236 pos = 0; 237 while (name[pos] != '\0' && name[pos] != '/') { 238 pos++; 239 } 240 return pos; 241 } 242 243 //////////////////////////////////////////////////////////// 244 245 /* 246 * Sealed abstraction for pathnames. 247 * 248 * System-call-layer level code that is going to call namei should 249 * first create a pathbuf and adjust all the bells and whistles on it 250 * as needed by context. 
251 */ 252 253 struct pathbuf { 254 char *pb_path; 255 char *pb_pathcopy; 256 unsigned pb_pathcopyuses; 257 }; 258 259 static struct pathbuf * 260 pathbuf_create_raw(void) 261 { 262 struct pathbuf *pb; 263 264 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 265 pb->pb_path = PNBUF_GET(); 266 if (pb->pb_path == NULL) { 267 kmem_free(pb, sizeof(*pb)); 268 return NULL; 269 } 270 pb->pb_pathcopy = NULL; 271 pb->pb_pathcopyuses = 0; 272 return pb; 273 } 274 275 void 276 pathbuf_destroy(struct pathbuf *pb) 277 { 278 KASSERT(pb->pb_pathcopyuses == 0); 279 KASSERT(pb->pb_pathcopy == NULL); 280 PNBUF_PUT(pb->pb_path); 281 kmem_free(pb, sizeof(*pb)); 282 } 283 284 struct pathbuf * 285 pathbuf_assimilate(char *pnbuf) 286 { 287 struct pathbuf *pb; 288 289 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 290 pb->pb_path = pnbuf; 291 pb->pb_pathcopy = NULL; 292 pb->pb_pathcopyuses = 0; 293 return pb; 294 } 295 296 struct pathbuf * 297 pathbuf_create(const char *path) 298 { 299 struct pathbuf *pb; 300 int error; 301 302 pb = pathbuf_create_raw(); 303 if (pb == NULL) { 304 return NULL; 305 } 306 error = copystr(path, pb->pb_path, PATH_MAX, NULL); 307 if (error != 0) { 308 KASSERT(!"kernel path too long in pathbuf_create"); 309 /* make sure it's null-terminated, just in case */ 310 pb->pb_path[PATH_MAX-1] = '\0'; 311 } 312 return pb; 313 } 314 315 int 316 pathbuf_copyin(const char *userpath, struct pathbuf **ret) 317 { 318 struct pathbuf *pb; 319 int error; 320 321 pb = pathbuf_create_raw(); 322 if (pb == NULL) { 323 return ENOMEM; 324 } 325 error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL); 326 if (error) { 327 pathbuf_destroy(pb); 328 return error; 329 } 330 *ret = pb; 331 return 0; 332 } 333 334 /* 335 * XXX should not exist: 336 * 1. whether a pointer is kernel or user should be statically checkable. 337 * 2. copyin should be handled by the upper part of the syscall layer, 338 * not in here. 
339 */ 340 int 341 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret) 342 { 343 if (seg == UIO_USERSPACE) { 344 return pathbuf_copyin(path, ret); 345 } else { 346 *ret = pathbuf_create(path); 347 if (*ret == NULL) { 348 return ENOMEM; 349 } 350 return 0; 351 } 352 } 353 354 /* 355 * Get a copy of the path buffer as it currently exists. If this is 356 * called after namei starts the results may be arbitrary. 357 */ 358 void 359 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen) 360 { 361 strlcpy(buf, pb->pb_path, maxlen); 362 } 363 364 /* 365 * These two functions allow access to a saved copy of the original 366 * path string. The first copy should be gotten before namei is 367 * called. Each copy that is gotten should be put back. 368 */ 369 370 const char * 371 pathbuf_stringcopy_get(struct pathbuf *pb) 372 { 373 if (pb->pb_pathcopyuses == 0) { 374 pb->pb_pathcopy = PNBUF_GET(); 375 strcpy(pb->pb_pathcopy, pb->pb_path); 376 } 377 pb->pb_pathcopyuses++; 378 return pb->pb_pathcopy; 379 } 380 381 void 382 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str) 383 { 384 KASSERT(str == pb->pb_pathcopy); 385 KASSERT(pb->pb_pathcopyuses > 0); 386 pb->pb_pathcopyuses--; 387 if (pb->pb_pathcopyuses == 0) { 388 PNBUF_PUT(pb->pb_pathcopy); 389 pb->pb_pathcopy = NULL; 390 } 391 } 392 393 394 //////////////////////////////////////////////////////////// 395 396 /* 397 * namei: convert a pathname into a pointer to a (maybe-locked) vnode, 398 * and maybe also its parent directory vnode, and assorted other guff. 399 * See namei(9) for the interface documentation. 400 * 401 * 402 * The FOLLOW flag is set when symbolic links are to be followed 403 * when they occur at the end of the name translation process. 404 * Symbolic links are always followed for all other pathname 405 * components other than the last. 406 * 407 * The segflg defines whether the name is to be copied from user 408 * space or kernel space. 
409 * 410 * Overall outline of namei: 411 * 412 * copy in name 413 * get starting directory 414 * while (!done && !error) { 415 * call lookup to search path. 416 * if symbolic link, massage name in buffer and continue 417 * } 418 */ 419 420 /* 421 * Search a pathname. 422 * This is a very central and rather complicated routine. 423 * 424 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 425 * The starting directory is passed in. The pathname is descended 426 * until done, or a symbolic link is encountered. The variable ni_more 427 * is clear if the path is completed; it is set to one if a symbolic 428 * link needing interpretation is encountered. 429 * 430 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 431 * whether the name is to be looked up, created, renamed, or deleted. 432 * When CREATE, RENAME, or DELETE is specified, information usable in 433 * creating, renaming, or deleting a directory entry may be calculated. 434 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 435 * locked. Otherwise the parent directory is not returned. If the target 436 * of the pathname exists and LOCKLEAF is or'ed into the flag the target 437 * is returned locked, otherwise it is returned unlocked. When creating 438 * or renaming and LOCKPARENT is specified, the target may not be ".". 439 * When deleting and LOCKPARENT is specified, the target may be ".". 440 * 441 * Overall outline of lookup: 442 * 443 * dirloop: 444 * identify next component of name at ndp->ni_ptr 445 * handle degenerate case where name is null string 446 * if .. and crossing mount points and on mounted filesys, find parent 447 * call VOP_LOOKUP routine for next component name 448 * directory vnode returned in ni_dvp, locked. 449 * component vnode returned in ni_vp (if it exists), locked. 
450 * if result vnode is mounted on and crossing mount points, 451 * find mounted on vnode 452 * if more components of name, do next level at dirloop 453 * return the answer in ni_vp, locked if LOCKLEAF set 454 * if LOCKPARENT set, return locked parent in ni_dvp 455 */ 456 457 458 /* 459 * Internal state for a namei operation. 460 * 461 * cnp is always equal to &ndp->ni_cnp. 462 */ 463 struct namei_state { 464 struct nameidata *ndp; 465 struct componentname *cnp; 466 467 int docache; /* == 0 do not cache last component */ 468 int rdonly; /* lookup read-only flag bit */ 469 int slashes; 470 471 unsigned attempt_retry:1; /* true if error allows emul retry */ 472 unsigned root_referenced:1; /* true if ndp->ni_rootdir and 473 ndp->ni_erootdir were referenced */ 474 }; 475 476 477 /* 478 * Initialize the namei working state. 479 */ 480 static void 481 namei_init(struct namei_state *state, struct nameidata *ndp) 482 { 483 484 state->ndp = ndp; 485 state->cnp = &ndp->ni_cnd; 486 487 state->docache = 0; 488 state->rdonly = 0; 489 state->slashes = 0; 490 491 state->root_referenced = 0; 492 493 KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc"); 494 KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0), 495 "namei: nameiop contaminated with flags: %08"PRIx32, 496 state->cnp->cn_nameiop); 497 KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0), 498 "name: flags contaminated with nameiops: %08"PRIx32, 499 state->cnp->cn_flags); 500 501 /* 502 * The buffer for name translation shall be the one inside the 503 * pathbuf. 504 */ 505 state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path; 506 } 507 508 /* 509 * Clean up the working namei state, leaving things ready for return 510 * from namei. 
511 */ 512 static void 513 namei_cleanup(struct namei_state *state) 514 { 515 KASSERT(state->cnp == &state->ndp->ni_cnd); 516 517 if (state->root_referenced) { 518 if (state->ndp->ni_rootdir != NULL) 519 vrele(state->ndp->ni_rootdir); 520 if (state->ndp->ni_erootdir != NULL) 521 vrele(state->ndp->ni_erootdir); 522 } 523 } 524 525 ////////////////////////////// 526 527 /* 528 * Get the directory context. 529 * Initializes the rootdir and erootdir state and returns a reference 530 * to the starting dir. 531 */ 532 static struct vnode * 533 namei_getstartdir(struct namei_state *state) 534 { 535 struct nameidata *ndp = state->ndp; 536 struct componentname *cnp = state->cnp; 537 struct cwdinfo *cwdi; /* pointer to cwd state */ 538 struct vnode *rootdir, *erootdir, *curdir, *startdir; 539 540 if (state->root_referenced) { 541 if (state->ndp->ni_rootdir != NULL) 542 vrele(state->ndp->ni_rootdir); 543 if (state->ndp->ni_erootdir != NULL) 544 vrele(state->ndp->ni_erootdir); 545 state->root_referenced = 0; 546 } 547 548 /* NB: must not block while inspecting the cwdinfo. 
*/ 549 cwdi = cwdenter(RW_READER); 550 551 /* root dir */ 552 if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { 553 rootdir = rootvnode; 554 } else { 555 rootdir = cwdi->cwdi_rdir; 556 } 557 558 /* emulation root dir, if any */ 559 if ((cnp->cn_flags & TRYEMULROOT) == 0) { 560 /* if we don't want it, don't fetch it */ 561 erootdir = NULL; 562 } else if (cnp->cn_flags & EMULROOTSET) { 563 /* explicitly set emulroot; "/../" doesn't override this */ 564 erootdir = ndp->ni_erootdir; 565 } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) { 566 /* explicit reference to real rootdir */ 567 erootdir = NULL; 568 } else { 569 /* may be null */ 570 erootdir = cwdi->cwdi_edir; 571 } 572 573 /* current dir */ 574 curdir = cwdi->cwdi_cdir; 575 576 if (ndp->ni_pnbuf[0] != '/') { 577 if (ndp->ni_atdir != NULL) { 578 startdir = ndp->ni_atdir; 579 } else { 580 startdir = curdir; 581 } 582 erootdir = NULL; 583 } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) { 584 startdir = erootdir; 585 } else { 586 startdir = rootdir; 587 erootdir = NULL; 588 } 589 590 state->ndp->ni_rootdir = rootdir; 591 state->ndp->ni_erootdir = erootdir; 592 593 /* 594 * Get a reference to the start dir so we can safely unlock cwdi. 595 * 596 * Must hold references to rootdir and erootdir while we're running. 597 * A multithreaded process may chroot during namei. 598 */ 599 if (startdir != NULL) 600 vref(startdir); 601 if (state->ndp->ni_rootdir != NULL) 602 vref(state->ndp->ni_rootdir); 603 if (state->ndp->ni_erootdir != NULL) 604 vref(state->ndp->ni_erootdir); 605 state->root_referenced = 1; 606 607 cwdexit(cwdi); 608 return startdir; 609 } 610 611 /* 612 * Get the directory context for the nfsd case, in parallel to 613 * getstartdir. Initializes the rootdir and erootdir state and 614 * returns a reference to the passed-in starting dir. 
615 */ 616 static struct vnode * 617 namei_getstartdir_for_nfsd(struct namei_state *state) 618 { 619 KASSERT(state->ndp->ni_atdir != NULL); 620 621 /* always use the real root, and never set an emulation root */ 622 if (rootvnode == NULL) { 623 return NULL; 624 } 625 state->ndp->ni_rootdir = rootvnode; 626 state->ndp->ni_erootdir = NULL; 627 628 vref(state->ndp->ni_atdir); 629 KASSERT(! state->root_referenced); 630 vref(state->ndp->ni_rootdir); 631 state->root_referenced = 1; 632 return state->ndp->ni_atdir; 633 } 634 635 636 /* 637 * Ktrace the namei operation. 638 */ 639 static void 640 namei_ktrace(struct namei_state *state) 641 { 642 struct nameidata *ndp = state->ndp; 643 struct componentname *cnp = state->cnp; 644 struct lwp *self = curlwp; /* thread doing namei() */ 645 const char *emul_path; 646 647 if (ktrpoint(KTR_NAMEI)) { 648 if (ndp->ni_erootdir != NULL) { 649 /* 650 * To make any sense, the trace entry need to have the 651 * text of the emulation path prepended. 652 * Usually we can get this from the current process, 653 * but when called from emul_find_interp() it is only 654 * in the exec_package - so we get it passed in ni_next 655 * (this is a hack). 656 */ 657 if (cnp->cn_flags & EMULROOTSET) 658 emul_path = ndp->ni_next; 659 else 660 emul_path = self->l_proc->p_emul->e_path; 661 ktrnamei2(emul_path, strlen(emul_path), 662 ndp->ni_pnbuf, ndp->ni_pathlen); 663 } else 664 ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen); 665 } 666 } 667 668 /* 669 * Start up namei. Find the root dir and cwd, establish the starting 670 * directory for lookup, and lock it. Also calls ktrace when 671 * appropriate. 
672 */ 673 static int 674 namei_start(struct namei_state *state, int isnfsd, 675 struct vnode **startdir_ret) 676 { 677 struct nameidata *ndp = state->ndp; 678 struct vnode *startdir; 679 680 /* length includes null terminator (was originally from copyinstr) */ 681 ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1; 682 683 /* 684 * POSIX.1 requirement: "" is not a valid file name. 685 */ 686 if (ndp->ni_pathlen == 1) { 687 ndp->ni_erootdir = NULL; 688 return ENOENT; 689 } 690 691 ndp->ni_loopcnt = 0; 692 693 /* Get starting directory, set up root, and ktrace. */ 694 if (isnfsd) { 695 startdir = namei_getstartdir_for_nfsd(state); 696 /* no ktrace */ 697 } else { 698 startdir = namei_getstartdir(state); 699 namei_ktrace(state); 700 } 701 702 if (startdir == NULL) { 703 return ENOENT; 704 } 705 706 /* NDAT may feed us with a non directory namei_getstartdir */ 707 if (startdir->v_type != VDIR) { 708 vrele(startdir); 709 return ENOTDIR; 710 } 711 712 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 713 714 *startdir_ret = startdir; 715 return 0; 716 } 717 718 /* 719 * Check for being at a symlink that we're going to follow. 720 */ 721 static inline int 722 namei_atsymlink(struct namei_state *state, struct vnode *foundobj) 723 { 724 return (foundobj->v_type == VLNK) && 725 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR)); 726 } 727 728 /* 729 * Follow a symlink. 730 * 731 * Updates searchdir. inhibitmagic causes magic symlinks to not be 732 * interpreted; this is used by nfsd. 
 *
 * Unlocks foundobj on success (ugh)
 *
 * On entry both searchdir and foundobj must be locked exclusively.
 * The link text is read into a scratch buffer, optionally run through
 * symlink_magic(), joined with the not-yet-consumed remainder of the
 * path (ni_next, ni_pathlen bytes) and copied back to the start of
 * ni_pnbuf.  If the result is absolute, searchdir is released and
 * replaced by the emulation root or the root directory.
 *
 * Returns 0 with the (possibly new) locked search directory stored in
 * *newsearchdir_ret, or: ELOOP once ni_loopcnt reaches MAXSYMLINKS,
 * ENOENT for an empty link, ENAMETOOLONG if the combined path does not
 * fit, or the error from VOP_ACCESS/VOP_READLINK.  On every error
 * return foundobj has not been unlocked by this function.
 */
static inline int
namei_follow(struct namei_state *state, int inhibitmagic,
	     struct vnode *searchdir, struct vnode *foundobj,
	     struct vnode **newsearchdir_ret)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;

	struct lwp *self = curlwp;	/* thread doing namei() */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	char *cp;			/* pointer into pathname argument */
	size_t linklen;
	int error;

	KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(foundobj) == LK_EXCLUSIVE);
	/* count this link; give up once too many have been followed */
	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
		return ELOOP;
	}
	/* on MNT_SYMPERM mounts the link itself is access-checked (VEXEC) */
	if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) {
		error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred);
		if (error != 0)
			return error;
	}

	/* FUTURE: fix this to not use a second buffer */
	cp = PNBUF_GET();
	/* set up a kernel-space read of up to MAXPATHLEN bytes into cp */
	aiov.iov_base = cp;
	aiov.iov_len = MAXPATHLEN;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = MAXPATHLEN;
	UIO_SETUP_SYSSPACE(&auio);
	error = VOP_READLINK(foundobj, &auio, cnp->cn_cred);
	if (error) {
		PNBUF_PUT(cp);
		return error;
	}
	/* bytes actually read = requested size minus what is left over */
	linklen = MAXPATHLEN - auio.uio_resid;
	if (linklen == 0) {
		PNBUF_PUT(cp);
		return ENOENT;
	}

	/*
	 * Do symlink substitution, if appropriate, and
	 * check length for potential overflow.
	 *
	 * Inhibit symlink substitution for nfsd.
	 * XXX: This is how it was before; is that a bug or a feature?
	 */
	if ((!inhibitmagic && vfs_magiclinks &&
	     symlink_magic(self->l_proc, cp, &linklen)) ||
	    (linklen + ndp->ni_pathlen >= MAXPATHLEN)) {
		PNBUF_PUT(cp);
		return ENAMETOOLONG;
	}
	/* append whatever part of the original path is still unconsumed */
	if (ndp->ni_pathlen > 1) {
		/* includes a null-terminator */
		memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
	} else {
		cp[linklen] = '\0';
	}
	ndp->ni_pathlen += linklen;
	/* the combined string becomes the new contents of the path buffer */
	memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen);
	PNBUF_PUT(cp);

	/* we're now starting from the beginning of the buffer again */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* must unlock this before relocking searchdir */
	VOP_UNLOCK(foundobj);

	/*
	 * Check if root directory should replace current directory.
	 */
	if (ndp->ni_pnbuf[0] == '/') {
		vput(searchdir);
		/* Keep absolute symbolic links inside emulation root */
		searchdir = ndp->ni_erootdir;
		/* no emulation root, or an explicit "/../": use the root */
		if (searchdir == NULL ||
		    (ndp->ni_pnbuf[1] == '.'
		     && ndp->ni_pnbuf[2] == '.'
		     && ndp->ni_pnbuf[3] == '/')) {
			ndp->ni_erootdir = NULL;
			searchdir = ndp->ni_rootdir;
		}
		vref(searchdir);
		vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
		/* skip the leading slashes, keeping ni_pathlen in step */
		while (cnp->cn_nameptr[0] == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}

	*newsearchdir_ret = searchdir;
	KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
	return 0;
}

//////////////////////////////

/*
 * Inspect the leading path component and update the state accordingly.
 *
 * Sets cn_namelen, ni_next, ni_pathlen and state->slashes for the
 * component at cn_nameptr, and updates the REQUIREDIR, MAKEENTRY,
 * ISLASTCN and ISDOTDOT bits in cn_flags.  Returns 0, or ENAMETOOLONG
 * if the component is longer than KERNEL_NAME_MAX.
 */
static int
lookup_parsepath(struct namei_state *state)
{
	const char *cp;			/* pointer into pathname argument */

	struct componentname *cnp = state->cnp;
	struct nameidata *ndp = state->ndp;

	KASSERT(cnp == &ndp->ni_cnd);

	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 *
	 * At this point, our only vnode state is that the search dir
	 * is held and locked.
	 */
	cnp->cn_consume = 0;
	cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr);
	/* cp now points at the '/' or NUL that ends this component */
	cp = cnp->cn_nameptr + cnp->cn_namelen;
	if (cnp->cn_namelen > KERNEL_NAME_MAX) {
		return ENAMETOOLONG;
	}
#ifdef NAMEI_DIAGNOSTIC
	/* temporarily NUL-terminate the component so it can be printed */
	{ char c = *cp;
	*(char *)cp = '\0';
	printf("{%s}: ", cnp->cn_nameptr);
	*(char *)cp = c; }
#endif /* NAMEI_DIAGNOSTIC */
	ndp->ni_pathlen -= cnp->cn_namelen;
	ndp->ni_next = cp;
	/*
	 * If this component is followed by a slash, then move the pointer to
	 * the next component forward, and remember that this component must be
	 * a directory.
	 */
	if (*cp == '/') {
		do {
			cp++;
		} while (*cp == '/');
		/* number of slashes skipped */
		state->slashes = cp - ndp->ni_next;
		ndp->ni_pathlen -= state->slashes;
		ndp->ni_next = cp;
		cnp->cn_flags |= REQUIREDIR;
	} else {
		state->slashes = 0;
		cnp->cn_flags &= ~REQUIREDIR;
	}
	/*
	 * We do special processing on the last component, whether or not it's
	 * a directory.  Cache all intervening lookups, but not the final one.
	 */
	if (*cp == '\0') {
		if (state->docache)
			cnp->cn_flags |= MAKEENTRY;
		else
			cnp->cn_flags &= ~MAKEENTRY;
		cnp->cn_flags |= ISLASTCN;
	} else {
		cnp->cn_flags |= MAKEENTRY;
		cnp->cn_flags &= ~ISLASTCN;
	}
	/* flag ".." so that lookup_once can give it special treatment */
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
		cnp->cn_flags |= ISDOTDOT;
	else
		cnp->cn_flags &= ~ISDOTDOT;

	return 0;
}

/*
 * Call VOP_LOOKUP for a single lookup; return a new search directory
 * (used when crossing mountpoints up or searching union mounts down) and
 * the found object, which for create operations may be NULL on success.
923 * 924 * Note that the new search directory may be null, which means the 925 * searchdir was unlocked and released. This happens in the common case 926 * when crossing a mount point downwards, in order to avoid coupling 927 * locks between different file system volumes. Importantly, this can 928 * happen even if the call fails. (XXX: this is gross and should be 929 * tidied somehow.) 930 */ 931 static int 932 lookup_once(struct namei_state *state, 933 struct vnode *searchdir, 934 struct vnode **newsearchdir_ret, 935 struct vnode **foundobj_ret) 936 { 937 struct vnode *tmpvn; /* scratch vnode */ 938 struct vnode *foundobj; /* result */ 939 struct mount *mp; /* mount table entry */ 940 struct lwp *l = curlwp; 941 int error; 942 943 struct componentname *cnp = state->cnp; 944 struct nameidata *ndp = state->ndp; 945 946 KASSERT(cnp == &ndp->ni_cnd); 947 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 948 *newsearchdir_ret = searchdir; 949 950 /* 951 * Handle "..": two special cases. 952 * 1. If at root directory (e.g. after chroot) 953 * or at absolute root directory 954 * then ignore it so can't get out. 955 * 1a. If at the root of the emulation filesystem go to the real 956 * root. So "/../<path>" is always absolute. 957 * 1b. If we have somehow gotten out of a jail, warn 958 * and also ignore it so we can't get farther out. 959 * 2. If this vnode is the root of a mounted 960 * filesystem, then replace it with the 961 * vnode which was mounted on so we take the 962 * .. in the other file system. 
963 */ 964 if (cnp->cn_flags & ISDOTDOT) { 965 struct proc *p = l->l_proc; 966 967 for (;;) { 968 if (searchdir == ndp->ni_rootdir || 969 searchdir == rootvnode) { 970 foundobj = searchdir; 971 vref(foundobj); 972 *foundobj_ret = foundobj; 973 error = 0; 974 goto done; 975 } 976 if (ndp->ni_rootdir != rootvnode) { 977 int retval; 978 979 VOP_UNLOCK(searchdir); 980 retval = vn_isunder(searchdir, ndp->ni_rootdir, l); 981 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 982 if (!retval) { 983 /* Oops! We got out of jail! */ 984 log(LOG_WARNING, 985 "chrooted pid %d uid %d (%s) " 986 "detected outside of its chroot\n", 987 p->p_pid, kauth_cred_geteuid(l->l_cred), 988 p->p_comm); 989 /* Put us at the jail root. */ 990 vput(searchdir); 991 searchdir = NULL; 992 foundobj = ndp->ni_rootdir; 993 vref(foundobj); 994 vref(foundobj); 995 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 996 *newsearchdir_ret = foundobj; 997 *foundobj_ret = foundobj; 998 error = 0; 999 goto done; 1000 } 1001 } 1002 if ((searchdir->v_vflag & VV_ROOT) == 0 || 1003 (cnp->cn_flags & NOCROSSMOUNT)) 1004 break; 1005 tmpvn = searchdir; 1006 searchdir = searchdir->v_mount->mnt_vnodecovered; 1007 vref(searchdir); 1008 vput(tmpvn); 1009 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1010 *newsearchdir_ret = searchdir; 1011 } 1012 } 1013 1014 /* 1015 * We now have a segment name to search for, and a directory to search. 1016 * Our vnode state here is that "searchdir" is held and locked. 
1017 */ 1018 unionlookup: 1019 foundobj = NULL; 1020 error = VOP_LOOKUP(searchdir, &foundobj, cnp); 1021 1022 if (error != 0) { 1023 KASSERTMSG((foundobj == NULL), 1024 "leaf `%s' should be empty but is %p", 1025 cnp->cn_nameptr, foundobj); 1026 #ifdef NAMEI_DIAGNOSTIC 1027 printf("not found\n"); 1028 #endif /* NAMEI_DIAGNOSTIC */ 1029 if ((error == ENOENT) && 1030 (searchdir->v_vflag & VV_ROOT) && 1031 (searchdir->v_mount->mnt_flag & MNT_UNION)) { 1032 tmpvn = searchdir; 1033 searchdir = searchdir->v_mount->mnt_vnodecovered; 1034 vref(searchdir); 1035 vput(tmpvn); 1036 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1037 *newsearchdir_ret = searchdir; 1038 goto unionlookup; 1039 } 1040 1041 if (error != EJUSTRETURN) 1042 goto done; 1043 1044 /* 1045 * If this was not the last component, or there were trailing 1046 * slashes, and we are not going to create a directory, 1047 * then the name must exist. 1048 */ 1049 if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) { 1050 error = ENOENT; 1051 goto done; 1052 } 1053 1054 /* 1055 * If creating and at end of pathname, then can consider 1056 * allowing file to be created. 1057 */ 1058 if (state->rdonly) { 1059 error = EROFS; 1060 goto done; 1061 } 1062 1063 /* 1064 * We return success and a NULL foundobj to indicate 1065 * that the entry doesn't currently exist, leaving a 1066 * pointer to the (normally, locked) directory vnode 1067 * as searchdir. 1068 */ 1069 *foundobj_ret = NULL; 1070 error = 0; 1071 goto done; 1072 } 1073 #ifdef NAMEI_DIAGNOSTIC 1074 printf("found\n"); 1075 #endif /* NAMEI_DIAGNOSTIC */ 1076 1077 /* 1078 * Take into account any additional components consumed by the 1079 * underlying filesystem. This will include any trailing slashes after 1080 * the last component consumed. 
1081 */ 1082 if (cnp->cn_consume > 0) { 1083 ndp->ni_pathlen -= cnp->cn_consume - state->slashes; 1084 ndp->ni_next += cnp->cn_consume - state->slashes; 1085 cnp->cn_consume = 0; 1086 if (ndp->ni_next[0] == '\0') 1087 cnp->cn_flags |= ISLASTCN; 1088 } 1089 1090 /* 1091 * "searchdir" is locked and held, "foundobj" is held, 1092 * they may be the same vnode. 1093 */ 1094 if (searchdir != foundobj) { 1095 if (cnp->cn_flags & ISDOTDOT) 1096 VOP_UNLOCK(searchdir); 1097 error = vn_lock(foundobj, LK_EXCLUSIVE); 1098 if (cnp->cn_flags & ISDOTDOT) 1099 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1100 if (error != 0) { 1101 vrele(foundobj); 1102 goto done; 1103 } 1104 } 1105 1106 /* 1107 * Check to see if the vnode has been mounted on; 1108 * if so find the root of the mounted file system. 1109 */ 1110 KASSERT(searchdir != NULL); 1111 while (foundobj->v_type == VDIR && 1112 (mp = foundobj->v_mountedhere) != NULL && 1113 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 1114 1115 KASSERT(searchdir != foundobj); 1116 1117 error = vfs_busy(mp); 1118 if (error != 0) { 1119 vput(foundobj); 1120 goto done; 1121 } 1122 if (searchdir != NULL) { 1123 VOP_UNLOCK(searchdir); 1124 } 1125 vput(foundobj); 1126 error = VFS_ROOT(mp, LK_EXCLUSIVE, &foundobj); 1127 vfs_unbusy(mp); 1128 if (error) { 1129 if (searchdir != NULL) { 1130 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1131 } 1132 goto done; 1133 } 1134 /* 1135 * Avoid locking vnodes from two filesystems because 1136 * it's prone to deadlock, e.g. when using puffs. 1137 * Also, it isn't a good idea to propagate slowness of 1138 * a filesystem up to the root directory. For now, 1139 * only handle the common case, where foundobj is 1140 * VDIR. 1141 * 1142 * In this case set searchdir to null to avoid using 1143 * it again. It is not correct to set searchdir == 1144 * foundobj here as that will confuse the caller. 1145 * (See PR 40740.) 
*/
		if (searchdir == NULL) {
			/* already been here once; do nothing further */
		} else if (foundobj->v_type == VDIR) {
			vrele(searchdir);
			*newsearchdir_ret = searchdir = NULL;
		} else {
			VOP_UNLOCK(foundobj);
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}
	}

	*foundobj_ret = foundobj;
	error = 0;
done:
	KASSERT(*newsearchdir_ret == NULL ||
		VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE);
	/*
	 * *foundobj_ret is valid only if error == 0.
	 */
	KASSERT(error != 0 || *foundobj_ret == NULL ||
		VOP_ISLOCKED(*foundobj_ret) == LK_EXCLUSIVE);
	return error;
}

//////////////////////////////

/*
 * Do a complete path search from a single root directory.
 * (This is called up to twice if TRYEMULROOT is in effect.)
 *
 * Arguments:
 *	state		lookup state; state->ndp and state->cnp supply the
 *			nameidata and componentname being worked on.
 *	neverfollow	if nonzero, reaching a symlink that
 *			namei_atsymlink() reports fails with EINVAL
 *			instead of following it.
 *	inhibitmagic	passed through unchanged to namei_follow().
 *	isnfsd		passed through unchanged to namei_start().
 *
 * Returns 0 on success, with ndp->ni_vp set to the object found (NULL
 * if lookup_once() succeeded without returning one) and ndp->ni_dvp
 * set to the remaining search directory; ni_dvp is always NULL when
 * LOCKPARENT is not set, and may be NULL even when it is.  The found
 * object is unlocked before return unless LOCKLEAF is set.
 *
 * On failure an errno value is returned and both ndp->ni_dvp and
 * ndp->ni_vp are NULL.  Some (not all) failure paths also set
 * state->attempt_retry, which namei_tryemulroot() consults.
 */
static int
namei_oneroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	struct vnode *searchdir, *foundobj;
	int error;

	error = namei_start(state, isnfsd, &searchdir);
	if (error) {
		ndp->ni_dvp = NULL;
		ndp->ni_vp = NULL;
		return error;
	}
	KASSERT(searchdir->v_type == VDIR);

	/*
	 * Setup: break out flag bits into variables.
	 */
	/* docache ends up nonzero exactly when NOCACHE is not set */
	state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE)
		state->docache = 0;
	state->rdonly = cnp->cn_flags & RDONLY;

	/*
	 * Keep going until we run out of path components.
	 */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* drop leading slashes (already used them to choose startdir) */
	while (cnp->cn_nameptr[0] == '/') {
		cnp->cn_nameptr++;
		ndp->ni_pathlen--;
	}
	/* was it just "/"? */
	if (cnp->cn_nameptr[0] == '\0') {
		/* the start directory itself is the result; no parent */
		foundobj = searchdir;
		searchdir = NULL;
		cnp->cn_flags |= ISLASTCN;

		/* bleh */
		goto skiploop;
	}

	for (;;) {
		KASSERT(searchdir != NULL);
		KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);

		/*
		 * If the directory we're on is unmounted, bail out.
		 * XXX: should this also check if it's unlinked?
		 * XXX: yes it should... but how?
		 */
		if (searchdir->v_mount == NULL) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			return (ENOENT);
		}

		/*
		 * Look up the next path component.
		 * (currently, this may consume more than one)
		 */

		/* There should be no slashes here. */
		KASSERT(cnp->cn_nameptr[0] != '/');

		/* and we shouldn't have looped around if we were done */
		KASSERT(cnp->cn_nameptr[0] != '\0');

		error = lookup_parsepath(state);
		if (error) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return (error);
		}

		/*
		 * lookup_once() may replace searchdir (it is passed both
		 * by value and by reference), including with NULL.
		 */
		error = lookup_once(state, searchdir, &searchdir, &foundobj);
		if (error) {
			if (searchdir != NULL) {
				vput(searchdir);
			}
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			/*
			 * Note that if we're doing TRYEMULROOT we can
			 * retry with the normal root. Where this is
			 * currently set matches previous practice,
			 * but the previous practice didn't make much
			 * sense and somebody should sit down and
			 * figure out which cases should cause retry
			 * and which shouldn't. XXX.
			 */
			state->attempt_retry = 1;
			return (error);
		}

		if (foundobj == NULL) {
			/*
			 * Success with no object returned means we're
			 * creating something and it isn't already
			 * there. Break out of the main loop now so
			 * the code below doesn't have to test for
			 * foundobj == NULL.
			 */
			/* lookup_once can't have dropped the searchdir */
			KASSERT(searchdir != NULL);
			break;
		}

		/*
		 * Check for symbolic link. If we've reached one,
		 * follow it, unless we aren't supposed to. Back up
		 * over any slashes that we skipped, as we will need
		 * them again.
		 */
		if (namei_atsymlink(state, foundobj)) {
			ndp->ni_pathlen += state->slashes;
			ndp->ni_next -= state->slashes;
			if (neverfollow) {
				error = EINVAL;
			} else if (searchdir == NULL) {
				/*
				 * dholland 20160410: lookup_once only
				 * drops searchdir if it crossed a
				 * mount point. Therefore, if we get
				 * here it means we crossed a mount
				 * point to a mounted filesystem whose
				 * root vnode is a symlink. In theory
				 * we could continue at this point by
				 * using the pre-crossing searchdir
				 * (e.g. just take out an extra
				 * reference on it before calling
				 * lookup_once so we still have it),
				 * but this will make an ugly mess and
				 * it should never happen in practice
				 * as only badly broken filesystems
				 * have non-directory root vnodes. (I
				 * have seen this sort of thing with
				 * NFS occasionally but even then it
				 * means something's badly wrong.)
				 */
				error = ENOTDIR;
			} else {
				/*
				 * dholland 20110410: if we're at a
				 * union mount it might make sense to
				 * use the top of the union stack here
				 * rather than the layer we found the
				 * symlink in. (FUTURE)
				 */
				error = namei_follow(state, inhibitmagic,
						     searchdir, foundobj,
						     &searchdir);
			}
			if (error) {
				KASSERT(searchdir != foundobj);
				if (searchdir != NULL) {
					vput(searchdir);
				}
				vput(foundobj);
				ndp->ni_dvp = NULL;
				ndp->ni_vp = NULL;
				return error;
			}
			/* namei_follow unlocks it (ugh) so rele, not put */
			vrele(foundobj);
			foundobj = NULL;

			/*
			 * If we followed a symlink to `/' and there
			 * are no more components after the symlink,
			 * we're done with the loop and what we found
			 * is the searchdir.
			 */
			if (cnp->cn_nameptr[0] == '\0') {
				KASSERT(searchdir != NULL);
				foundobj = searchdir;
				searchdir = NULL;
				cnp->cn_flags |= ISLASTCN;
				break;
			}

			/* resume the walk with the searchdir namei_follow gave us */
			continue;
		}

		/*
		 * Not a symbolic link.
		 *
		 * Check for directory, if the component was
		 * followed by a series of slashes.
		 */
		if ((foundobj->v_type != VDIR) &&
		    (cnp->cn_flags & REQUIREDIR)) {
			KASSERT(foundobj != searchdir);
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return ENOTDIR;
		}

		/*
		 * Stop if we've reached the last component.
		 */
		if (cnp->cn_flags & ISLASTCN) {
			break;
		}

		/*
		 * Continue with the next component.
		 */
		cnp->cn_nameptr = ndp->ni_next;
		/*
		 * When searchdir and foundobj are the same vnode, only
		 * the extra reference is dropped (vrele, not vput); the
		 * vnode becomes the new searchdir just below.
		 */
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else if (searchdir != NULL) {
			vput(searchdir);
		}
		searchdir = foundobj;
		foundobj = NULL;
	}

	/*
	 * Reached by breaking out of the loop above, or directly via
	 * the goto when the path consisted only of slashes.
	 */
 skiploop:

	if (foundobj != NULL) {
		if (foundobj == ndp->ni_erootdir) {
			/*
			 * We are about to return the emulation root.
			 * This isn't a good idea because code might
			 * repeatedly lookup ".." until the file
			 * matches that returned for "/" and loop
			 * forever.  So convert it to the real root.
			 */
			if (searchdir != NULL) {
				if (searchdir == foundobj)
					vrele(searchdir);
				else
					vput(searchdir);
				searchdir = NULL;
			}
			vput(foundobj);
			foundobj = ndp->ni_rootdir;
			vref(foundobj);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}

		/*
		 * If the caller requested the parent node (i.e. it's
		 * a CREATE, DELETE, or RENAME), and we don't have one
		 * (because this is the root directory, or we crossed
		 * a mount point), then we must fail.
		 */
		if (cnp->cn_nameiop != LOOKUP &&
		    (searchdir == NULL ||
		     searchdir->v_mount != foundobj->v_mount)) {
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;

			/* pick the errno by operation; anything else is a bug */
			switch (cnp->cn_nameiop) {
			    case CREATE:
				return EEXIST;
			    case DELETE:
			    case RENAME:
				return EBUSY;
			    default:
				break;
			}
			panic("Invalid nameiop\n");
		}

		/*
		 * Disallow directory write attempts on read-only lookups.
		 * Prefers EEXIST over EROFS for the CREATE case.
		 */
		if (state->rdonly &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			if (searchdir) {
				if (foundobj != searchdir) {
					vput(searchdir);
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return EROFS;
		}
		if ((cnp->cn_flags & LOCKLEAF) == 0) {
			/*
			 * Note: if LOCKPARENT but not LOCKLEAF is
			 * set, and searchdir == foundobj, this code
			 * necessarily unlocks the parent as well as
			 * the leaf. That is, just because you specify
			 * LOCKPARENT doesn't mean you necessarily get
			 * a locked parent vnode. The code in
			 * vfs_syscalls.c, and possibly elsewhere,
			 * that uses this combination "knows" this, so
			 * it can't be safely changed. Feh. XXX
			 */
			VOP_UNLOCK(foundobj);
		}
	}

	/*
	 * Done.
	 */

	/*
	 * If LOCKPARENT is not set, the parent directory isn't returned.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) {
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else {
			vput(searchdir);
		}
		searchdir = NULL;
	}

	ndp->ni_dvp = searchdir;
	ndp->ni_vp = foundobj;
	return 0;
}

/*
 * Do namei; wrapper layer that handles TRYEMULROOT.
 *
 * Runs namei_oneroot() and, if it fails with state->attempt_retry set
 * while ndp->ni_erootdir is non-NULL, clears TRYEMULROOT and runs it
 * again.  Because the flag is cleared before retrying and the saved
 * path is released at that point, at most one retry is expected
 * (NOTE(review): this assumes ni_erootdir is only non-NULL while
 * TRYEMULROOT is set -- confirm against namei_start()).
 *
 * neverfollow, inhibitmagic and isnfsd are handed to namei_oneroot()
 * unchanged.  Returns whatever the last namei_oneroot() call returned.
 */
static int
namei_tryemulroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	int error;

	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	const char *savepath = NULL;

	KASSERT(cnp == &ndp->ni_cnd);

	/* keep a copy of the path text in case we have to start over */
	if (cnp->cn_flags & TRYEMULROOT) {
		savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf);
	}

 emul_retry:
	state->attempt_retry = 0;

	error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd);
	if (error) {
		/*
		 * Once namei has started up, the existence of ni_erootdir
		 * tells us whether we're working from an emulation root.
		 * The TRYEMULROOT flag isn't necessarily authoritative.
		 */
		if (ndp->ni_erootdir != NULL && state->attempt_retry) {
			/* Retry the whole thing using the normal root */
			cnp->cn_flags &= ~TRYEMULROOT;
			state->attempt_retry = 0;

			/* kinda gross */
			/*
			 * Put the saved text back into the pathbuf before
			 * retrying (presumably the failed attempt may have
			 * rewritten pb_path -- confirm), then give the
			 * copy back.
			 */
			strcpy(ndp->ni_pathbuf->pb_path, savepath);
			pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
			savepath = NULL;

			goto emul_retry;
		}
	}
	/* release the saved copy if the retry path did not already */
	if (savepath != NULL) {
		pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
	}
	return error;
}

/*
 * External interface.
1569 */ 1570 int 1571 namei(struct nameidata *ndp) 1572 { 1573 struct namei_state state; 1574 int error; 1575 1576 namei_init(&state, ndp); 1577 error = namei_tryemulroot(&state, 1578 0/*!neverfollow*/, 0/*!inhibitmagic*/, 1579 0/*isnfsd*/); 1580 namei_cleanup(&state); 1581 1582 if (error) { 1583 /* make sure no stray refs leak out */ 1584 KASSERT(ndp->ni_dvp == NULL); 1585 KASSERT(ndp->ni_vp == NULL); 1586 } 1587 1588 return error; 1589 } 1590 1591 //////////////////////////////////////////////////////////// 1592 1593 /* 1594 * External interface used by nfsd. This is basically different from 1595 * namei only in that it has the ability to pass in the "current 1596 * directory", and uses an extra flag "neverfollow" for which there's 1597 * no physical flag defined in namei.h. (There used to be a cut&paste 1598 * copy of about half of namei in nfsd to allow these minor 1599 * adjustments to exist.) 1600 * 1601 * XXX: the namei interface should be adjusted so nfsd can just use 1602 * ordinary namei(). 1603 */ 1604 int 1605 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow) 1606 { 1607 struct namei_state state; 1608 int error; 1609 1610 KASSERT(ndp->ni_atdir == NULL); 1611 ndp->ni_atdir = forcecwd; 1612 1613 namei_init(&state, ndp); 1614 error = namei_tryemulroot(&state, 1615 neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/); 1616 namei_cleanup(&state); 1617 1618 if (error) { 1619 /* make sure no stray refs leak out */ 1620 KASSERT(ndp->ni_dvp == NULL); 1621 KASSERT(ndp->ni_vp == NULL); 1622 } 1623 1624 return error; 1625 } 1626 1627 /* 1628 * A second external interface used by nfsd. This turns out to be a 1629 * single lookup used by the WebNFS code (ha!) to get "index.html" or 1630 * equivalent when asked for a directory. It should eventually evolve 1631 * into some kind of namei_once() call; for the time being it's kind 1632 * of a mess. XXX. 
1633 * 1634 * dholland 20110109: I don't think it works, and I don't think it 1635 * worked before I started hacking and slashing either, and I doubt 1636 * anyone will ever notice. 1637 */ 1638 1639 /* 1640 * Internals. This calls lookup_once() after setting up the assorted 1641 * pieces of state the way they ought to be. 1642 */ 1643 static int 1644 do_lookup_for_nfsd_index(struct namei_state *state) 1645 { 1646 int error = 0; 1647 1648 struct componentname *cnp = state->cnp; 1649 struct nameidata *ndp = state->ndp; 1650 struct vnode *startdir; 1651 struct vnode *foundobj; 1652 const char *cp; /* pointer into pathname argument */ 1653 1654 KASSERT(cnp == &ndp->ni_cnd); 1655 1656 startdir = state->ndp->ni_atdir; 1657 1658 cnp->cn_nameptr = ndp->ni_pnbuf; 1659 state->docache = 1; 1660 state->rdonly = cnp->cn_flags & RDONLY; 1661 ndp->ni_dvp = NULL; 1662 1663 cnp->cn_consume = 0; 1664 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 1665 cp = cnp->cn_nameptr + cnp->cn_namelen; 1666 KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX); 1667 ndp->ni_pathlen -= cnp->cn_namelen; 1668 ndp->ni_next = cp; 1669 state->slashes = 0; 1670 cnp->cn_flags &= ~REQUIREDIR; 1671 cnp->cn_flags |= MAKEENTRY|ISLASTCN; 1672 1673 if (cnp->cn_namelen == 2 && 1674 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 1675 cnp->cn_flags |= ISDOTDOT; 1676 else 1677 cnp->cn_flags &= ~ISDOTDOT; 1678 1679 /* 1680 * Because lookup_once can change the startdir, we need our 1681 * own reference to it to avoid consuming the caller's. 
1682 */ 1683 vref(startdir); 1684 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 1685 error = lookup_once(state, startdir, &startdir, &foundobj); 1686 if (error == 0 && startdir == foundobj) { 1687 vrele(startdir); 1688 } else if (startdir != NULL) { 1689 vput(startdir); 1690 } 1691 if (error) { 1692 goto bad; 1693 } 1694 ndp->ni_vp = foundobj; 1695 1696 if (foundobj == NULL) { 1697 return 0; 1698 } 1699 1700 KASSERT((cnp->cn_flags & LOCKPARENT) == 0); 1701 if ((cnp->cn_flags & LOCKLEAF) == 0) { 1702 VOP_UNLOCK(foundobj); 1703 } 1704 return (0); 1705 1706 bad: 1707 ndp->ni_vp = NULL; 1708 return (error); 1709 } 1710 1711 /* 1712 * External interface. The partitioning between this function and the 1713 * above isn't very clear - the above function exists mostly so code 1714 * that uses "state->" can be shuffled around without having to change 1715 * it to "state.". 1716 */ 1717 int 1718 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir) 1719 { 1720 struct namei_state state; 1721 int error; 1722 1723 KASSERT(ndp->ni_atdir == NULL); 1724 ndp->ni_atdir = startdir; 1725 1726 /* 1727 * Note: the name sent in here (is not|should not be) allowed 1728 * to contain a slash. 1729 */ 1730 if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) { 1731 return ENAMETOOLONG; 1732 } 1733 if (strchr(ndp->ni_pathbuf->pb_path, '/')) { 1734 return EINVAL; 1735 } 1736 1737 ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1; 1738 ndp->ni_pnbuf = NULL; 1739 ndp->ni_cnd.cn_nameptr = NULL; 1740 1741 namei_init(&state, ndp); 1742 error = do_lookup_for_nfsd_index(&state); 1743 namei_cleanup(&state); 1744 1745 return error; 1746 } 1747 1748 //////////////////////////////////////////////////////////// 1749 1750 /* 1751 * Reacquire a path name component. 1752 * dvp is locked on entry and exit. 1753 * *vpp is locked on exit unless it's NULL. 
1754 */ 1755 int 1756 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy) 1757 { 1758 int rdonly; /* lookup read-only flag bit */ 1759 int error = 0; 1760 #ifdef DEBUG 1761 size_t newlen; /* DEBUG: check name len */ 1762 const char *cp; /* DEBUG: check name ptr */ 1763 #endif /* DEBUG */ 1764 1765 (void)dummy; 1766 1767 /* 1768 * Setup: break out flag bits into variables. 1769 */ 1770 rdonly = cnp->cn_flags & RDONLY; 1771 1772 /* 1773 * Search a new directory. 1774 * 1775 * The cn_hash value is for use by vfs_cache. 1776 * The last component of the filename is left accessible via 1777 * cnp->cn_nameptr for callers that need the name. Callers needing 1778 * the name set the SAVENAME flag. When done, they assume 1779 * responsibility for freeing the pathname buffer. 1780 */ 1781 #ifdef DEBUG 1782 #if 0 1783 cp = NULL; 1784 newhash = namei_hash(cnp->cn_nameptr, &cp); 1785 if ((uint32_t)newhash != (uint32_t)cnp->cn_hash) 1786 panic("relookup: bad hash"); 1787 #endif 1788 newlen = namei_getcomponent(cnp->cn_nameptr); 1789 if (cnp->cn_namelen != newlen) 1790 panic("relookup: bad len"); 1791 cp = cnp->cn_nameptr + cnp->cn_namelen; 1792 while (*cp == '/') 1793 cp++; 1794 if (*cp != 0) 1795 panic("relookup: not last component"); 1796 #endif /* DEBUG */ 1797 1798 /* 1799 * Check for degenerate name (e.g. / or "") 1800 * which is a way of talking about a directory, 1801 * e.g. like "/." or ".". 1802 */ 1803 if (cnp->cn_nameptr[0] == '\0') 1804 panic("relookup: null name"); 1805 1806 if (cnp->cn_flags & ISDOTDOT) 1807 panic("relookup: lookup on dot-dot"); 1808 1809 /* 1810 * We now have a segment name to search for, and a directory to search. 
1811 */ 1812 *vpp = NULL; 1813 error = VOP_LOOKUP(dvp, vpp, cnp); 1814 if ((error) != 0) { 1815 KASSERTMSG((*vpp == NULL), 1816 "leaf `%s' should be empty but is %p", 1817 cnp->cn_nameptr, *vpp); 1818 if (error != EJUSTRETURN) 1819 goto bad; 1820 } 1821 1822 /* 1823 * Check for symbolic link 1824 */ 1825 KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK || 1826 (cnp->cn_flags & FOLLOW) == 0), 1827 "relookup: symlink found"); 1828 1829 /* 1830 * Check for read-only lookups. 1831 */ 1832 if (rdonly && cnp->cn_nameiop != LOOKUP) { 1833 error = EROFS; 1834 if (*vpp) { 1835 vrele(*vpp); 1836 } 1837 goto bad; 1838 } 1839 /* 1840 * Lock result. 1841 */ 1842 if (*vpp && *vpp != dvp) { 1843 error = vn_lock(*vpp, LK_EXCLUSIVE); 1844 if (error != 0) { 1845 vrele(*vpp); 1846 goto bad; 1847 } 1848 } 1849 return (0); 1850 1851 bad: 1852 *vpp = NULL; 1853 return (error); 1854 } 1855 1856 /* 1857 * namei_simple - simple forms of namei. 1858 * 1859 * These are wrappers to allow the simple case callers of namei to be 1860 * left alone while everything else changes under them. 
1861 */ 1862 1863 /* Flags */ 1864 struct namei_simple_flags_type { 1865 int dummy; 1866 }; 1867 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft; 1868 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn; 1869 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt; 1870 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn; 1871 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft; 1872 1873 static 1874 int 1875 namei_simple_convert_flags(namei_simple_flags_t sflags) 1876 { 1877 if (sflags == NSM_NOFOLLOW_NOEMULROOT) 1878 return NOFOLLOW | 0; 1879 if (sflags == NSM_NOFOLLOW_TRYEMULROOT) 1880 return NOFOLLOW | TRYEMULROOT; 1881 if (sflags == NSM_FOLLOW_NOEMULROOT) 1882 return FOLLOW | 0; 1883 if (sflags == NSM_FOLLOW_TRYEMULROOT) 1884 return FOLLOW | TRYEMULROOT; 1885 panic("namei_simple_convert_flags: bogus sflags\n"); 1886 return 0; 1887 } 1888 1889 int 1890 namei_simple_kernel(const char *path, namei_simple_flags_t sflags, 1891 struct vnode **vp_ret) 1892 { 1893 return nameiat_simple_kernel(NULL, path, sflags, vp_ret); 1894 } 1895 1896 int 1897 nameiat_simple_kernel(struct vnode *dvp, const char *path, 1898 namei_simple_flags_t sflags, struct vnode **vp_ret) 1899 { 1900 struct nameidata nd; 1901 struct pathbuf *pb; 1902 int err; 1903 1904 pb = pathbuf_create(path); 1905 if (pb == NULL) { 1906 return ENOMEM; 1907 } 1908 1909 NDINIT(&nd, 1910 LOOKUP, 1911 namei_simple_convert_flags(sflags), 1912 pb); 1913 1914 if (dvp != NULL) 1915 NDAT(&nd, dvp); 1916 1917 err = namei(&nd); 1918 if (err != 0) { 1919 pathbuf_destroy(pb); 1920 return err; 1921 } 1922 *vp_ret = nd.ni_vp; 1923 pathbuf_destroy(pb); 1924 return 0; 1925 } 1926 1927 int 1928 namei_simple_user(const char *path, namei_simple_flags_t sflags, 1929 struct vnode **vp_ret) 1930 { 1931 return nameiat_simple_user(NULL, path, sflags, vp_ret); 1932 } 1933 1934 int 1935 nameiat_simple_user(struct vnode *dvp, const char *path, 1936 namei_simple_flags_t sflags, struct vnode 
**vp_ret) 1937 { 1938 struct pathbuf *pb; 1939 struct nameidata nd; 1940 int err; 1941 1942 err = pathbuf_copyin(path, &pb); 1943 if (err) { 1944 return err; 1945 } 1946 1947 NDINIT(&nd, 1948 LOOKUP, 1949 namei_simple_convert_flags(sflags), 1950 pb); 1951 1952 if (dvp != NULL) 1953 NDAT(&nd, dvp); 1954 1955 err = namei(&nd); 1956 if (err != 0) { 1957 pathbuf_destroy(pb); 1958 return err; 1959 } 1960 *vp_ret = nd.ni_vp; 1961 pathbuf_destroy(pb); 1962 return 0; 1963 } 1964