1 /* $NetBSD: vfs_lookup.c,v 1.208 2017/07/09 22:48:44 dholland Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.208 2017/07/09 22:48:44 dholland Exp $"); 41 42 #ifdef _KERNEL_OPT 43 #include "opt_magiclinks.h" 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/syslimits.h> 50 #include <sys/time.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/mount.h> 54 #include <sys/errno.h> 55 #include <sys/filedesc.h> 56 #include <sys/hash.h> 57 #include <sys/proc.h> 58 #include <sys/syslog.h> 59 #include <sys/kauth.h> 60 #include <sys/ktrace.h> 61 #include <sys/dirent.h> 62 63 #ifndef MAGICLINKS 64 #define MAGICLINKS 0 65 #endif 66 67 int vfs_magiclinks = MAGICLINKS; 68 69 __CTASSERT(MAXNAMLEN == NAME_MAX); 70 71 /* 72 * Substitute replacement text for 'magic' strings in symlinks. 73 * Returns 0 if successful, and returns non-zero if an error 74 * occurs. (Currently, the only possible error is running out 75 * of temporary pathname space.) 76 * 77 * Looks for "@<string>" and "@<string>/", where <string> is a 78 * recognized 'magic' string. Replaces the "@<string>" with the 79 * appropriate replacement text. (Note that in some cases the 80 * replacement text may have zero length.) 81 * 82 * This would have been table driven, but the variance in 83 * replacement strings (and replacement string lengths) made 84 * that impractical. 
85 */ 86 #define VNL(x) \ 87 (sizeof(x) - 1) 88 89 #define VO '{' 90 #define VC '}' 91 92 #define MATCH(str) \ 93 ((termchar == '/' && i + VNL(str) == *len) || \ 94 (i + VNL(str) < *len && \ 95 cp[i + VNL(str)] == termchar)) && \ 96 !strncmp((str), &cp[i], VNL(str)) 97 98 #define SUBSTITUTE(m, s, sl) \ 99 if ((newlen + (sl)) >= MAXPATHLEN) \ 100 return 1; \ 101 i += VNL(m); \ 102 if (termchar != '/') \ 103 i++; \ 104 (void)memcpy(&tmp[newlen], (s), (sl)); \ 105 newlen += (sl); \ 106 change = 1; \ 107 termchar = '/'; 108 109 static int 110 symlink_magic(struct proc *p, char *cp, size_t *len) 111 { 112 char *tmp; 113 size_t change, i, newlen, slen; 114 char termchar = '/'; 115 char idtmp[11]; /* enough for 32 bit *unsigned* integer */ 116 117 118 tmp = PNBUF_GET(); 119 for (change = i = newlen = 0; i < *len; ) { 120 if (cp[i] != '@') { 121 tmp[newlen++] = cp[i++]; 122 continue; 123 } 124 125 i++; 126 127 /* Check for @{var} syntax. */ 128 if (cp[i] == VO) { 129 termchar = VC; 130 i++; 131 } 132 133 /* 134 * The following checks should be ordered according 135 * to frequency of use. 
136 */ 137 if (MATCH("machine_arch")) { 138 slen = VNL(MACHINE_ARCH); 139 SUBSTITUTE("machine_arch", MACHINE_ARCH, slen); 140 } else if (MATCH("machine")) { 141 slen = VNL(MACHINE); 142 SUBSTITUTE("machine", MACHINE, slen); 143 } else if (MATCH("hostname")) { 144 SUBSTITUTE("hostname", hostname, hostnamelen); 145 } else if (MATCH("osrelease")) { 146 slen = strlen(osrelease); 147 SUBSTITUTE("osrelease", osrelease, slen); 148 } else if (MATCH("emul")) { 149 slen = strlen(p->p_emul->e_name); 150 SUBSTITUTE("emul", p->p_emul->e_name, slen); 151 } else if (MATCH("kernel_ident")) { 152 slen = strlen(kernel_ident); 153 SUBSTITUTE("kernel_ident", kernel_ident, slen); 154 } else if (MATCH("domainname")) { 155 SUBSTITUTE("domainname", domainname, domainnamelen); 156 } else if (MATCH("ostype")) { 157 slen = strlen(ostype); 158 SUBSTITUTE("ostype", ostype, slen); 159 } else if (MATCH("uid")) { 160 slen = snprintf(idtmp, sizeof(idtmp), "%u", 161 kauth_cred_geteuid(kauth_cred_get())); 162 SUBSTITUTE("uid", idtmp, slen); 163 } else if (MATCH("ruid")) { 164 slen = snprintf(idtmp, sizeof(idtmp), "%u", 165 kauth_cred_getuid(kauth_cred_get())); 166 SUBSTITUTE("ruid", idtmp, slen); 167 } else if (MATCH("gid")) { 168 slen = snprintf(idtmp, sizeof(idtmp), "%u", 169 kauth_cred_getegid(kauth_cred_get())); 170 SUBSTITUTE("gid", idtmp, slen); 171 } else if (MATCH("rgid")) { 172 slen = snprintf(idtmp, sizeof(idtmp), "%u", 173 kauth_cred_getgid(kauth_cred_get())); 174 SUBSTITUTE("rgid", idtmp, slen); 175 } else { 176 tmp[newlen++] = '@'; 177 if (termchar == VC) 178 tmp[newlen++] = VO; 179 } 180 } 181 182 if (change) { 183 (void)memcpy(cp, tmp, newlen); 184 *len = newlen; 185 } 186 PNBUF_PUT(tmp); 187 188 return 0; 189 } 190 191 #undef VNL 192 #undef VO 193 #undef VC 194 #undef MATCH 195 #undef SUBSTITUTE 196 197 //////////////////////////////////////////////////////////// 198 199 /* 200 * Determine the namei hash (for the namecache) for name. 201 * If *ep != NULL, hash from name to ep-1. 
202 * If *ep == NULL, hash from name until the first NUL or '/', and 203 * return the location of this termination character in *ep. 204 * 205 * This function returns an equivalent hash to the MI hash32_strn(). 206 * The latter isn't used because in the *ep == NULL case, determining 207 * the length of the string to the first NUL or `/' and then calling 208 * hash32_strn() involves unnecessary double-handling of the data. 209 */ 210 uint32_t 211 namei_hash(const char *name, const char **ep) 212 { 213 uint32_t hash; 214 215 hash = HASH32_STR_INIT; 216 if (*ep != NULL) { 217 for (; name < *ep; name++) 218 hash = hash * 33 + *(const uint8_t *)name; 219 } else { 220 for (; *name != '\0' && *name != '/'; name++) 221 hash = hash * 33 + *(const uint8_t *)name; 222 *ep = name; 223 } 224 return (hash + (hash >> 5)); 225 } 226 227 /* 228 * Find the end of the first path component in NAME and return its 229 * length. 230 */ 231 static size_t 232 namei_getcomponent(const char *name) 233 { 234 size_t pos; 235 236 pos = 0; 237 while (name[pos] != '\0' && name[pos] != '/') { 238 pos++; 239 } 240 return pos; 241 } 242 243 //////////////////////////////////////////////////////////// 244 245 /* 246 * Sealed abstraction for pathnames. 247 * 248 * System-call-layer level code that is going to call namei should 249 * first create a pathbuf and adjust all the bells and whistles on it 250 * as needed by context. 
251 */ 252 253 struct pathbuf { 254 char *pb_path; 255 char *pb_pathcopy; 256 unsigned pb_pathcopyuses; 257 }; 258 259 static struct pathbuf * 260 pathbuf_create_raw(void) 261 { 262 struct pathbuf *pb; 263 264 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 265 pb->pb_path = PNBUF_GET(); 266 if (pb->pb_path == NULL) { 267 kmem_free(pb, sizeof(*pb)); 268 return NULL; 269 } 270 pb->pb_pathcopy = NULL; 271 pb->pb_pathcopyuses = 0; 272 return pb; 273 } 274 275 void 276 pathbuf_destroy(struct pathbuf *pb) 277 { 278 KASSERT(pb->pb_pathcopyuses == 0); 279 KASSERT(pb->pb_pathcopy == NULL); 280 PNBUF_PUT(pb->pb_path); 281 kmem_free(pb, sizeof(*pb)); 282 } 283 284 struct pathbuf * 285 pathbuf_assimilate(char *pnbuf) 286 { 287 struct pathbuf *pb; 288 289 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 290 pb->pb_path = pnbuf; 291 pb->pb_pathcopy = NULL; 292 pb->pb_pathcopyuses = 0; 293 return pb; 294 } 295 296 struct pathbuf * 297 pathbuf_create(const char *path) 298 { 299 struct pathbuf *pb; 300 int error; 301 302 pb = pathbuf_create_raw(); 303 if (pb == NULL) { 304 return NULL; 305 } 306 error = copystr(path, pb->pb_path, PATH_MAX, NULL); 307 if (error != 0) { 308 KASSERT(!"kernel path too long in pathbuf_create"); 309 /* make sure it's null-terminated, just in case */ 310 pb->pb_path[PATH_MAX-1] = '\0'; 311 } 312 return pb; 313 } 314 315 int 316 pathbuf_copyin(const char *userpath, struct pathbuf **ret) 317 { 318 struct pathbuf *pb; 319 int error; 320 321 pb = pathbuf_create_raw(); 322 if (pb == NULL) { 323 return ENOMEM; 324 } 325 error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL); 326 if (error) { 327 pathbuf_destroy(pb); 328 return error; 329 } 330 *ret = pb; 331 return 0; 332 } 333 334 /* 335 * XXX should not exist: 336 * 1. whether a pointer is kernel or user should be statically checkable. 337 * 2. copyin should be handled by the upper part of the syscall layer, 338 * not in here. 
339 */ 340 int 341 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret) 342 { 343 if (seg == UIO_USERSPACE) { 344 return pathbuf_copyin(path, ret); 345 } else { 346 *ret = pathbuf_create(path); 347 if (*ret == NULL) { 348 return ENOMEM; 349 } 350 return 0; 351 } 352 } 353 354 /* 355 * Get a copy of the path buffer as it currently exists. If this is 356 * called after namei starts the results may be arbitrary. 357 */ 358 void 359 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen) 360 { 361 strlcpy(buf, pb->pb_path, maxlen); 362 } 363 364 /* 365 * These two functions allow access to a saved copy of the original 366 * path string. The first copy should be gotten before namei is 367 * called. Each copy that is gotten should be put back. 368 */ 369 370 const char * 371 pathbuf_stringcopy_get(struct pathbuf *pb) 372 { 373 if (pb->pb_pathcopyuses == 0) { 374 pb->pb_pathcopy = PNBUF_GET(); 375 strcpy(pb->pb_pathcopy, pb->pb_path); 376 } 377 pb->pb_pathcopyuses++; 378 return pb->pb_pathcopy; 379 } 380 381 void 382 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str) 383 { 384 KASSERT(str == pb->pb_pathcopy); 385 KASSERT(pb->pb_pathcopyuses > 0); 386 pb->pb_pathcopyuses--; 387 if (pb->pb_pathcopyuses == 0) { 388 PNBUF_PUT(pb->pb_pathcopy); 389 pb->pb_pathcopy = NULL; 390 } 391 } 392 393 394 //////////////////////////////////////////////////////////// 395 396 /* 397 * namei: convert a pathname into a pointer to a (maybe-locked) vnode, 398 * and maybe also its parent directory vnode, and assorted other guff. 399 * See namei(9) for the interface documentation. 400 * 401 * 402 * The FOLLOW flag is set when symbolic links are to be followed 403 * when they occur at the end of the name translation process. 404 * Symbolic links are always followed for all other pathname 405 * components other than the last. 406 * 407 * The segflg defines whether the name is to be copied from user 408 * space or kernel space. 
409 * 410 * Overall outline of namei: 411 * 412 * copy in name 413 * get starting directory 414 * while (!done && !error) { 415 * call lookup to search path. 416 * if symbolic link, massage name in buffer and continue 417 * } 418 */ 419 420 /* 421 * Search a pathname. 422 * This is a very central and rather complicated routine. 423 * 424 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 425 * The starting directory is passed in. The pathname is descended 426 * until done, or a symbolic link is encountered. The variable ni_more 427 * is clear if the path is completed; it is set to one if a symbolic 428 * link needing interpretation is encountered. 429 * 430 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 431 * whether the name is to be looked up, created, renamed, or deleted. 432 * When CREATE, RENAME, or DELETE is specified, information usable in 433 * creating, renaming, or deleting a directory entry may be calculated. 434 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 435 * locked. Otherwise the parent directory is not returned. If the target 436 * of the pathname exists and LOCKLEAF is or'ed into the flag the target 437 * is returned locked, otherwise it is returned unlocked. When creating 438 * or renaming and LOCKPARENT is specified, the target may not be ".". 439 * When deleting and LOCKPARENT is specified, the target may be ".". 440 * 441 * Overall outline of lookup: 442 * 443 * dirloop: 444 * identify next component of name at ndp->ni_ptr 445 * handle degenerate case where name is null string 446 * if .. and crossing mount points and on mounted filesys, find parent 447 * call VOP_LOOKUP routine for next component name 448 * directory vnode returned in ni_dvp, locked. 449 * component vnode returned in ni_vp (if it exists), locked. 
450 * if result vnode is mounted on and crossing mount points, 451 * find mounted on vnode 452 * if more components of name, do next level at dirloop 453 * return the answer in ni_vp, locked if LOCKLEAF set 454 * if LOCKPARENT set, return locked parent in ni_dvp 455 */ 456 457 458 /* 459 * Internal state for a namei operation. 460 * 461 * cnp is always equal to &ndp->ni_cnp. 462 */ 463 struct namei_state { 464 struct nameidata *ndp; 465 struct componentname *cnp; 466 467 int docache; /* == 0 do not cache last component */ 468 int rdonly; /* lookup read-only flag bit */ 469 int slashes; 470 471 unsigned attempt_retry:1; /* true if error allows emul retry */ 472 }; 473 474 475 /* 476 * Initialize the namei working state. 477 */ 478 static void 479 namei_init(struct namei_state *state, struct nameidata *ndp) 480 { 481 482 state->ndp = ndp; 483 state->cnp = &ndp->ni_cnd; 484 485 state->docache = 0; 486 state->rdonly = 0; 487 state->slashes = 0; 488 489 KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc"); 490 KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0), 491 "namei: nameiop contaminated with flags: %08"PRIx32, 492 state->cnp->cn_nameiop); 493 KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0), 494 "name: flags contaminated with nameiops: %08"PRIx32, 495 state->cnp->cn_flags); 496 497 /* 498 * The buffer for name translation shall be the one inside the 499 * pathbuf. 500 */ 501 state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path; 502 } 503 504 /* 505 * Clean up the working namei state, leaving things ready for return 506 * from namei. 507 */ 508 static void 509 namei_cleanup(struct namei_state *state) 510 { 511 KASSERT(state->cnp == &state->ndp->ni_cnd); 512 513 /* nothing for now */ 514 (void)state; 515 } 516 517 ////////////////////////////// 518 519 /* 520 * Get the directory context. 521 * Initializes the rootdir and erootdir state and returns a reference 522 * to the starting dir. 
523 */ 524 static struct vnode * 525 namei_getstartdir(struct namei_state *state) 526 { 527 struct nameidata *ndp = state->ndp; 528 struct componentname *cnp = state->cnp; 529 struct cwdinfo *cwdi; /* pointer to cwd state */ 530 struct lwp *self = curlwp; /* thread doing namei() */ 531 struct vnode *rootdir, *erootdir, *curdir, *startdir; 532 533 cwdi = self->l_proc->p_cwdi; 534 rw_enter(&cwdi->cwdi_lock, RW_READER); 535 536 /* root dir */ 537 if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { 538 rootdir = rootvnode; 539 } else { 540 rootdir = cwdi->cwdi_rdir; 541 } 542 543 /* emulation root dir, if any */ 544 if ((cnp->cn_flags & TRYEMULROOT) == 0) { 545 /* if we don't want it, don't fetch it */ 546 erootdir = NULL; 547 } else if (cnp->cn_flags & EMULROOTSET) { 548 /* explicitly set emulroot; "/../" doesn't override this */ 549 erootdir = ndp->ni_erootdir; 550 } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) { 551 /* explicit reference to real rootdir */ 552 erootdir = NULL; 553 } else { 554 /* may be null */ 555 erootdir = cwdi->cwdi_edir; 556 } 557 558 /* current dir */ 559 curdir = cwdi->cwdi_cdir; 560 561 if (ndp->ni_pnbuf[0] != '/') { 562 if (ndp->ni_atdir != NULL) { 563 startdir = ndp->ni_atdir; 564 } else { 565 startdir = curdir; 566 } 567 erootdir = NULL; 568 } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) { 569 startdir = erootdir; 570 } else { 571 startdir = rootdir; 572 erootdir = NULL; 573 } 574 575 state->ndp->ni_rootdir = rootdir; 576 state->ndp->ni_erootdir = erootdir; 577 578 /* 579 * Get a reference to the start dir so we can safely unlock cwdi. 580 * 581 * XXX: should we hold references to rootdir and erootdir while 582 * we're running? What happens if a multithreaded process chroots 583 * during namei? 584 */ 585 vref(startdir); 586 587 rw_exit(&cwdi->cwdi_lock); 588 return startdir; 589 } 590 591 /* 592 * Get the directory context for the nfsd case, in parallel to 593 * getstartdir. 
Initializes the rootdir and erootdir state and 594 * returns a reference to the passed-in starting dir. 595 */ 596 static struct vnode * 597 namei_getstartdir_for_nfsd(struct namei_state *state) 598 { 599 KASSERT(state->ndp->ni_atdir != NULL); 600 601 /* always use the real root, and never set an emulation root */ 602 state->ndp->ni_rootdir = rootvnode; 603 state->ndp->ni_erootdir = NULL; 604 605 vref(state->ndp->ni_atdir); 606 return state->ndp->ni_atdir; 607 } 608 609 610 /* 611 * Ktrace the namei operation. 612 */ 613 static void 614 namei_ktrace(struct namei_state *state) 615 { 616 struct nameidata *ndp = state->ndp; 617 struct componentname *cnp = state->cnp; 618 struct lwp *self = curlwp; /* thread doing namei() */ 619 const char *emul_path; 620 621 if (ktrpoint(KTR_NAMEI)) { 622 if (ndp->ni_erootdir != NULL) { 623 /* 624 * To make any sense, the trace entry need to have the 625 * text of the emulation path prepended. 626 * Usually we can get this from the current process, 627 * but when called from emul_find_interp() it is only 628 * in the exec_package - so we get it passed in ni_next 629 * (this is a hack). 630 */ 631 if (cnp->cn_flags & EMULROOTSET) 632 emul_path = ndp->ni_next; 633 else 634 emul_path = self->l_proc->p_emul->e_path; 635 ktrnamei2(emul_path, strlen(emul_path), 636 ndp->ni_pnbuf, ndp->ni_pathlen); 637 } else 638 ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen); 639 } 640 } 641 642 /* 643 * Start up namei. Find the root dir and cwd, establish the starting 644 * directory for lookup, and lock it. Also calls ktrace when 645 * appropriate. 646 */ 647 static int 648 namei_start(struct namei_state *state, int isnfsd, 649 struct vnode **startdir_ret) 650 { 651 struct nameidata *ndp = state->ndp; 652 struct vnode *startdir; 653 654 /* length includes null terminator (was originally from copyinstr) */ 655 ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1; 656 657 /* 658 * POSIX.1 requirement: "" is not a valid file name. 
659 */ 660 if (ndp->ni_pathlen == 1) { 661 return ENOENT; 662 } 663 664 ndp->ni_loopcnt = 0; 665 666 /* Get starting directory, set up root, and ktrace. */ 667 if (isnfsd) { 668 startdir = namei_getstartdir_for_nfsd(state); 669 /* no ktrace */ 670 } else { 671 startdir = namei_getstartdir(state); 672 namei_ktrace(state); 673 } 674 675 /* NDAT may feed us with a non directory namei_getstartdir */ 676 if (startdir->v_type != VDIR) { 677 vrele(startdir); 678 return ENOTDIR; 679 } 680 681 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 682 683 *startdir_ret = startdir; 684 return 0; 685 } 686 687 /* 688 * Check for being at a symlink that we're going to follow. 689 */ 690 static inline int 691 namei_atsymlink(struct namei_state *state, struct vnode *foundobj) 692 { 693 return (foundobj->v_type == VLNK) && 694 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR)); 695 } 696 697 /* 698 * Follow a symlink. 699 * 700 * Updates searchdir. inhibitmagic causes magic symlinks to not be 701 * interpreted; this is used by nfsd. 
*
 * Unlocks foundobj on success (ugh)
 *
 * On entry both searchdir and foundobj must be exclusively locked.
 * On success the link text, followed by the not-yet-consumed rest of
 * the path, replaces the contents of ni_pnbuf, cn_nameptr is reset to
 * the start of that buffer, and the directory to continue from is
 * stored, exclusively locked, in *newsearchdir_ret.
 *
 * Returns ELOOP once ni_loopcnt has reached MAXSYMLINKS, ENOENT for an
 * empty link, ENAMETOOLONG if the combined path does not fit, or the
 * error from VOP_ACCESS/VOP_READLINK.  Every error return happens
 * before foundobj is unlocked and before searchdir is touched.
 */
static inline int
namei_follow(struct namei_state *state, int inhibitmagic,
	     struct vnode *searchdir, struct vnode *foundobj,
	     struct vnode **newsearchdir_ret)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;

	struct lwp *self = curlwp;	/* thread doing namei() */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	char *cp;			/* scratch buffer for the link text */
	size_t linklen;
	int error;

	KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(foundobj) == LK_EXCLUSIVE);
	/* the counter is bumped even when the limit check fails */
	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
		return ELOOP;
	}
	/* on MNT_SYMPERM mounts the link itself must pass a VEXEC check */
	if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) {
		error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred);
		if (error != 0)
			return error;
	}

	/* FUTURE: fix this to not use a second buffer */
	cp = PNBUF_GET();
	/* read up to MAXPATHLEN bytes of link text into cp */
	aiov.iov_base = cp;
	aiov.iov_len = MAXPATHLEN;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = MAXPATHLEN;
	UIO_SETUP_SYSSPACE(&auio);
	error = VOP_READLINK(foundobj, &auio, cnp->cn_cred);
	if (error) {
		PNBUF_PUT(cp);
		return error;
	}
	/* whatever was not left over in the uio is the link length */
	linklen = MAXPATHLEN - auio.uio_resid;
	if (linklen == 0) {
		PNBUF_PUT(cp);
		return ENOENT;
	}

	/*
	 * Do symlink substitution, if appropriate, and
	 * check length for potential overflow.
	 *
	 * Inhibit symlink substitution for nfsd.
	 * XXX: This is how it was before; is that a bug or a feature?
	 */
	if ((!inhibitmagic && vfs_magiclinks &&
	     symlink_magic(self->l_proc, cp, &linklen)) ||
	    (linklen + ndp->ni_pathlen >= MAXPATHLEN)) {
		PNBUF_PUT(cp);
		return ENAMETOOLONG;
	}
	/* append the unconsumed remainder of the path to the link text */
	if (ndp->ni_pathlen > 1) {
		/* includes a null-terminator */
		memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
	} else {
		cp[linklen] = '\0';
	}
	ndp->ni_pathlen += linklen;
	/* the spliced path becomes the new contents of the namei buffer */
	memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen);
	PNBUF_PUT(cp);

	/* we're now starting from the beginning of the buffer again */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* must unlock this before relocking searchdir */
	VOP_UNLOCK(foundobj);

	/*
	 * Check if root directory should replace current directory.
	 */
	if (ndp->ni_pnbuf[0] == '/') {
		vput(searchdir);
		/* Keep absolute symbolic links inside emulation root */
		searchdir = ndp->ni_erootdir;
		/* no emulation root, or a leading "/../": use the root dir */
		if (searchdir == NULL ||
		    (ndp->ni_pnbuf[1] == '.'
		     && ndp->ni_pnbuf[2] == '.'
		     && ndp->ni_pnbuf[3] == '/')) {
			ndp->ni_erootdir = NULL;
			searchdir = ndp->ni_rootdir;
		}
		vref(searchdir);
		vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
		/* the leading slashes have done their job; skip them */
		while (cnp->cn_nameptr[0] == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
	}

	*newsearchdir_ret = searchdir;
	KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
	return 0;
}

//////////////////////////////

/*
 * Inspect the leading path component and update the state accordingly.
 *
 * Sets cn_namelen for the component at cn_nameptr, advances ni_next
 * past it and any slashes that follow (recording their number in
 * state->slashes), adjusts ni_pathlen to match, and updates the
 * REQUIREDIR, MAKEENTRY, ISLASTCN and ISDOTDOT bits in cn_flags.
 *
 * Returns 0, or ENAMETOOLONG if the component is longer than
 * KERNEL_NAME_MAX (in which case only cn_consume and cn_namelen have
 * been written).
 */
static int
lookup_parsepath(struct namei_state *state)
{
	const char *cp;			/* pointer into pathname argument */

	struct componentname *cnp = state->cnp;
	struct nameidata *ndp = state->ndp;

	KASSERT(cnp == &ndp->ni_cnd);

	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 *
	 * At this point, our only vnode state is that the search dir
	 * is held and locked.
	 */
	cnp->cn_consume = 0;
	cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr);
	/* cp: first byte after the component (a '/' or the NUL) */
	cp = cnp->cn_nameptr + cnp->cn_namelen;
	if (cnp->cn_namelen > KERNEL_NAME_MAX) {
		return ENAMETOOLONG;
	}
#ifdef NAMEI_DIAGNOSTIC
	/* temporarily terminate the component so it can be printed */
	{ char c = *cp;
	*(char *)cp = '\0';
	printf("{%s}: ", cnp->cn_nameptr);
	*(char *)cp = c; }
#endif /* NAMEI_DIAGNOSTIC */
	ndp->ni_pathlen -= cnp->cn_namelen;
	ndp->ni_next = cp;
	/*
	 * If this component is followed by a slash, then move the pointer to
	 * the next component forward, and remember that this component must be
	 * a directory.
	 */
	if (*cp == '/') {
		do {
			cp++;
		} while (*cp == '/');
		/* ni_next still marks the first slash at this point */
		state->slashes = cp - ndp->ni_next;
		ndp->ni_pathlen -= state->slashes;
		ndp->ni_next = cp;
		cnp->cn_flags |= REQUIREDIR;
	} else {
		state->slashes = 0;
		cnp->cn_flags &= ~REQUIREDIR;
	}
	/*
	 * We do special processing on the last component, whether or not it's
	 * a directory. Cache all intervening lookups, but not the final one.
	 */
	if (*cp == '\0') {
		/* last component: cached only if the caller allows it */
		if (state->docache)
			cnp->cn_flags |= MAKEENTRY;
		else
			cnp->cn_flags &= ~MAKEENTRY;
		cnp->cn_flags |= ISLASTCN;
	} else {
		cnp->cn_flags |= MAKEENTRY;
		cnp->cn_flags &= ~ISLASTCN;
	}
	/* flag ".." so that lookup_once can apply its special handling */
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
		cnp->cn_flags |= ISDOTDOT;
	else
		cnp->cn_flags &= ~ISDOTDOT;

	return 0;
}

/*
 * Call VOP_LOOKUP for a single lookup; return a new search directory
 * (used when crossing mountpoints up or searching union mounts down) and
 * the found object, which for create operations may be NULL on success.
892 * 893 * Note that the new search directory may be null, which means the 894 * searchdir was unlocked and released. This happens in the common case 895 * when crossing a mount point downwards, in order to avoid coupling 896 * locks between different file system volumes. Importantly, this can 897 * happen even if the call fails. (XXX: this is gross and should be 898 * tidied somehow.) 899 */ 900 static int 901 lookup_once(struct namei_state *state, 902 struct vnode *searchdir, 903 struct vnode **newsearchdir_ret, 904 struct vnode **foundobj_ret) 905 { 906 struct vnode *tmpvn; /* scratch vnode */ 907 struct vnode *foundobj; /* result */ 908 struct mount *mp; /* mount table entry */ 909 struct lwp *l = curlwp; 910 int error; 911 912 struct componentname *cnp = state->cnp; 913 struct nameidata *ndp = state->ndp; 914 915 KASSERT(cnp == &ndp->ni_cnd); 916 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 917 *newsearchdir_ret = searchdir; 918 919 /* 920 * Handle "..": two special cases. 921 * 1. If at root directory (e.g. after chroot) 922 * or at absolute root directory 923 * then ignore it so can't get out. 924 * 1a. If at the root of the emulation filesystem go to the real 925 * root. So "/../<path>" is always absolute. 926 * 1b. If we have somehow gotten out of a jail, warn 927 * and also ignore it so we can't get farther out. 928 * 2. If this vnode is the root of a mounted 929 * filesystem, then replace it with the 930 * vnode which was mounted on so we take the 931 * .. in the other file system. 
932 */ 933 if (cnp->cn_flags & ISDOTDOT) { 934 struct proc *p = l->l_proc; 935 936 for (;;) { 937 if (searchdir == ndp->ni_rootdir || 938 searchdir == rootvnode) { 939 foundobj = searchdir; 940 vref(foundobj); 941 *foundobj_ret = foundobj; 942 error = 0; 943 goto done; 944 } 945 if (ndp->ni_rootdir != rootvnode) { 946 int retval; 947 948 VOP_UNLOCK(searchdir); 949 retval = vn_isunder(searchdir, ndp->ni_rootdir, l); 950 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 951 if (!retval) { 952 /* Oops! We got out of jail! */ 953 log(LOG_WARNING, 954 "chrooted pid %d uid %d (%s) " 955 "detected outside of its chroot\n", 956 p->p_pid, kauth_cred_geteuid(l->l_cred), 957 p->p_comm); 958 /* Put us at the jail root. */ 959 vput(searchdir); 960 searchdir = NULL; 961 foundobj = ndp->ni_rootdir; 962 vref(foundobj); 963 vref(foundobj); 964 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 965 *newsearchdir_ret = foundobj; 966 *foundobj_ret = foundobj; 967 error = 0; 968 goto done; 969 } 970 } 971 if ((searchdir->v_vflag & VV_ROOT) == 0 || 972 (cnp->cn_flags & NOCROSSMOUNT)) 973 break; 974 tmpvn = searchdir; 975 searchdir = searchdir->v_mount->mnt_vnodecovered; 976 vref(searchdir); 977 vput(tmpvn); 978 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 979 *newsearchdir_ret = searchdir; 980 } 981 } 982 983 /* 984 * We now have a segment name to search for, and a directory to search. 985 * Our vnode state here is that "searchdir" is held and locked. 
986 */ 987 unionlookup: 988 foundobj = NULL; 989 error = VOP_LOOKUP(searchdir, &foundobj, cnp); 990 991 if (error != 0) { 992 KASSERTMSG((foundobj == NULL), 993 "leaf `%s' should be empty but is %p", 994 cnp->cn_nameptr, foundobj); 995 #ifdef NAMEI_DIAGNOSTIC 996 printf("not found\n"); 997 #endif /* NAMEI_DIAGNOSTIC */ 998 if ((error == ENOENT) && 999 (searchdir->v_vflag & VV_ROOT) && 1000 (searchdir->v_mount->mnt_flag & MNT_UNION)) { 1001 tmpvn = searchdir; 1002 searchdir = searchdir->v_mount->mnt_vnodecovered; 1003 vref(searchdir); 1004 vput(tmpvn); 1005 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1006 *newsearchdir_ret = searchdir; 1007 goto unionlookup; 1008 } 1009 1010 if (error != EJUSTRETURN) 1011 goto done; 1012 1013 /* 1014 * If this was not the last component, or there were trailing 1015 * slashes, and we are not going to create a directory, 1016 * then the name must exist. 1017 */ 1018 if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) { 1019 error = ENOENT; 1020 goto done; 1021 } 1022 1023 /* 1024 * If creating and at end of pathname, then can consider 1025 * allowing file to be created. 1026 */ 1027 if (state->rdonly) { 1028 error = EROFS; 1029 goto done; 1030 } 1031 1032 /* 1033 * We return success and a NULL foundobj to indicate 1034 * that the entry doesn't currently exist, leaving a 1035 * pointer to the (normally, locked) directory vnode 1036 * as searchdir. 1037 */ 1038 *foundobj_ret = NULL; 1039 error = 0; 1040 goto done; 1041 } 1042 #ifdef NAMEI_DIAGNOSTIC 1043 printf("found\n"); 1044 #endif /* NAMEI_DIAGNOSTIC */ 1045 1046 /* 1047 * Take into account any additional components consumed by the 1048 * underlying filesystem. This will include any trailing slashes after 1049 * the last component consumed. 
1050 */ 1051 if (cnp->cn_consume > 0) { 1052 ndp->ni_pathlen -= cnp->cn_consume - state->slashes; 1053 ndp->ni_next += cnp->cn_consume - state->slashes; 1054 cnp->cn_consume = 0; 1055 if (ndp->ni_next[0] == '\0') 1056 cnp->cn_flags |= ISLASTCN; 1057 } 1058 1059 /* 1060 * "searchdir" is locked and held, "foundobj" is held, 1061 * they may be the same vnode. 1062 */ 1063 if (searchdir != foundobj) { 1064 if (cnp->cn_flags & ISDOTDOT) 1065 VOP_UNLOCK(searchdir); 1066 error = vn_lock(foundobj, LK_EXCLUSIVE); 1067 if (cnp->cn_flags & ISDOTDOT) 1068 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1069 if (error != 0) { 1070 vrele(foundobj); 1071 goto done; 1072 } 1073 } 1074 1075 /* 1076 * Check to see if the vnode has been mounted on; 1077 * if so find the root of the mounted file system. 1078 */ 1079 KASSERT(searchdir != NULL); 1080 while (foundobj->v_type == VDIR && 1081 (mp = foundobj->v_mountedhere) != NULL && 1082 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 1083 1084 KASSERT(searchdir != foundobj); 1085 1086 error = vfs_busy(mp); 1087 if (error != 0) { 1088 vput(foundobj); 1089 goto done; 1090 } 1091 if (searchdir != NULL) { 1092 VOP_UNLOCK(searchdir); 1093 } 1094 vput(foundobj); 1095 error = VFS_ROOT(mp, &foundobj); 1096 vfs_unbusy(mp); 1097 if (error) { 1098 if (searchdir != NULL) { 1099 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1100 } 1101 goto done; 1102 } 1103 /* 1104 * Avoid locking vnodes from two filesystems because 1105 * it's prone to deadlock, e.g. when using puffs. 1106 * Also, it isn't a good idea to propagate slowness of 1107 * a filesystem up to the root directory. For now, 1108 * only handle the common case, where foundobj is 1109 * VDIR. 1110 * 1111 * In this case set searchdir to null to avoid using 1112 * it again. It is not correct to set searchdir == 1113 * foundobj here as that will confuse the caller. 1114 * (See PR 40740.) 
*/
		if (searchdir == NULL) {
			/* already been here once; do nothing further */
		} else if (foundobj->v_type == VDIR) {
			vrele(searchdir);
			*newsearchdir_ret = searchdir = NULL;
		} else {
			VOP_UNLOCK(foundobj);
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}
	}

	*foundobj_ret = foundobj;
	error = 0;
done:
	KASSERT(*newsearchdir_ret == NULL ||
		VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE);
	/*
	 * *foundobj_ret is valid only if error == 0.
	 */
	KASSERT(error != 0 || *foundobj_ret == NULL ||
	    VOP_ISLOCKED(*foundobj_ret) == LK_EXCLUSIVE);
	return error;
}

//////////////////////////////

/*
 * Do a complete path search from a single root directory.
 * (This is called up to twice if TRYEMULROOT is in effect.)
 *
 * The path in ndp->ni_pnbuf is walked one component at a time. Each
 * pass through the main loop parses a component (lookup_parsepath),
 * looks it up in searchdir (lookup_once), and then either follows a
 * symlink (namei_follow), stops because the last component has been
 * reached, or makes the object found the next searchdir.
 *
 * Arguments:
 *	state		lookup state; state->ndp and state->cnp are used.
 *	neverfollow	if nonzero, a symlink that namei_atsymlink() says
 *			should be followed yields EINVAL instead.
 *	inhibitmagic	passed through to namei_follow().
 *	isnfsd		passed through to namei_start().
 *
 * Returns 0 on success, with ndp->ni_vp set to the object found (or
 * NULL if lookup_once succeeded without returning an object) and
 * ndp->ni_dvp set to the parent directory, or to NULL if LOCKPARENT
 * was not requested or no parent is held. The object is unlocked
 * before return unless LOCKLEAF is set.
 *
 * On failure an errno value is returned and both ndp->ni_dvp and
 * ndp->ni_vp are set to NULL. Several failure paths also set
 * state->attempt_retry, which namei_tryemulroot() reads to decide
 * whether to retry.
 */
static int
namei_oneroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	struct vnode *searchdir, *foundobj;
	int error;

	error = namei_start(state, isnfsd, &searchdir);
	if (error) {
		ndp->ni_dvp = NULL;
		ndp->ni_vp = NULL;
		return error;
	}
	KASSERT(searchdir->v_type == VDIR);

	/*
	 * Setup: break out flag bits into variables.
	 */
	/* docache is nonzero exactly when NOCACHE is clear */
	state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE)
		state->docache = 0;
	state->rdonly = cnp->cn_flags & RDONLY;

	/*
	 * Keep going until we run out of path components.
	 */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* drop leading slashes (already used them to choose startdir) */
	while (cnp->cn_nameptr[0] == '/') {
		cnp->cn_nameptr++;
		ndp->ni_pathlen--;
	}
	/* was it just "/"? */
	if (cnp->cn_nameptr[0] == '\0') {
		/* the start directory is the result; there is no parent */
		foundobj = searchdir;
		searchdir = NULL;
		cnp->cn_flags |= ISLASTCN;

		/* bleh */
		goto skiploop;
	}

	for (;;) {
		KASSERT(searchdir != NULL);
		KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);

		/*
		 * If the directory we're on is unmounted, bail out.
		 * XXX: should this also check if it's unlinked?
		 * XXX: yes it should... but how?
		 */
		if (searchdir->v_mount == NULL) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			return (ENOENT);
		}

		/*
		 * Look up the next path component.
		 * (currently, this may consume more than one)
		 */

		/* There should be no slashes here. */
		KASSERT(cnp->cn_nameptr[0] != '/');

		/* and we shouldn't have looped around if we were done */
		KASSERT(cnp->cn_nameptr[0] != '\0');

		error = lookup_parsepath(state);
		if (error) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return (error);
		}

		/*
		 * searchdir is passed both as the directory to search
		 * and as the out-parameter, so lookup_once may replace
		 * it or set it to NULL.
		 */
		error = lookup_once(state, searchdir, &searchdir, &foundobj);
		if (error) {
			if (searchdir != NULL) {
				vput(searchdir);
			}
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			/*
			 * Note that if we're doing TRYEMULROOT we can
			 * retry with the normal root. Where this is
			 * currently set matches previous practice,
			 * but the previous practice didn't make much
			 * sense and somebody should sit down and
			 * figure out which cases should cause retry
			 * and which shouldn't. XXX.
			 */
			state->attempt_retry = 1;
			return (error);
		}

		if (foundobj == NULL) {
			/*
			 * Success with no object returned means we're
			 * creating something and it isn't already
			 * there. Break out of the main loop now so
			 * the code below doesn't have to test for
			 * foundobj == NULL.
			 */
			/* lookup_once can't have dropped the searchdir */
			KASSERT(searchdir != NULL);
			break;
		}

		/*
		 * Check for symbolic link. If we've reached one,
		 * follow it, unless we aren't supposed to. Back up
		 * over any slashes that we skipped, as we will need
		 * them again.
		 */
		if (namei_atsymlink(state, foundobj)) {
			ndp->ni_pathlen += state->slashes;
			ndp->ni_next -= state->slashes;
			if (neverfollow) {
				error = EINVAL;
			} else if (searchdir == NULL) {
				/*
				 * dholland 20160410: lookup_once only
				 * drops searchdir if it crossed a
				 * mount point. Therefore, if we get
				 * here it means we crossed a mount
				 * point to a mounted filesystem whose
				 * root vnode is a symlink. In theory
				 * we could continue at this point by
				 * using the pre-crossing searchdir
				 * (e.g. just take out an extra
				 * reference on it before calling
				 * lookup_once so we still have it),
				 * but this will make an ugly mess and
				 * it should never happen in practice
				 * as only badly broken filesystems
				 * have non-directory root vnodes. (I
				 * have seen this sort of thing with
				 * NFS occasionally but even then it
				 * means something's badly wrong.)
				 */
				error = ENOTDIR;
			} else {
				/*
				 * dholland 20110410: if we're at a
				 * union mount it might make sense to
				 * use the top of the union stack here
				 * rather than the layer we found the
				 * symlink in. (FUTURE)
				 */
				error = namei_follow(state, inhibitmagic,
						     searchdir, foundobj,
						     &searchdir);
			}
			if (error) {
				/*
				 * Note: unlike the other failure paths in
				 * this loop, this one does not set
				 * state->attempt_retry.
				 */
				KASSERT(searchdir != foundobj);
				if (searchdir != NULL) {
					vput(searchdir);
				}
				vput(foundobj);
				ndp->ni_dvp = NULL;
				ndp->ni_vp = NULL;
				return error;
			}
			/* namei_follow unlocks it (ugh) so rele, not put */
			vrele(foundobj);
			foundobj = NULL;

			/*
			 * If we followed a symlink to `/' and there
			 * are no more components after the symlink,
			 * we're done with the loop and what we found
			 * is the searchdir.
			 */
			if (cnp->cn_nameptr[0] == '\0') {
				KASSERT(searchdir != NULL);
				foundobj = searchdir;
				searchdir = NULL;
				cnp->cn_flags |= ISLASTCN;
				break;
			}

			continue;
		}

		/*
		 * Not a symbolic link.
		 *
		 * Check for directory, if the component was
		 * followed by a series of slashes.
		 */
		if ((foundobj->v_type != VDIR) &&
		    (cnp->cn_flags & REQUIREDIR)) {
			KASSERT(foundobj != searchdir);
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return ENOTDIR;
		}

		/*
		 * Stop if we've reached the last component.
		 */
		if (cnp->cn_flags & ISLASTCN) {
			break;
		}

		/*
		 * Continue with the next component.
		 */
		cnp->cn_nameptr = ndp->ni_next;
		/*
		 * Drop the old searchdir: vrele if it is the same vnode
		 * as foundobj (so the lock carries over to the new
		 * searchdir), vput otherwise.
		 */
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else if (searchdir != NULL) {
			vput(searchdir);
		}
		searchdir = foundobj;
		foundobj = NULL;
	}

 skiploop:

	if (foundobj != NULL) {
		if (foundobj == ndp->ni_erootdir) {
			/*
			 * We are about to return the emulation root.
			 * This isn't a good idea because code might
			 * repeatedly lookup ".." until the file
			 * matches that returned for "/" and loop
			 * forever.  So convert it to the real root.
			 */
			if (searchdir != NULL) {
				if (searchdir == foundobj)
					vrele(searchdir);
				else
					vput(searchdir);
				searchdir = NULL;
			}
			vput(foundobj);
			/* take a fresh reference and lock on the real root */
			foundobj = ndp->ni_rootdir;
			vref(foundobj);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}

		/*
		 * If the caller requested the parent node (i.e. it's
		 * a CREATE, DELETE, or RENAME), and we don't have one
		 * (because this is the root directory, or we crossed
		 * a mount point), then we must fail.
		 */
		if (cnp->cn_nameiop != LOOKUP &&
		    (searchdir == NULL ||
		     searchdir->v_mount != foundobj->v_mount)) {
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;

			switch (cnp->cn_nameiop) {
			    case CREATE:
				return EEXIST;
			    case DELETE:
			    case RENAME:
				return EBUSY;
			    default:
				break;
			}
			/* only reached for an operation not listed above */
			panic("Invalid nameiop\n");
		}

		/*
		 * Disallow directory write attempts on read-only lookups.
		 * Prefers EEXIST over EROFS for the CREATE case.
		 */
		if (state->rdonly &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			if (searchdir) {
				if (foundobj != searchdir) {
					vput(searchdir);
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return EROFS;
		}
		if ((cnp->cn_flags & LOCKLEAF) == 0) {
			/*
			 * Note: if LOCKPARENT but not LOCKLEAF is
			 * set, and searchdir == foundobj, this code
			 * necessarily unlocks the parent as well as
			 * the leaf. That is, just because you specify
			 * LOCKPARENT doesn't mean you necessarily get
			 * a locked parent vnode. The code in
			 * vfs_syscalls.c, and possibly elsewhere,
			 * that uses this combination "knows" this, so
			 * it can't be safely changed. Feh. XXX
			 */
			VOP_UNLOCK(foundobj);
		}
	}

	/*
	 * Done.
	 */

	/*
	 * If LOCKPARENT is not set, the parent directory isn't returned.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) {
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else {
			vput(searchdir);
		}
		searchdir = NULL;
	}

	ndp->ni_dvp = searchdir;
	ndp->ni_vp = foundobj;
	return 0;
}

/*
 * Do namei; wrapper layer that handles TRYEMULROOT.
 *
 * Runs namei_oneroot(). If that fails while an emulation root is in
 * use (ndp->ni_erootdir != NULL) and the failure was marked as
 * retryable (state->attempt_retry), TRYEMULROOT is cleared, the
 * original path string is copied back into the pathbuf, and the
 * lookup is run again.
 *
 * Returns whatever the last namei_oneroot() call returned. The
 * neverfollow, inhibitmagic and isnfsd arguments are passed through
 * unchanged.
 */
static int
namei_tryemulroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	int error;

	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	const char *savepath = NULL;

	KASSERT(cnp == &ndp->ni_cnd);

	/* keep a copy of the path string to restore from on retry */
	if (cnp->cn_flags & TRYEMULROOT) {
		savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf);
	}

    emul_retry:
	state->attempt_retry = 0;

	error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd);
	if (error) {
		/*
		 * Once namei has started up, the existence of ni_erootdir
		 * tells us whether we're working from an emulation root.
		 * The TRYEMULROOT flag isn't necessarily authoritative.
		 */
		if (ndp->ni_erootdir != NULL && state->attempt_retry) {
			/* Retry the whole thing using the normal root */
			cnp->cn_flags &= ~TRYEMULROOT;
			state->attempt_retry = 0;

			/* kinda gross */
			/*
			 * NOTE(review): savepath is only non-NULL if
			 * TRYEMULROOT was set on entry, and it is reset to
			 * NULL below. This relies on ni_erootdir being
			 * NULL whenever TRYEMULROOT is clear -- confirm
			 * against namei_start().
			 */
			strcpy(ndp->ni_pathbuf->pb_path, savepath);
			pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
			savepath = NULL;

			goto emul_retry;
		}
	}
	/* release the saved copy if the retry path did not already */
	if (savepath != NULL) {
		pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
	}
	return error;
}

/*
 * External interface.
1538 */ 1539 int 1540 namei(struct nameidata *ndp) 1541 { 1542 struct namei_state state; 1543 int error; 1544 1545 namei_init(&state, ndp); 1546 error = namei_tryemulroot(&state, 1547 0/*!neverfollow*/, 0/*!inhibitmagic*/, 1548 0/*isnfsd*/); 1549 namei_cleanup(&state); 1550 1551 if (error) { 1552 /* make sure no stray refs leak out */ 1553 KASSERT(ndp->ni_dvp == NULL); 1554 KASSERT(ndp->ni_vp == NULL); 1555 } 1556 1557 return error; 1558 } 1559 1560 //////////////////////////////////////////////////////////// 1561 1562 /* 1563 * External interface used by nfsd. This is basically different from 1564 * namei only in that it has the ability to pass in the "current 1565 * directory", and uses an extra flag "neverfollow" for which there's 1566 * no physical flag defined in namei.h. (There used to be a cut&paste 1567 * copy of about half of namei in nfsd to allow these minor 1568 * adjustments to exist.) 1569 * 1570 * XXX: the namei interface should be adjusted so nfsd can just use 1571 * ordinary namei(). 1572 */ 1573 int 1574 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow) 1575 { 1576 struct namei_state state; 1577 int error; 1578 1579 KASSERT(ndp->ni_atdir == NULL); 1580 ndp->ni_atdir = forcecwd; 1581 1582 namei_init(&state, ndp); 1583 error = namei_tryemulroot(&state, 1584 neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/); 1585 namei_cleanup(&state); 1586 1587 if (error) { 1588 /* make sure no stray refs leak out */ 1589 KASSERT(ndp->ni_dvp == NULL); 1590 KASSERT(ndp->ni_vp == NULL); 1591 } 1592 1593 return error; 1594 } 1595 1596 /* 1597 * A second external interface used by nfsd. This turns out to be a 1598 * single lookup used by the WebNFS code (ha!) to get "index.html" or 1599 * equivalent when asked for a directory. It should eventually evolve 1600 * into some kind of namei_once() call; for the time being it's kind 1601 * of a mess. XXX. 
1602 * 1603 * dholland 20110109: I don't think it works, and I don't think it 1604 * worked before I started hacking and slashing either, and I doubt 1605 * anyone will ever notice. 1606 */ 1607 1608 /* 1609 * Internals. This calls lookup_once() after setting up the assorted 1610 * pieces of state the way they ought to be. 1611 */ 1612 static int 1613 do_lookup_for_nfsd_index(struct namei_state *state) 1614 { 1615 int error = 0; 1616 1617 struct componentname *cnp = state->cnp; 1618 struct nameidata *ndp = state->ndp; 1619 struct vnode *startdir; 1620 struct vnode *foundobj; 1621 const char *cp; /* pointer into pathname argument */ 1622 1623 KASSERT(cnp == &ndp->ni_cnd); 1624 1625 startdir = state->ndp->ni_atdir; 1626 1627 cnp->cn_nameptr = ndp->ni_pnbuf; 1628 state->docache = 1; 1629 state->rdonly = cnp->cn_flags & RDONLY; 1630 ndp->ni_dvp = NULL; 1631 1632 cnp->cn_consume = 0; 1633 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 1634 cp = cnp->cn_nameptr + cnp->cn_namelen; 1635 KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX); 1636 ndp->ni_pathlen -= cnp->cn_namelen; 1637 ndp->ni_next = cp; 1638 state->slashes = 0; 1639 cnp->cn_flags &= ~REQUIREDIR; 1640 cnp->cn_flags |= MAKEENTRY|ISLASTCN; 1641 1642 if (cnp->cn_namelen == 2 && 1643 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 1644 cnp->cn_flags |= ISDOTDOT; 1645 else 1646 cnp->cn_flags &= ~ISDOTDOT; 1647 1648 /* 1649 * Because lookup_once can change the startdir, we need our 1650 * own reference to it to avoid consuming the caller's. 
1651 */ 1652 vref(startdir); 1653 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 1654 error = lookup_once(state, startdir, &startdir, &foundobj); 1655 if (error == 0 && startdir == foundobj) { 1656 vrele(startdir); 1657 } else if (startdir != NULL) { 1658 vput(startdir); 1659 } 1660 if (error) { 1661 goto bad; 1662 } 1663 ndp->ni_vp = foundobj; 1664 1665 if (foundobj == NULL) { 1666 return 0; 1667 } 1668 1669 KASSERT((cnp->cn_flags & LOCKPARENT) == 0); 1670 if ((cnp->cn_flags & LOCKLEAF) == 0) { 1671 VOP_UNLOCK(foundobj); 1672 } 1673 return (0); 1674 1675 bad: 1676 ndp->ni_vp = NULL; 1677 return (error); 1678 } 1679 1680 /* 1681 * External interface. The partitioning between this function and the 1682 * above isn't very clear - the above function exists mostly so code 1683 * that uses "state->" can be shuffled around without having to change 1684 * it to "state.". 1685 */ 1686 int 1687 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir) 1688 { 1689 struct namei_state state; 1690 int error; 1691 1692 KASSERT(ndp->ni_atdir == NULL); 1693 ndp->ni_atdir = startdir; 1694 1695 /* 1696 * Note: the name sent in here (is not|should not be) allowed 1697 * to contain a slash. 1698 */ 1699 if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) { 1700 return ENAMETOOLONG; 1701 } 1702 if (strchr(ndp->ni_pathbuf->pb_path, '/')) { 1703 return EINVAL; 1704 } 1705 1706 ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1; 1707 ndp->ni_pnbuf = NULL; 1708 ndp->ni_cnd.cn_nameptr = NULL; 1709 1710 namei_init(&state, ndp); 1711 error = do_lookup_for_nfsd_index(&state); 1712 namei_cleanup(&state); 1713 1714 return error; 1715 } 1716 1717 //////////////////////////////////////////////////////////// 1718 1719 /* 1720 * Reacquire a path name component. 1721 * dvp is locked on entry and exit. 1722 * *vpp is locked on exit unless it's NULL. 
1723 */ 1724 int 1725 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy) 1726 { 1727 int rdonly; /* lookup read-only flag bit */ 1728 int error = 0; 1729 #ifdef DEBUG 1730 size_t newlen; /* DEBUG: check name len */ 1731 const char *cp; /* DEBUG: check name ptr */ 1732 #endif /* DEBUG */ 1733 1734 (void)dummy; 1735 1736 /* 1737 * Setup: break out flag bits into variables. 1738 */ 1739 rdonly = cnp->cn_flags & RDONLY; 1740 1741 /* 1742 * Search a new directory. 1743 * 1744 * The cn_hash value is for use by vfs_cache. 1745 * The last component of the filename is left accessible via 1746 * cnp->cn_nameptr for callers that need the name. Callers needing 1747 * the name set the SAVENAME flag. When done, they assume 1748 * responsibility for freeing the pathname buffer. 1749 */ 1750 #ifdef DEBUG 1751 #if 0 1752 cp = NULL; 1753 newhash = namei_hash(cnp->cn_nameptr, &cp); 1754 if ((uint32_t)newhash != (uint32_t)cnp->cn_hash) 1755 panic("relookup: bad hash"); 1756 #endif 1757 newlen = namei_getcomponent(cnp->cn_nameptr); 1758 if (cnp->cn_namelen != newlen) 1759 panic("relookup: bad len"); 1760 cp = cnp->cn_nameptr + cnp->cn_namelen; 1761 while (*cp == '/') 1762 cp++; 1763 if (*cp != 0) 1764 panic("relookup: not last component"); 1765 #endif /* DEBUG */ 1766 1767 /* 1768 * Check for degenerate name (e.g. / or "") 1769 * which is a way of talking about a directory, 1770 * e.g. like "/." or ".". 1771 */ 1772 if (cnp->cn_nameptr[0] == '\0') 1773 panic("relookup: null name"); 1774 1775 if (cnp->cn_flags & ISDOTDOT) 1776 panic("relookup: lookup on dot-dot"); 1777 1778 /* 1779 * We now have a segment name to search for, and a directory to search. 
1780 */ 1781 *vpp = NULL; 1782 error = VOP_LOOKUP(dvp, vpp, cnp); 1783 if ((error) != 0) { 1784 KASSERTMSG((*vpp == NULL), 1785 "leaf `%s' should be empty but is %p", 1786 cnp->cn_nameptr, *vpp); 1787 if (error != EJUSTRETURN) 1788 goto bad; 1789 } 1790 1791 /* 1792 * Check for symbolic link 1793 */ 1794 KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK || 1795 (cnp->cn_flags & FOLLOW) == 0), 1796 "relookup: symlink found"); 1797 1798 /* 1799 * Check for read-only lookups. 1800 */ 1801 if (rdonly && cnp->cn_nameiop != LOOKUP) { 1802 error = EROFS; 1803 if (*vpp) { 1804 vrele(*vpp); 1805 } 1806 goto bad; 1807 } 1808 /* 1809 * Lock result. 1810 */ 1811 if (*vpp && *vpp != dvp) { 1812 error = vn_lock(*vpp, LK_EXCLUSIVE); 1813 if (error != 0) { 1814 vrele(*vpp); 1815 goto bad; 1816 } 1817 } 1818 return (0); 1819 1820 bad: 1821 *vpp = NULL; 1822 return (error); 1823 } 1824 1825 /* 1826 * namei_simple - simple forms of namei. 1827 * 1828 * These are wrappers to allow the simple case callers of namei to be 1829 * left alone while everything else changes under them. 
1830 */ 1831 1832 /* Flags */ 1833 struct namei_simple_flags_type { 1834 int dummy; 1835 }; 1836 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft; 1837 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn; 1838 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt; 1839 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn; 1840 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft; 1841 1842 static 1843 int 1844 namei_simple_convert_flags(namei_simple_flags_t sflags) 1845 { 1846 if (sflags == NSM_NOFOLLOW_NOEMULROOT) 1847 return NOFOLLOW | 0; 1848 if (sflags == NSM_NOFOLLOW_TRYEMULROOT) 1849 return NOFOLLOW | TRYEMULROOT; 1850 if (sflags == NSM_FOLLOW_NOEMULROOT) 1851 return FOLLOW | 0; 1852 if (sflags == NSM_FOLLOW_TRYEMULROOT) 1853 return FOLLOW | TRYEMULROOT; 1854 panic("namei_simple_convert_flags: bogus sflags\n"); 1855 return 0; 1856 } 1857 1858 int 1859 namei_simple_kernel(const char *path, namei_simple_flags_t sflags, 1860 struct vnode **vp_ret) 1861 { 1862 return nameiat_simple_kernel(NULL, path, sflags, vp_ret); 1863 } 1864 1865 int 1866 nameiat_simple_kernel(struct vnode *dvp, const char *path, 1867 namei_simple_flags_t sflags, struct vnode **vp_ret) 1868 { 1869 struct nameidata nd; 1870 struct pathbuf *pb; 1871 int err; 1872 1873 pb = pathbuf_create(path); 1874 if (pb == NULL) { 1875 return ENOMEM; 1876 } 1877 1878 NDINIT(&nd, 1879 LOOKUP, 1880 namei_simple_convert_flags(sflags), 1881 pb); 1882 1883 if (dvp != NULL) 1884 NDAT(&nd, dvp); 1885 1886 err = namei(&nd); 1887 if (err != 0) { 1888 pathbuf_destroy(pb); 1889 return err; 1890 } 1891 *vp_ret = nd.ni_vp; 1892 pathbuf_destroy(pb); 1893 return 0; 1894 } 1895 1896 int 1897 namei_simple_user(const char *path, namei_simple_flags_t sflags, 1898 struct vnode **vp_ret) 1899 { 1900 return nameiat_simple_user(NULL, path, sflags, vp_ret); 1901 } 1902 1903 int 1904 nameiat_simple_user(struct vnode *dvp, const char *path, 1905 namei_simple_flags_t sflags, struct vnode 
**vp_ret) 1906 { 1907 struct pathbuf *pb; 1908 struct nameidata nd; 1909 int err; 1910 1911 err = pathbuf_copyin(path, &pb); 1912 if (err) { 1913 return err; 1914 } 1915 1916 NDINIT(&nd, 1917 LOOKUP, 1918 namei_simple_convert_flags(sflags), 1919 pb); 1920 1921 if (dvp != NULL) 1922 NDAT(&nd, dvp); 1923 1924 err = namei(&nd); 1925 if (err != 0) { 1926 pathbuf_destroy(pb); 1927 return err; 1928 } 1929 *vp_ret = nd.ni_vp; 1930 pathbuf_destroy(pb); 1931 return 0; 1932 } 1933