1 /* $NetBSD: vfs_lookup.c,v 1.205 2016/04/22 05:34:58 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.205 2016/04/22 05:34:58 riastradh Exp $"); 41 42 #ifdef _KERNEL_OPT 43 #include "opt_magiclinks.h" 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/syslimits.h> 50 #include <sys/time.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/mount.h> 54 #include <sys/errno.h> 55 #include <sys/filedesc.h> 56 #include <sys/hash.h> 57 #include <sys/proc.h> 58 #include <sys/syslog.h> 59 #include <sys/kauth.h> 60 #include <sys/ktrace.h> 61 #include <sys/dirent.h> 62 63 #ifndef MAGICLINKS 64 #define MAGICLINKS 0 65 #endif 66 67 int vfs_magiclinks = MAGICLINKS; 68 69 __CTASSERT(MAXNAMLEN == NAME_MAX); 70 71 /* 72 * Substitute replacement text for 'magic' strings in symlinks. 73 * Returns 0 if successful, and returns non-zero if an error 74 * occurs. (Currently, the only possible error is running out 75 * of temporary pathname space.) 76 * 77 * Looks for "@<string>" and "@<string>/", where <string> is a 78 * recognized 'magic' string. Replaces the "@<string>" with the 79 * appropriate replacement text. (Note that in some cases the 80 * replacement text may have zero length.) 81 * 82 * This would have been table driven, but the variance in 83 * replacement strings (and replacement string lengths) made 84 * that impractical. 
85 */ 86 #define VNL(x) \ 87 (sizeof(x) - 1) 88 89 #define VO '{' 90 #define VC '}' 91 92 #define MATCH(str) \ 93 ((termchar == '/' && i + VNL(str) == *len) || \ 94 (i + VNL(str) < *len && \ 95 cp[i + VNL(str)] == termchar)) && \ 96 !strncmp((str), &cp[i], VNL(str)) 97 98 #define SUBSTITUTE(m, s, sl) \ 99 if ((newlen + (sl)) >= MAXPATHLEN) \ 100 return 1; \ 101 i += VNL(m); \ 102 if (termchar != '/') \ 103 i++; \ 104 (void)memcpy(&tmp[newlen], (s), (sl)); \ 105 newlen += (sl); \ 106 change = 1; \ 107 termchar = '/'; 108 109 static int 110 symlink_magic(struct proc *p, char *cp, size_t *len) 111 { 112 char *tmp; 113 size_t change, i, newlen, slen; 114 char termchar = '/'; 115 char idtmp[11]; /* enough for 32 bit *unsigned* integer */ 116 117 118 tmp = PNBUF_GET(); 119 for (change = i = newlen = 0; i < *len; ) { 120 if (cp[i] != '@') { 121 tmp[newlen++] = cp[i++]; 122 continue; 123 } 124 125 i++; 126 127 /* Check for @{var} syntax. */ 128 if (cp[i] == VO) { 129 termchar = VC; 130 i++; 131 } 132 133 /* 134 * The following checks should be ordered according 135 * to frequency of use. 
136 */ 137 if (MATCH("machine_arch")) { 138 slen = VNL(MACHINE_ARCH); 139 SUBSTITUTE("machine_arch", MACHINE_ARCH, slen); 140 } else if (MATCH("machine")) { 141 slen = VNL(MACHINE); 142 SUBSTITUTE("machine", MACHINE, slen); 143 } else if (MATCH("hostname")) { 144 SUBSTITUTE("hostname", hostname, hostnamelen); 145 } else if (MATCH("osrelease")) { 146 slen = strlen(osrelease); 147 SUBSTITUTE("osrelease", osrelease, slen); 148 } else if (MATCH("emul")) { 149 slen = strlen(p->p_emul->e_name); 150 SUBSTITUTE("emul", p->p_emul->e_name, slen); 151 } else if (MATCH("kernel_ident")) { 152 slen = strlen(kernel_ident); 153 SUBSTITUTE("kernel_ident", kernel_ident, slen); 154 } else if (MATCH("domainname")) { 155 SUBSTITUTE("domainname", domainname, domainnamelen); 156 } else if (MATCH("ostype")) { 157 slen = strlen(ostype); 158 SUBSTITUTE("ostype", ostype, slen); 159 } else if (MATCH("uid")) { 160 slen = snprintf(idtmp, sizeof(idtmp), "%u", 161 kauth_cred_geteuid(kauth_cred_get())); 162 SUBSTITUTE("uid", idtmp, slen); 163 } else if (MATCH("ruid")) { 164 slen = snprintf(idtmp, sizeof(idtmp), "%u", 165 kauth_cred_getuid(kauth_cred_get())); 166 SUBSTITUTE("ruid", idtmp, slen); 167 } else if (MATCH("gid")) { 168 slen = snprintf(idtmp, sizeof(idtmp), "%u", 169 kauth_cred_getegid(kauth_cred_get())); 170 SUBSTITUTE("gid", idtmp, slen); 171 } else if (MATCH("rgid")) { 172 slen = snprintf(idtmp, sizeof(idtmp), "%u", 173 kauth_cred_getgid(kauth_cred_get())); 174 SUBSTITUTE("rgid", idtmp, slen); 175 } else { 176 tmp[newlen++] = '@'; 177 if (termchar == VC) 178 tmp[newlen++] = VO; 179 } 180 } 181 182 if (change) { 183 (void)memcpy(cp, tmp, newlen); 184 *len = newlen; 185 } 186 PNBUF_PUT(tmp); 187 188 return 0; 189 } 190 191 #undef VNL 192 #undef VO 193 #undef VC 194 #undef MATCH 195 #undef SUBSTITUTE 196 197 //////////////////////////////////////////////////////////// 198 199 /* 200 * Determine the namei hash (for the namecache) for name. 201 * If *ep != NULL, hash from name to ep-1. 
202 * If *ep == NULL, hash from name until the first NUL or '/', and 203 * return the location of this termination character in *ep. 204 * 205 * This function returns an equivalent hash to the MI hash32_strn(). 206 * The latter isn't used because in the *ep == NULL case, determining 207 * the length of the string to the first NUL or `/' and then calling 208 * hash32_strn() involves unnecessary double-handling of the data. 209 */ 210 uint32_t 211 namei_hash(const char *name, const char **ep) 212 { 213 uint32_t hash; 214 215 hash = HASH32_STR_INIT; 216 if (*ep != NULL) { 217 for (; name < *ep; name++) 218 hash = hash * 33 + *(const uint8_t *)name; 219 } else { 220 for (; *name != '\0' && *name != '/'; name++) 221 hash = hash * 33 + *(const uint8_t *)name; 222 *ep = name; 223 } 224 return (hash + (hash >> 5)); 225 } 226 227 /* 228 * Find the end of the first path component in NAME and return its 229 * length. 230 */ 231 static size_t 232 namei_getcomponent(const char *name) 233 { 234 size_t pos; 235 236 pos = 0; 237 while (name[pos] != '\0' && name[pos] != '/') { 238 pos++; 239 } 240 return pos; 241 } 242 243 //////////////////////////////////////////////////////////// 244 245 /* 246 * Sealed abstraction for pathnames. 247 * 248 * System-call-layer level code that is going to call namei should 249 * first create a pathbuf and adjust all the bells and whistles on it 250 * as needed by context. 
251 */ 252 253 struct pathbuf { 254 char *pb_path; 255 char *pb_pathcopy; 256 unsigned pb_pathcopyuses; 257 }; 258 259 static struct pathbuf * 260 pathbuf_create_raw(void) 261 { 262 struct pathbuf *pb; 263 264 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 265 if (pb == NULL) { 266 return NULL; 267 } 268 pb->pb_path = PNBUF_GET(); 269 if (pb->pb_path == NULL) { 270 kmem_free(pb, sizeof(*pb)); 271 return NULL; 272 } 273 pb->pb_pathcopy = NULL; 274 pb->pb_pathcopyuses = 0; 275 return pb; 276 } 277 278 void 279 pathbuf_destroy(struct pathbuf *pb) 280 { 281 KASSERT(pb->pb_pathcopyuses == 0); 282 KASSERT(pb->pb_pathcopy == NULL); 283 PNBUF_PUT(pb->pb_path); 284 kmem_free(pb, sizeof(*pb)); 285 } 286 287 struct pathbuf * 288 pathbuf_assimilate(char *pnbuf) 289 { 290 struct pathbuf *pb; 291 292 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 293 if (pb == NULL) { 294 return NULL; 295 } 296 pb->pb_path = pnbuf; 297 pb->pb_pathcopy = NULL; 298 pb->pb_pathcopyuses = 0; 299 return pb; 300 } 301 302 struct pathbuf * 303 pathbuf_create(const char *path) 304 { 305 struct pathbuf *pb; 306 int error; 307 308 pb = pathbuf_create_raw(); 309 if (pb == NULL) { 310 return NULL; 311 } 312 error = copystr(path, pb->pb_path, PATH_MAX, NULL); 313 if (error != 0) { 314 KASSERT(!"kernel path too long in pathbuf_create"); 315 /* make sure it's null-terminated, just in case */ 316 pb->pb_path[PATH_MAX-1] = '\0'; 317 } 318 return pb; 319 } 320 321 int 322 pathbuf_copyin(const char *userpath, struct pathbuf **ret) 323 { 324 struct pathbuf *pb; 325 int error; 326 327 pb = pathbuf_create_raw(); 328 if (pb == NULL) { 329 return ENOMEM; 330 } 331 error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL); 332 if (error) { 333 pathbuf_destroy(pb); 334 return error; 335 } 336 *ret = pb; 337 return 0; 338 } 339 340 /* 341 * XXX should not exist: 342 * 1. whether a pointer is kernel or user should be statically checkable. 343 * 2. copyin should be handled by the upper part of the syscall layer, 344 * not in here. 
345 */ 346 int 347 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret) 348 { 349 if (seg == UIO_USERSPACE) { 350 return pathbuf_copyin(path, ret); 351 } else { 352 *ret = pathbuf_create(path); 353 if (*ret == NULL) { 354 return ENOMEM; 355 } 356 return 0; 357 } 358 } 359 360 /* 361 * Get a copy of the path buffer as it currently exists. If this is 362 * called after namei starts the results may be arbitrary. 363 */ 364 void 365 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen) 366 { 367 strlcpy(buf, pb->pb_path, maxlen); 368 } 369 370 /* 371 * These two functions allow access to a saved copy of the original 372 * path string. The first copy should be gotten before namei is 373 * called. Each copy that is gotten should be put back. 374 */ 375 376 const char * 377 pathbuf_stringcopy_get(struct pathbuf *pb) 378 { 379 if (pb->pb_pathcopyuses == 0) { 380 pb->pb_pathcopy = PNBUF_GET(); 381 strcpy(pb->pb_pathcopy, pb->pb_path); 382 } 383 pb->pb_pathcopyuses++; 384 return pb->pb_pathcopy; 385 } 386 387 void 388 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str) 389 { 390 KASSERT(str == pb->pb_pathcopy); 391 KASSERT(pb->pb_pathcopyuses > 0); 392 pb->pb_pathcopyuses--; 393 if (pb->pb_pathcopyuses == 0) { 394 PNBUF_PUT(pb->pb_pathcopy); 395 pb->pb_pathcopy = NULL; 396 } 397 } 398 399 400 //////////////////////////////////////////////////////////// 401 402 /* 403 * namei: convert a pathname into a pointer to a (maybe-locked) vnode, 404 * and maybe also its parent directory vnode, and assorted other guff. 405 * See namei(9) for the interface documentation. 406 * 407 * 408 * The FOLLOW flag is set when symbolic links are to be followed 409 * when they occur at the end of the name translation process. 410 * Symbolic links are always followed for all other pathname 411 * components other than the last. 412 * 413 * The segflg defines whether the name is to be copied from user 414 * space or kernel space. 
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 */

/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is passed in. The pathname is descended
 * until done, or a symbolic link is encountered. The variable ni_more
 * is clear if the path is completed; it is set to one if a symbolic
 * link needing interpretation is encountered.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked.  Otherwise the parent directory is not returned. If the target
 * of the pathname exists and LOCKLEAF is or'ed into the flag the target
 * is returned locked, otherwise it is returned unlocked.  When creating
 * or renaming and LOCKPARENT is specified, the target may not be ".".
 * When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at ndp->ni_ptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, locked.
 *	    component vnode returned in ni_vp (if it exists), locked.
456 * if result vnode is mounted on and crossing mount points, 457 * find mounted on vnode 458 * if more components of name, do next level at dirloop 459 * return the answer in ni_vp, locked if LOCKLEAF set 460 * if LOCKPARENT set, return locked parent in ni_dvp 461 */ 462 463 464 /* 465 * Internal state for a namei operation. 466 * 467 * cnp is always equal to &ndp->ni_cnp. 468 */ 469 struct namei_state { 470 struct nameidata *ndp; 471 struct componentname *cnp; 472 473 int docache; /* == 0 do not cache last component */ 474 int rdonly; /* lookup read-only flag bit */ 475 int slashes; 476 477 unsigned attempt_retry:1; /* true if error allows emul retry */ 478 }; 479 480 481 /* 482 * Initialize the namei working state. 483 */ 484 static void 485 namei_init(struct namei_state *state, struct nameidata *ndp) 486 { 487 488 state->ndp = ndp; 489 state->cnp = &ndp->ni_cnd; 490 491 state->docache = 0; 492 state->rdonly = 0; 493 state->slashes = 0; 494 495 KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc"); 496 KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0), 497 "namei: nameiop contaminated with flags: %08"PRIx32, 498 state->cnp->cn_nameiop); 499 KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0), 500 "name: flags contaminated with nameiops: %08"PRIx32, 501 state->cnp->cn_flags); 502 503 /* 504 * The buffer for name translation shall be the one inside the 505 * pathbuf. 506 */ 507 state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path; 508 } 509 510 /* 511 * Clean up the working namei state, leaving things ready for return 512 * from namei. 513 */ 514 static void 515 namei_cleanup(struct namei_state *state) 516 { 517 KASSERT(state->cnp == &state->ndp->ni_cnd); 518 519 /* nothing for now */ 520 (void)state; 521 } 522 523 ////////////////////////////// 524 525 /* 526 * Get the directory context. 527 * Initializes the rootdir and erootdir state and returns a reference 528 * to the starting dir. 
529 */ 530 static struct vnode * 531 namei_getstartdir(struct namei_state *state) 532 { 533 struct nameidata *ndp = state->ndp; 534 struct componentname *cnp = state->cnp; 535 struct cwdinfo *cwdi; /* pointer to cwd state */ 536 struct lwp *self = curlwp; /* thread doing namei() */ 537 struct vnode *rootdir, *erootdir, *curdir, *startdir; 538 539 cwdi = self->l_proc->p_cwdi; 540 rw_enter(&cwdi->cwdi_lock, RW_READER); 541 542 /* root dir */ 543 if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { 544 rootdir = rootvnode; 545 } else { 546 rootdir = cwdi->cwdi_rdir; 547 } 548 549 /* emulation root dir, if any */ 550 if ((cnp->cn_flags & TRYEMULROOT) == 0) { 551 /* if we don't want it, don't fetch it */ 552 erootdir = NULL; 553 } else if (cnp->cn_flags & EMULROOTSET) { 554 /* explicitly set emulroot; "/../" doesn't override this */ 555 erootdir = ndp->ni_erootdir; 556 } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) { 557 /* explicit reference to real rootdir */ 558 erootdir = NULL; 559 } else { 560 /* may be null */ 561 erootdir = cwdi->cwdi_edir; 562 } 563 564 /* current dir */ 565 curdir = cwdi->cwdi_cdir; 566 567 if (ndp->ni_pnbuf[0] != '/') { 568 if (ndp->ni_atdir != NULL) { 569 startdir = ndp->ni_atdir; 570 } else { 571 startdir = curdir; 572 } 573 erootdir = NULL; 574 } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) { 575 startdir = erootdir; 576 } else { 577 startdir = rootdir; 578 erootdir = NULL; 579 } 580 581 state->ndp->ni_rootdir = rootdir; 582 state->ndp->ni_erootdir = erootdir; 583 584 /* 585 * Get a reference to the start dir so we can safely unlock cwdi. 586 * 587 * XXX: should we hold references to rootdir and erootdir while 588 * we're running? What happens if a multithreaded process chroots 589 * during namei? 590 */ 591 vref(startdir); 592 593 rw_exit(&cwdi->cwdi_lock); 594 return startdir; 595 } 596 597 /* 598 * Get the directory context for the nfsd case, in parallel to 599 * getstartdir. 
Initializes the rootdir and erootdir state and 600 * returns a reference to the passed-in starting dir. 601 */ 602 static struct vnode * 603 namei_getstartdir_for_nfsd(struct namei_state *state) 604 { 605 KASSERT(state->ndp->ni_atdir != NULL); 606 607 /* always use the real root, and never set an emulation root */ 608 state->ndp->ni_rootdir = rootvnode; 609 state->ndp->ni_erootdir = NULL; 610 611 vref(state->ndp->ni_atdir); 612 return state->ndp->ni_atdir; 613 } 614 615 616 /* 617 * Ktrace the namei operation. 618 */ 619 static void 620 namei_ktrace(struct namei_state *state) 621 { 622 struct nameidata *ndp = state->ndp; 623 struct componentname *cnp = state->cnp; 624 struct lwp *self = curlwp; /* thread doing namei() */ 625 const char *emul_path; 626 627 if (ktrpoint(KTR_NAMEI)) { 628 if (ndp->ni_erootdir != NULL) { 629 /* 630 * To make any sense, the trace entry need to have the 631 * text of the emulation path prepended. 632 * Usually we can get this from the current process, 633 * but when called from emul_find_interp() it is only 634 * in the exec_package - so we get it passed in ni_next 635 * (this is a hack). 636 */ 637 if (cnp->cn_flags & EMULROOTSET) 638 emul_path = ndp->ni_next; 639 else 640 emul_path = self->l_proc->p_emul->e_path; 641 ktrnamei2(emul_path, strlen(emul_path), 642 ndp->ni_pnbuf, ndp->ni_pathlen); 643 } else 644 ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen); 645 } 646 } 647 648 /* 649 * Start up namei. Find the root dir and cwd, establish the starting 650 * directory for lookup, and lock it. Also calls ktrace when 651 * appropriate. 652 */ 653 static int 654 namei_start(struct namei_state *state, int isnfsd, 655 struct vnode **startdir_ret) 656 { 657 struct nameidata *ndp = state->ndp; 658 struct vnode *startdir; 659 660 /* length includes null terminator (was originally from copyinstr) */ 661 ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1; 662 663 /* 664 * POSIX.1 requirement: "" is not a valid file name. 
665 */ 666 if (ndp->ni_pathlen == 1) { 667 return ENOENT; 668 } 669 670 ndp->ni_loopcnt = 0; 671 672 /* Get starting directory, set up root, and ktrace. */ 673 if (isnfsd) { 674 startdir = namei_getstartdir_for_nfsd(state); 675 /* no ktrace */ 676 } else { 677 startdir = namei_getstartdir(state); 678 namei_ktrace(state); 679 } 680 681 /* NDAT may feed us with a non directory namei_getstartdir */ 682 if (startdir->v_type != VDIR) 683 return ENOTDIR; 684 685 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 686 687 *startdir_ret = startdir; 688 return 0; 689 } 690 691 /* 692 * Check for being at a symlink that we're going to follow. 693 */ 694 static inline int 695 namei_atsymlink(struct namei_state *state, struct vnode *foundobj) 696 { 697 return (foundobj->v_type == VLNK) && 698 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR)); 699 } 700 701 /* 702 * Follow a symlink. 703 * 704 * Updates searchdir. inhibitmagic causes magic symlinks to not be 705 * interpreted; this is used by nfsd. 706 * 707 * Unlocks foundobj on success (ugh) 708 */ 709 static inline int 710 namei_follow(struct namei_state *state, int inhibitmagic, 711 struct vnode *searchdir, struct vnode *foundobj, 712 struct vnode **newsearchdir_ret) 713 { 714 struct nameidata *ndp = state->ndp; 715 struct componentname *cnp = state->cnp; 716 717 struct lwp *self = curlwp; /* thread doing namei() */ 718 struct iovec aiov; /* uio for reading symbolic links */ 719 struct uio auio; 720 char *cp; /* pointer into pathname argument */ 721 size_t linklen; 722 int error; 723 724 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 725 KASSERT(VOP_ISLOCKED(foundobj) == LK_EXCLUSIVE); 726 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 727 return ELOOP; 728 } 729 if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) { 730 error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred); 731 if (error != 0) 732 return error; 733 } 734 735 /* FUTURE: fix this to not use a second buffer */ 736 cp = PNBUF_GET(); 737 aiov.iov_base = cp; 738 aiov.iov_len = MAXPATHLEN; 
739 auio.uio_iov = &aiov; 740 auio.uio_iovcnt = 1; 741 auio.uio_offset = 0; 742 auio.uio_rw = UIO_READ; 743 auio.uio_resid = MAXPATHLEN; 744 UIO_SETUP_SYSSPACE(&auio); 745 error = VOP_READLINK(foundobj, &auio, cnp->cn_cred); 746 if (error) { 747 PNBUF_PUT(cp); 748 return error; 749 } 750 linklen = MAXPATHLEN - auio.uio_resid; 751 if (linklen == 0) { 752 PNBUF_PUT(cp); 753 return ENOENT; 754 } 755 756 /* 757 * Do symlink substitution, if appropriate, and 758 * check length for potential overflow. 759 * 760 * Inhibit symlink substitution for nfsd. 761 * XXX: This is how it was before; is that a bug or a feature? 762 */ 763 if ((!inhibitmagic && vfs_magiclinks && 764 symlink_magic(self->l_proc, cp, &linklen)) || 765 (linklen + ndp->ni_pathlen >= MAXPATHLEN)) { 766 PNBUF_PUT(cp); 767 return ENAMETOOLONG; 768 } 769 if (ndp->ni_pathlen > 1) { 770 /* includes a null-terminator */ 771 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); 772 } else { 773 cp[linklen] = '\0'; 774 } 775 ndp->ni_pathlen += linklen; 776 memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen); 777 PNBUF_PUT(cp); 778 779 /* we're now starting from the beginning of the buffer again */ 780 cnp->cn_nameptr = ndp->ni_pnbuf; 781 782 /* must unlock this before relocking searchdir */ 783 VOP_UNLOCK(foundobj); 784 785 /* 786 * Check if root directory should replace current directory. 787 */ 788 if (ndp->ni_pnbuf[0] == '/') { 789 vput(searchdir); 790 /* Keep absolute symbolic links inside emulation root */ 791 searchdir = ndp->ni_erootdir; 792 if (searchdir == NULL || 793 (ndp->ni_pnbuf[1] == '.' 794 && ndp->ni_pnbuf[2] == '.' 
795 && ndp->ni_pnbuf[3] == '/')) { 796 ndp->ni_erootdir = NULL; 797 searchdir = ndp->ni_rootdir; 798 } 799 vref(searchdir); 800 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 801 while (cnp->cn_nameptr[0] == '/') { 802 cnp->cn_nameptr++; 803 ndp->ni_pathlen--; 804 } 805 } 806 807 *newsearchdir_ret = searchdir; 808 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 809 return 0; 810 } 811 812 ////////////////////////////// 813 814 /* 815 * Inspect the leading path component and update the state accordingly. 816 */ 817 static int 818 lookup_parsepath(struct namei_state *state) 819 { 820 const char *cp; /* pointer into pathname argument */ 821 822 struct componentname *cnp = state->cnp; 823 struct nameidata *ndp = state->ndp; 824 825 KASSERT(cnp == &ndp->ni_cnd); 826 827 /* 828 * Search a new directory. 829 * 830 * The last component of the filename is left accessible via 831 * cnp->cn_nameptr for callers that need the name. Callers needing 832 * the name set the SAVENAME flag. When done, they assume 833 * responsibility for freeing the pathname buffer. 834 * 835 * At this point, our only vnode state is that the search dir 836 * is held and locked. 837 */ 838 cnp->cn_consume = 0; 839 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 840 cp = cnp->cn_nameptr + cnp->cn_namelen; 841 if (cnp->cn_namelen > KERNEL_NAME_MAX) { 842 return ENAMETOOLONG; 843 } 844 #ifdef NAMEI_DIAGNOSTIC 845 { char c = *cp; 846 *(char *)cp = '\0'; 847 printf("{%s}: ", cnp->cn_nameptr); 848 *(char *)cp = c; } 849 #endif /* NAMEI_DIAGNOSTIC */ 850 ndp->ni_pathlen -= cnp->cn_namelen; 851 ndp->ni_next = cp; 852 /* 853 * If this component is followed by a slash, then move the pointer to 854 * the next component forward, and remember that this component must be 855 * a directory. 
856 */ 857 if (*cp == '/') { 858 do { 859 cp++; 860 } while (*cp == '/'); 861 state->slashes = cp - ndp->ni_next; 862 ndp->ni_pathlen -= state->slashes; 863 ndp->ni_next = cp; 864 cnp->cn_flags |= REQUIREDIR; 865 } else { 866 state->slashes = 0; 867 cnp->cn_flags &= ~REQUIREDIR; 868 } 869 /* 870 * We do special processing on the last component, whether or not it's 871 * a directory. Cache all intervening lookups, but not the final one. 872 */ 873 if (*cp == '\0') { 874 if (state->docache) 875 cnp->cn_flags |= MAKEENTRY; 876 else 877 cnp->cn_flags &= ~MAKEENTRY; 878 cnp->cn_flags |= ISLASTCN; 879 } else { 880 cnp->cn_flags |= MAKEENTRY; 881 cnp->cn_flags &= ~ISLASTCN; 882 } 883 if (cnp->cn_namelen == 2 && 884 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 885 cnp->cn_flags |= ISDOTDOT; 886 else 887 cnp->cn_flags &= ~ISDOTDOT; 888 889 return 0; 890 } 891 892 /* 893 * Call VOP_LOOKUP for a single lookup; return a new search directory 894 * (used when crossing mountpoints up or searching union mounts down) and 895 * the found object, which for create operations may be NULL on success. 896 * 897 * Note that the new search directory may be null, which means the 898 * searchdir was unlocked and released. This happens in the common case 899 * when crossing a mount point downwards, in order to avoid coupling 900 * locks between different file system volumes. Importantly, this can 901 * happen even if the call fails. (XXX: this is gross and should be 902 * tidied somehow.) 
 */
/*
 * Arguments and results, as used by the code below:
 *
 *	state			namei state; the component to look up is the
 *				one described by state->cnp.
 *	searchdir		directory to search; must be locked
 *				exclusively on entry.
 *	newsearchdir_ret	out: directory the caller should continue
 *				with; either NULL or locked exclusively.
 *	foundobj_ret		out: the object found, or NULL.  Only
 *				meaningful when 0 is returned; if non-NULL
 *				it is locked exclusively.
 *
 * Returns 0 or an errno value.
 */
static int
lookup_once(struct namei_state *state,
	    struct vnode *searchdir,
	    struct vnode **newsearchdir_ret,
	    struct vnode **foundobj_ret)
{
	struct vnode *tmpvn;		/* scratch vnode */
	struct vnode *foundobj;		/* result */
	struct mount *mp;		/* mount table entry */
	struct lwp *l = curlwp;
	int error;

	struct componentname *cnp = state->cnp;
	struct nameidata *ndp = state->ndp;

	KASSERT(cnp == &ndp->ni_cnd);
	KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);
	*newsearchdir_ret = searchdir;

	/*
	 * Handle "..": two special cases.
	 * 1. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 1a. If at the root of the emulation filesystem go to the real
	 *    root. So "/../<path>" is always absolute.
	 * 1b. If we have somehow gotten out of a jail, warn
	 *    and also ignore it so we can't get farther out.
	 * 2. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other file system.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		struct proc *p = l->l_proc;

		for (;;) {
			if (searchdir == ndp->ni_rootdir ||
			    searchdir == rootvnode) {
				/* case 1: ".." of the root is the root */
				foundobj = searchdir;
				vref(foundobj);
				*foundobj_ret = foundobj;
				error = 0;
				goto done;
			}
			if (ndp->ni_rootdir != rootvnode) {
				int retval;

				/* searchdir is unlocked around vn_isunder() */
				VOP_UNLOCK(searchdir);
				retval = vn_isunder(searchdir, ndp->ni_rootdir, l);
				vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
				if (!retval) {
				    /* Oops! We got out of jail! */
				    log(LOG_WARNING,
					"chrooted pid %d uid %d (%s) "
					"detected outside of its chroot\n",
					p->p_pid, kauth_cred_geteuid(l->l_cred),
					p->p_comm);
				    /* Put us at the jail root. */
				    vput(searchdir);
				    searchdir = NULL;
				    foundobj = ndp->ni_rootdir;
				    /*
				     * Two references: the vnode is returned
				     * both as the new searchdir and as the
				     * found object.
				     */
				    vref(foundobj);
				    vref(foundobj);
				    vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
				    *newsearchdir_ret = foundobj;
				    *foundobj_ret = foundobj;
				    error = 0;
				    goto done;
				}
			}
			if ((searchdir->v_vflag & VV_ROOT) == 0 ||
			    (cnp->cn_flags & NOCROSSMOUNT))
				break;
			/* case 2: step from a mount root to the covered vnode */
			tmpvn = searchdir;
			searchdir = searchdir->v_mount->mnt_vnodecovered;
			vref(searchdir);
			vput(tmpvn);
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			*newsearchdir_ret = searchdir;
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 * Our vnode state here is that "searchdir" is held and locked.
	 */
unionlookup:
	foundobj = NULL;
	error = VOP_LOOKUP(searchdir, &foundobj, cnp);

	if (error != 0) {
		KASSERTMSG((foundobj == NULL),
		    "leaf `%s' should be empty but is %p",
		    cnp->cn_nameptr, foundobj);
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif /* NAMEI_DIAGNOSTIC */
		/*
		 * Not found at the root of a union mount: retry the
		 * lookup in the directory the mount covers.
		 */
		if ((error == ENOENT) &&
		    (searchdir->v_vflag & VV_ROOT) &&
		    (searchdir->v_mount->mnt_flag & MNT_UNION)) {
			tmpvn = searchdir;
			searchdir = searchdir->v_mount->mnt_vnodecovered;
			vref(searchdir);
			vput(tmpvn);
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			*newsearchdir_ret = searchdir;
			goto unionlookup;
		}

		/* any error other than EJUSTRETURN is passed up as is */
		if (error != EJUSTRETURN)
			goto done;

		/*
		 * If this was not the last component, or there were trailing
		 * slashes, and we are not going to create a directory,
		 * then the name must exist.
		 */
		if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) {
			error = ENOENT;
			goto done;
		}

		/*
		 * If creating and at end of pathname, then can consider
		 * allowing file to be created.
		 */
		if (state->rdonly) {
			error = EROFS;
			goto done;
		}

		/*
		 * We return success and a NULL foundobj to indicate
		 * that the entry doesn't currently exist, leaving a
		 * pointer to the (normally, locked) directory vnode
		 * as searchdir.
		 */
		*foundobj_ret = NULL;
		error = 0;
		goto done;
	}
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif /* NAMEI_DIAGNOSTIC */

	/*
	 * Take into account any additional components consumed by the
	 * underlying filesystem.  This will include any trailing slashes after
	 * the last component consumed.
	 */
	if (cnp->cn_consume > 0) {
		ndp->ni_pathlen -= cnp->cn_consume - state->slashes;
		ndp->ni_next += cnp->cn_consume - state->slashes;
		cnp->cn_consume = 0;
		if (ndp->ni_next[0] == '\0')
			cnp->cn_flags |= ISLASTCN;
	}

	/*
	 * "searchdir" is locked and held, "foundobj" is held,
	 * they may be the same vnode.
	 */
	if (searchdir != foundobj) {
		/* for "..", searchdir is unlocked while foundobj is locked */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(searchdir);
		error = vn_lock(foundobj, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
		if (error != 0) {
			vrele(foundobj);
			goto done;
		}
	}

	/*
	 * Check to see if the vnode has been mounted on;
	 * if so find the root of the mounted file system.
	 */
	KASSERT(searchdir != NULL);
	while (foundobj->v_type == VDIR &&
	       (mp = foundobj->v_mountedhere) != NULL &&
	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {

		KASSERT(searchdir != foundobj);

		error = vfs_busy(mp, NULL);
		if (error != 0) {
			vput(foundobj);
			goto done;
		}
		/* searchdir is NULL here once it has been dropped below */
		if (searchdir != NULL) {
			VOP_UNLOCK(searchdir);
		}
		vput(foundobj);
		error = VFS_ROOT(mp, &foundobj);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			/* relock searchdir before reporting the failure */
			if (searchdir != NULL) {
				vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			}
			goto done;
		}
		/*
		 * Avoid locking vnodes from two filesystems because
		 * it's prone to deadlock, e.g. when using puffs.
		 * Also, it isn't a good idea to propagate slowness of
		 * a filesystem up to the root directory. For now,
		 * only handle the common case, where foundobj is
		 * VDIR.
		 *
		 * In this case set searchdir to null to avoid using
		 * it again. It is not correct to set searchdir ==
		 * foundobj here as that will confuse the caller.
		 * (See PR 40740.)
		 */
		if (searchdir == NULL) {
			/* already been here once; do nothing further */
		} else if (foundobj->v_type == VDIR) {
			vrele(searchdir);
			*newsearchdir_ret = searchdir = NULL;
		} else {
			/* relock in the order searchdir, then foundobj */
			VOP_UNLOCK(foundobj);
			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}
	}

	*foundobj_ret = foundobj;
	error = 0;
done:
	KASSERT(*newsearchdir_ret == NULL ||
		VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE);
	/*
	 * *foundobj_ret is valid only if error == 0.
	 */
	KASSERT(error != 0 || *foundobj_ret == NULL ||
		VOP_ISLOCKED(*foundobj_ret) == LK_EXCLUSIVE);
	return error;
}

//////////////////////////////

/*
 * Do a complete path search from a single root directory.
 * (This is called up to twice if TRYEMULROOT is in effect.)
 *
 * state carries the nameidata/componentname pair being resolved.
 * neverfollow, if nonzero, turns any symlink that would otherwise be
 * followed into an EINVAL failure; inhibitmagic is handed through to
 * namei_follow() and isnfsd to namei_start().
 *
 * On success returns 0 with ndp->ni_vp set to the object found (NULL
 * if the last component does not exist) and ndp->ni_dvp set to the
 * parent directory; the parent is only handed back when LOCKPARENT
 * is set, and may be NULL even then.  The found object is unlocked
 * before returning unless LOCKLEAF is set.
 *
 * On failure returns an errno value with both ni_dvp and ni_vp set
 * to NULL.  state->attempt_retry is additionally set on those failure
 * paths after which the caller may retry from the normal root.
 */
static int
namei_oneroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	struct vnode *searchdir, *foundobj;
	int error;

	/* Pick the starting directory; failure here never asks for a retry. */
	error = namei_start(state, isnfsd, &searchdir);
	if (error) {
		ndp->ni_dvp = NULL;
		ndp->ni_vp = NULL;
		return error;
	}
	KASSERT(searchdir->v_type == VDIR);

	/*
	 * Setup: break out flag bits into variables.
	 */
	/* docache is nonzero exactly when NOCACHE is clear... */
	state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	/* ...and is forced off for DELETE regardless. */
	if (cnp->cn_nameiop == DELETE)
		state->docache = 0;
	state->rdonly = cnp->cn_flags & RDONLY;

	/*
	 * Keep going until we run out of path components.
	 */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* drop leading slashes (already used them to choose startdir) */
	while (cnp->cn_nameptr[0] == '/') {
		cnp->cn_nameptr++;
		ndp->ni_pathlen--;
	}
	/* was it just "/"? */
	if (cnp->cn_nameptr[0] == '\0') {
		/* The start directory itself is the result; it has no parent. */
		foundobj = searchdir;
		searchdir = NULL;
		cnp->cn_flags |= ISLASTCN;

		/* bleh */
		goto skiploop;
	}

	/*
	 * Main loop: one iteration per path component (or per symlink
	 * followed).  It is left by "break" with the final searchdir and
	 * foundobj, or by "return" on error after releasing both.
	 */
	for (;;) {
		KASSERT(searchdir != NULL);
		KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);

		/*
		 * If the directory we're on is unmounted, bail out.
		 * XXX: should this also check if it's unlinked?
		 * XXX: yes it should... but how?
		 */
		if (searchdir->v_mount == NULL) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			return (ENOENT);
		}

		/*
		 * Look up the next path component.
		 * (currently, this may consume more than one)
		 */

		/* There should be no slashes here. */
		KASSERT(cnp->cn_nameptr[0] != '/');

		/* and we shouldn't have looped around if we were done */
		KASSERT(cnp->cn_nameptr[0] != '\0');

		error = lookup_parsepath(state);
		if (error) {
			vput(searchdir);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return (error);
		}

		/*
		 * lookup_once may replace searchdir (including with NULL),
		 * so the same variable is passed as both input and output.
		 */
		error = lookup_once(state, searchdir, &searchdir, &foundobj);
		if (error) {
			if (searchdir != NULL) {
				vput(searchdir);
			}
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			/*
			 * Note that if we're doing TRYEMULROOT we can
			 * retry with the normal root. Where this is
			 * currently set matches previous practice,
			 * but the previous practice didn't make much
			 * sense and somebody should sit down and
			 * figure out which cases should cause retry
			 * and which shouldn't. XXX.
			 */
			state->attempt_retry = 1;
			return (error);
		}

		if (foundobj == NULL) {
			/*
			 * Success with no object returned means we're
			 * creating something and it isn't already
			 * there. Break out of the main loop now so
			 * the code below doesn't have to test for
			 * foundobj == NULL.
			 */
			/* lookup_once can't have dropped the searchdir */
			KASSERT(searchdir != NULL);
			break;
		}

		/*
		 * Check for symbolic link. If we've reached one,
		 * follow it, unless we aren't supposed to. Back up
		 * over any slashes that we skipped, as we will need
		 * them again.
		 */
		if (namei_atsymlink(state, foundobj)) {
			ndp->ni_pathlen += state->slashes;
			ndp->ni_next -= state->slashes;
			if (neverfollow) {
				error = EINVAL;
			} else if (searchdir == NULL) {
				/*
				 * dholland 20160410: lookup_once only
				 * drops searchdir if it crossed a
				 * mount point. Therefore, if we get
				 * here it means we crossed a mount
				 * point to a mounted filesystem whose
				 * root vnode is a symlink. In theory
				 * we could continue at this point by
				 * using the pre-crossing searchdir
				 * (e.g. just take out an extra
				 * reference on it before calling
				 * lookup_once so we still have it),
				 * but this will make an ugly mess and
				 * it should never happen in practice
				 * as only badly broken filesystems
				 * have non-directory root vnodes. (I
				 * have seen this sort of thing with
				 * NFS occasionally but even then it
				 * means something's badly wrong.)
				 */
				error = ENOTDIR;
			} else {
				/*
				 * dholland 20110410: if we're at a
				 * union mount it might make sense to
				 * use the top of the union stack here
				 * rather than the layer we found the
				 * symlink in. (FUTURE)
				 */
				error = namei_follow(state, inhibitmagic,
						     searchdir, foundobj,
						     &searchdir);
			}
			if (error) {
				/* Note: this path does not set attempt_retry. */
				KASSERT(searchdir != foundobj);
				if (searchdir != NULL) {
					vput(searchdir);
				}
				vput(foundobj);
				ndp->ni_dvp = NULL;
				ndp->ni_vp = NULL;
				return error;
			}
			/* namei_follow unlocks it (ugh) so rele, not put */
			vrele(foundobj);
			foundobj = NULL;

			/*
			 * If we followed a symlink to `/' and there
			 * are no more components after the symlink,
			 * we're done with the loop and what we found
			 * is the searchdir.
			 */
			if (cnp->cn_nameptr[0] == '\0') {
				KASSERT(searchdir != NULL);
				foundobj = searchdir;
				searchdir = NULL;
				cnp->cn_flags |= ISLASTCN;
				break;
			}

			continue;
		}

		/*
		 * Not a symbolic link.
		 *
		 * Check for directory, if the component was
		 * followed by a series of slashes.
		 */
		if ((foundobj->v_type != VDIR) &&
		    (cnp->cn_flags & REQUIREDIR)) {
			KASSERT(foundobj != searchdir);
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return ENOTDIR;
		}

		/*
		 * Stop if we've reached the last component.
		 */
		if (cnp->cn_flags & ISLASTCN) {
			break;
		}

		/*
		 * Continue with the next component.
		 */
		cnp->cn_nameptr = ndp->ni_next;
		/*
		 * Release the old searchdir.  When it is the same vnode as
		 * foundobj only the extra reference is dropped (vrele), so
		 * the vnode stays locked for its new role as searchdir.
		 */
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else if (searchdir != NULL) {
			vput(searchdir);
		}
		searchdir = foundobj;
		foundobj = NULL;
	}

 skiploop:

	if (foundobj != NULL) {
		if (foundobj == ndp->ni_erootdir) {
			/*
			 * We are about to return the emulation root.
			 * This isn't a good idea because code might
			 * repeatedly lookup ".." until the file
			 * matches that returned for "/" and loop
			 * forever. So convert it to the real root.
			 */
			if (searchdir != NULL) {
				if (searchdir == foundobj)
					vrele(searchdir);
				else
					vput(searchdir);
				searchdir = NULL;
			}
			vput(foundobj);
			/* Take our own locked reference on the real root. */
			foundobj = ndp->ni_rootdir;
			vref(foundobj);
			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
		}

		/*
		 * If the caller requested the parent node (i.e. it's
		 * a CREATE, DELETE, or RENAME), and we don't have one
		 * (because this is the root directory, or we crossed
		 * a mount point), then we must fail.
		 */
		if (cnp->cn_nameiop != LOOKUP &&
		    (searchdir == NULL ||
		     searchdir->v_mount != foundobj->v_mount)) {
			if (searchdir) {
				vput(searchdir);
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;

			switch (cnp->cn_nameiop) {
			    case CREATE:
				return EEXIST;
			    case DELETE:
			    case RENAME:
				return EBUSY;
			    default:
				break;
			}
			/* Only reached for an operation other than the above. */
			panic("Invalid nameiop\n");
		}

		/*
		 * Disallow directory write attempts on read-only lookups.
		 * Prefers EEXIST over EROFS for the CREATE case.
		 */
		if (state->rdonly &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			if (searchdir) {
				if (foundobj != searchdir) {
					vput(searchdir);
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			vput(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return EROFS;
		}
		if ((cnp->cn_flags & LOCKLEAF) == 0) {
			/*
			 * Note: if LOCKPARENT but not LOCKLEAF is
			 * set, and searchdir == foundobj, this code
			 * necessarily unlocks the parent as well as
			 * the leaf. That is, just because you specify
			 * LOCKPARENT doesn't mean you necessarily get
			 * a locked parent vnode. The code in
			 * vfs_syscalls.c, and possibly elsewhere,
			 * that uses this combination "knows" this, so
			 * it can't be safely changed. Feh. XXX
			 */
			VOP_UNLOCK(foundobj);
		}
	}

	/*
	 * Done.
	 */

	/*
	 * If LOCKPARENT is not set, the parent directory isn't returned.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) {
		if (searchdir == foundobj) {
			vrele(searchdir);
		} else {
			vput(searchdir);
		}
		searchdir = NULL;
	}

	ndp->ni_dvp = searchdir;
	ndp->ni_vp = foundobj;
	return 0;
}

/*
 * Do namei; wrapper layer that handles TRYEMULROOT.
 *
 * Runs namei_oneroot() and, if that fails with state->attempt_retry
 * set while an emulation root is in use (ndp->ni_erootdir != NULL),
 * clears TRYEMULROOT, restores the path buffer from the copy saved on
 * entry, and runs the lookup once more.  Returns whatever the last
 * namei_oneroot() call returned.
 */
static int
namei_tryemulroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	int error;

	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	const char *savepath = NULL;

	KASSERT(cnp == &ndp->ni_cnd);

	/* Only keep a copy of the original path if a retry is possible. */
	if (cnp->cn_flags & TRYEMULROOT) {
		savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf);
	}

 emul_retry:
	state->attempt_retry = 0;

	error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd);
	if (error) {
		/*
		 * Once namei has started up, the existence of ni_erootdir
		 * tells us whether we're working from an emulation root.
		 * The TRYEMULROOT flag isn't necessarily authoritative.
		 */
		if (ndp->ni_erootdir != NULL && state->attempt_retry) {
			/* Retry the whole thing using the normal root */
			cnp->cn_flags &= ~TRYEMULROOT;
			state->attempt_retry = 0;

			/* kinda gross */
			/*
			 * NOTE(review): savepath is non-NULL only if
			 * TRYEMULROOT was set on entry; this relies on
			 * ni_erootdir being non-NULL only in that case --
			 * confirm against namei_start().  Presumably the
			 * restore is needed because the failed attempt may
			 * have rewritten the path buffer -- confirm.
			 */
			strcpy(ndp->ni_pathbuf->pb_path, savepath);
			pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
			savepath = NULL;

			goto emul_retry;
		}
	}
	/* Release the saved copy if the retry path did not already do so. */
	if (savepath != NULL) {
		pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
	}
	return error;
}

/*
 * External interface.
1542 */ 1543 int 1544 namei(struct nameidata *ndp) 1545 { 1546 struct namei_state state; 1547 int error; 1548 1549 namei_init(&state, ndp); 1550 error = namei_tryemulroot(&state, 1551 0/*!neverfollow*/, 0/*!inhibitmagic*/, 1552 0/*isnfsd*/); 1553 namei_cleanup(&state); 1554 1555 if (error) { 1556 /* make sure no stray refs leak out */ 1557 KASSERT(ndp->ni_dvp == NULL); 1558 KASSERT(ndp->ni_vp == NULL); 1559 } 1560 1561 return error; 1562 } 1563 1564 //////////////////////////////////////////////////////////// 1565 1566 /* 1567 * External interface used by nfsd. This is basically different from 1568 * namei only in that it has the ability to pass in the "current 1569 * directory", and uses an extra flag "neverfollow" for which there's 1570 * no physical flag defined in namei.h. (There used to be a cut&paste 1571 * copy of about half of namei in nfsd to allow these minor 1572 * adjustments to exist.) 1573 * 1574 * XXX: the namei interface should be adjusted so nfsd can just use 1575 * ordinary namei(). 1576 */ 1577 int 1578 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow) 1579 { 1580 struct namei_state state; 1581 int error; 1582 1583 KASSERT(ndp->ni_atdir == NULL); 1584 ndp->ni_atdir = forcecwd; 1585 1586 namei_init(&state, ndp); 1587 error = namei_tryemulroot(&state, 1588 neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/); 1589 namei_cleanup(&state); 1590 1591 if (error) { 1592 /* make sure no stray refs leak out */ 1593 KASSERT(ndp->ni_dvp == NULL); 1594 KASSERT(ndp->ni_vp == NULL); 1595 } 1596 1597 return error; 1598 } 1599 1600 /* 1601 * A second external interface used by nfsd. This turns out to be a 1602 * single lookup used by the WebNFS code (ha!) to get "index.html" or 1603 * equivalent when asked for a directory. It should eventually evolve 1604 * into some kind of namei_once() call; for the time being it's kind 1605 * of a mess. XXX. 
1606 * 1607 * dholland 20110109: I don't think it works, and I don't think it 1608 * worked before I started hacking and slashing either, and I doubt 1609 * anyone will ever notice. 1610 */ 1611 1612 /* 1613 * Internals. This calls lookup_once() after setting up the assorted 1614 * pieces of state the way they ought to be. 1615 */ 1616 static int 1617 do_lookup_for_nfsd_index(struct namei_state *state) 1618 { 1619 int error = 0; 1620 1621 struct componentname *cnp = state->cnp; 1622 struct nameidata *ndp = state->ndp; 1623 struct vnode *startdir; 1624 struct vnode *foundobj; 1625 const char *cp; /* pointer into pathname argument */ 1626 1627 KASSERT(cnp == &ndp->ni_cnd); 1628 1629 startdir = state->ndp->ni_atdir; 1630 1631 cnp->cn_nameptr = ndp->ni_pnbuf; 1632 state->docache = 1; 1633 state->rdonly = cnp->cn_flags & RDONLY; 1634 ndp->ni_dvp = NULL; 1635 1636 cnp->cn_consume = 0; 1637 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 1638 cp = cnp->cn_nameptr + cnp->cn_namelen; 1639 KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX); 1640 ndp->ni_pathlen -= cnp->cn_namelen; 1641 ndp->ni_next = cp; 1642 state->slashes = 0; 1643 cnp->cn_flags &= ~REQUIREDIR; 1644 cnp->cn_flags |= MAKEENTRY|ISLASTCN; 1645 1646 if (cnp->cn_namelen == 2 && 1647 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 1648 cnp->cn_flags |= ISDOTDOT; 1649 else 1650 cnp->cn_flags &= ~ISDOTDOT; 1651 1652 /* 1653 * Because lookup_once can change the startdir, we need our 1654 * own reference to it to avoid consuming the caller's. 
1655 */ 1656 vref(startdir); 1657 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 1658 error = lookup_once(state, startdir, &startdir, &foundobj); 1659 if (error == 0 && startdir == foundobj) { 1660 vrele(startdir); 1661 } else if (startdir != NULL) { 1662 vput(startdir); 1663 } 1664 if (error) { 1665 goto bad; 1666 } 1667 ndp->ni_vp = foundobj; 1668 1669 if (foundobj == NULL) { 1670 return 0; 1671 } 1672 1673 KASSERT((cnp->cn_flags & LOCKPARENT) == 0); 1674 if ((cnp->cn_flags & LOCKLEAF) == 0) { 1675 VOP_UNLOCK(foundobj); 1676 } 1677 return (0); 1678 1679 bad: 1680 ndp->ni_vp = NULL; 1681 return (error); 1682 } 1683 1684 /* 1685 * External interface. The partitioning between this function and the 1686 * above isn't very clear - the above function exists mostly so code 1687 * that uses "state->" can be shuffled around without having to change 1688 * it to "state.". 1689 */ 1690 int 1691 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir) 1692 { 1693 struct namei_state state; 1694 int error; 1695 1696 KASSERT(ndp->ni_atdir == NULL); 1697 ndp->ni_atdir = startdir; 1698 1699 /* 1700 * Note: the name sent in here (is not|should not be) allowed 1701 * to contain a slash. 1702 */ 1703 if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) { 1704 return ENAMETOOLONG; 1705 } 1706 if (strchr(ndp->ni_pathbuf->pb_path, '/')) { 1707 return EINVAL; 1708 } 1709 1710 ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1; 1711 ndp->ni_pnbuf = NULL; 1712 ndp->ni_cnd.cn_nameptr = NULL; 1713 1714 namei_init(&state, ndp); 1715 error = do_lookup_for_nfsd_index(&state); 1716 namei_cleanup(&state); 1717 1718 return error; 1719 } 1720 1721 //////////////////////////////////////////////////////////// 1722 1723 /* 1724 * Reacquire a path name component. 1725 * dvp is locked on entry and exit. 1726 * *vpp is locked on exit unless it's NULL. 
1727 */ 1728 int 1729 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy) 1730 { 1731 int rdonly; /* lookup read-only flag bit */ 1732 int error = 0; 1733 #ifdef DEBUG 1734 size_t newlen; /* DEBUG: check name len */ 1735 const char *cp; /* DEBUG: check name ptr */ 1736 #endif /* DEBUG */ 1737 1738 (void)dummy; 1739 1740 /* 1741 * Setup: break out flag bits into variables. 1742 */ 1743 rdonly = cnp->cn_flags & RDONLY; 1744 1745 /* 1746 * Search a new directory. 1747 * 1748 * The cn_hash value is for use by vfs_cache. 1749 * The last component of the filename is left accessible via 1750 * cnp->cn_nameptr for callers that need the name. Callers needing 1751 * the name set the SAVENAME flag. When done, they assume 1752 * responsibility for freeing the pathname buffer. 1753 */ 1754 #ifdef DEBUG 1755 #if 0 1756 cp = NULL; 1757 newhash = namei_hash(cnp->cn_nameptr, &cp); 1758 if ((uint32_t)newhash != (uint32_t)cnp->cn_hash) 1759 panic("relookup: bad hash"); 1760 #endif 1761 newlen = namei_getcomponent(cnp->cn_nameptr); 1762 if (cnp->cn_namelen != newlen) 1763 panic("relookup: bad len"); 1764 cp = cnp->cn_nameptr + cnp->cn_namelen; 1765 while (*cp == '/') 1766 cp++; 1767 if (*cp != 0) 1768 panic("relookup: not last component"); 1769 #endif /* DEBUG */ 1770 1771 /* 1772 * Check for degenerate name (e.g. / or "") 1773 * which is a way of talking about a directory, 1774 * e.g. like "/." or ".". 1775 */ 1776 if (cnp->cn_nameptr[0] == '\0') 1777 panic("relookup: null name"); 1778 1779 if (cnp->cn_flags & ISDOTDOT) 1780 panic("relookup: lookup on dot-dot"); 1781 1782 /* 1783 * We now have a segment name to search for, and a directory to search. 
1784 */ 1785 *vpp = NULL; 1786 error = VOP_LOOKUP(dvp, vpp, cnp); 1787 if ((error) != 0) { 1788 KASSERTMSG((*vpp == NULL), 1789 "leaf `%s' should be empty but is %p", 1790 cnp->cn_nameptr, *vpp); 1791 if (error != EJUSTRETURN) 1792 goto bad; 1793 } 1794 1795 /* 1796 * Check for symbolic link 1797 */ 1798 KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK || 1799 (cnp->cn_flags & FOLLOW) == 0), 1800 "relookup: symlink found"); 1801 1802 /* 1803 * Check for read-only lookups. 1804 */ 1805 if (rdonly && cnp->cn_nameiop != LOOKUP) { 1806 error = EROFS; 1807 if (*vpp) { 1808 vrele(*vpp); 1809 } 1810 goto bad; 1811 } 1812 /* 1813 * Lock result. 1814 */ 1815 if (*vpp && *vpp != dvp) { 1816 error = vn_lock(*vpp, LK_EXCLUSIVE); 1817 if (error != 0) { 1818 vrele(*vpp); 1819 goto bad; 1820 } 1821 } 1822 return (0); 1823 1824 bad: 1825 *vpp = NULL; 1826 return (error); 1827 } 1828 1829 /* 1830 * namei_simple - simple forms of namei. 1831 * 1832 * These are wrappers to allow the simple case callers of namei to be 1833 * left alone while everything else changes under them. 
1834 */ 1835 1836 /* Flags */ 1837 struct namei_simple_flags_type { 1838 int dummy; 1839 }; 1840 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft; 1841 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn; 1842 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt; 1843 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn; 1844 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft; 1845 1846 static 1847 int 1848 namei_simple_convert_flags(namei_simple_flags_t sflags) 1849 { 1850 if (sflags == NSM_NOFOLLOW_NOEMULROOT) 1851 return NOFOLLOW | 0; 1852 if (sflags == NSM_NOFOLLOW_TRYEMULROOT) 1853 return NOFOLLOW | TRYEMULROOT; 1854 if (sflags == NSM_FOLLOW_NOEMULROOT) 1855 return FOLLOW | 0; 1856 if (sflags == NSM_FOLLOW_TRYEMULROOT) 1857 return FOLLOW | TRYEMULROOT; 1858 panic("namei_simple_convert_flags: bogus sflags\n"); 1859 return 0; 1860 } 1861 1862 int 1863 namei_simple_kernel(const char *path, namei_simple_flags_t sflags, 1864 struct vnode **vp_ret) 1865 { 1866 return nameiat_simple_kernel(NULL, path, sflags, vp_ret); 1867 } 1868 1869 int 1870 nameiat_simple_kernel(struct vnode *dvp, const char *path, 1871 namei_simple_flags_t sflags, struct vnode **vp_ret) 1872 { 1873 struct nameidata nd; 1874 struct pathbuf *pb; 1875 int err; 1876 1877 pb = pathbuf_create(path); 1878 if (pb == NULL) { 1879 return ENOMEM; 1880 } 1881 1882 NDINIT(&nd, 1883 LOOKUP, 1884 namei_simple_convert_flags(sflags), 1885 pb); 1886 1887 if (dvp != NULL) 1888 NDAT(&nd, dvp); 1889 1890 err = namei(&nd); 1891 if (err != 0) { 1892 pathbuf_destroy(pb); 1893 return err; 1894 } 1895 *vp_ret = nd.ni_vp; 1896 pathbuf_destroy(pb); 1897 return 0; 1898 } 1899 1900 int 1901 namei_simple_user(const char *path, namei_simple_flags_t sflags, 1902 struct vnode **vp_ret) 1903 { 1904 return nameiat_simple_user(NULL, path, sflags, vp_ret); 1905 } 1906 1907 int 1908 nameiat_simple_user(struct vnode *dvp, const char *path, 1909 namei_simple_flags_t sflags, struct vnode 
**vp_ret) 1910 { 1911 struct pathbuf *pb; 1912 struct nameidata nd; 1913 int err; 1914 1915 err = pathbuf_copyin(path, &pb); 1916 if (err) { 1917 return err; 1918 } 1919 1920 NDINIT(&nd, 1921 LOOKUP, 1922 namei_simple_convert_flags(sflags), 1923 pb); 1924 1925 if (dvp != NULL) 1926 NDAT(&nd, dvp); 1927 1928 err = namei(&nd); 1929 if (err != 0) { 1930 pathbuf_destroy(pb); 1931 return err; 1932 } 1933 *vp_ret = nd.ni_vp; 1934 pathbuf_destroy(pb); 1935 return 0; 1936 } 1937