1 /* $NetBSD: vfs_lookup.c,v 1.203 2015/08/24 22:50:32 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_lookup.c 8.10 (Berkeley) 5/27/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.203 2015/08/24 22:50:32 pooka Exp $"); 41 42 #ifdef _KERNEL_OPT 43 #include "opt_magiclinks.h" 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/syslimits.h> 50 #include <sys/time.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/mount.h> 54 #include <sys/errno.h> 55 #include <sys/filedesc.h> 56 #include <sys/hash.h> 57 #include <sys/proc.h> 58 #include <sys/syslog.h> 59 #include <sys/kauth.h> 60 #include <sys/ktrace.h> 61 #include <sys/dirent.h> 62 63 #ifndef MAGICLINKS 64 #define MAGICLINKS 0 65 #endif 66 67 int vfs_magiclinks = MAGICLINKS; 68 69 __CTASSERT(MAXNAMLEN == NAME_MAX); 70 71 /* 72 * Substitute replacement text for 'magic' strings in symlinks. 73 * Returns 0 if successful, and returns non-zero if an error 74 * occurs. (Currently, the only possible error is running out 75 * of temporary pathname space.) 76 * 77 * Looks for "@<string>" and "@<string>/", where <string> is a 78 * recognized 'magic' string. Replaces the "@<string>" with the 79 * appropriate replacement text. (Note that in some cases the 80 * replacement text may have zero length.) 81 * 82 * This would have been table driven, but the variance in 83 * replacement strings (and replacement string lengths) made 84 * that impractical. 85 */ 86 #define VNL(x) \ 87 (sizeof(x) - 1) 88 89 #define VO '{' 90 #define VC '}' 91 92 #define MATCH(str) \ 93 ((termchar == '/' && i + VNL(str) == *len) || \ 94 (i + VNL(str) < *len && \ 95 cp[i + VNL(str)] == termchar)) && \ 96 !strncmp((str), &cp[i], VNL(str)) 97 98 #define SUBSTITUTE(m, s, sl) \ 99 if ((newlen + (sl)) >= MAXPATHLEN) \ 100 return 1; \ 101 i += VNL(m); \ 102 if (termchar != '/') \ 103 i++; \ 104 (void)memcpy(&tmp[newlen], (s), (sl)); \ 105 newlen += (sl); \ 106 change = 1; \ 107 termchar = '/'; 108 109 static int 110 symlink_magic(struct proc *p, char *cp, size_t *len) 111 { 112 char *tmp; 113 size_t change, i, newlen, slen; 114 char termchar = '/'; 115 char idtmp[11]; /* enough for 32 bit *unsigned* integer */ 116 117 118 tmp = PNBUF_GET(); 119 for (change = i = newlen = 0; i < *len; ) { 120 if (cp[i] != '@') { 121 tmp[newlen++] = cp[i++]; 122 continue; 123 } 124 125 i++; 126 127 /* Check for @{var} syntax. */ 128 if (cp[i] == VO) { 129 termchar = VC; 130 i++; 131 } 132 133 /* 134 * The following checks should be ordered according 135 * to frequency of use. 136 */ 137 if (MATCH("machine_arch")) { 138 slen = VNL(MACHINE_ARCH); 139 SUBSTITUTE("machine_arch", MACHINE_ARCH, slen); 140 } else if (MATCH("machine")) { 141 slen = VNL(MACHINE); 142 SUBSTITUTE("machine", MACHINE, slen); 143 } else if (MATCH("hostname")) { 144 SUBSTITUTE("hostname", hostname, hostnamelen); 145 } else if (MATCH("osrelease")) { 146 slen = strlen(osrelease); 147 SUBSTITUTE("osrelease", osrelease, slen); 148 } else if (MATCH("emul")) { 149 slen = strlen(p->p_emul->e_name); 150 SUBSTITUTE("emul", p->p_emul->e_name, slen); 151 } else if (MATCH("kernel_ident")) { 152 slen = strlen(kernel_ident); 153 SUBSTITUTE("kernel_ident", kernel_ident, slen); 154 } else if (MATCH("domainname")) { 155 SUBSTITUTE("domainname", domainname, domainnamelen); 156 } else if (MATCH("ostype")) { 157 slen = strlen(ostype); 158 SUBSTITUTE("ostype", ostype, slen); 159 } else if (MATCH("uid")) { 160 slen = snprintf(idtmp, sizeof(idtmp), "%u", 161 kauth_cred_geteuid(kauth_cred_get())); 162 SUBSTITUTE("uid", idtmp, slen); 163 } else if (MATCH("ruid")) { 164 slen = snprintf(idtmp, sizeof(idtmp), "%u", 165 kauth_cred_getuid(kauth_cred_get())); 166 SUBSTITUTE("ruid", idtmp, slen); 167 } else if (MATCH("gid")) { 168 slen = snprintf(idtmp, sizeof(idtmp), "%u", 169 kauth_cred_getegid(kauth_cred_get())); 170 SUBSTITUTE("gid", idtmp, slen); 171 } else if (MATCH("rgid")) { 172 slen = snprintf(idtmp, sizeof(idtmp), "%u", 173 kauth_cred_getgid(kauth_cred_get())); 174 SUBSTITUTE("rgid", idtmp, slen); 175 } else { 176 tmp[newlen++] = '@'; 177 if (termchar == VC) 178 tmp[newlen++] = VO; 179 } 180 } 181 182 if (change) { 183 (void)memcpy(cp, tmp, newlen); 184 *len = newlen; 185 } 186 PNBUF_PUT(tmp); 187 188 return 0; 189 } 190 191 #undef VNL 192 #undef VO 193 #undef VC 194 #undef MATCH 195 #undef SUBSTITUTE 196 197 //////////////////////////////////////////////////////////// 198 199 /* 200 * Determine the namei hash (for the namecache) for name. 201 * If *ep != NULL, hash from name to ep-1. 202 * If *ep == NULL, hash from name until the first NUL or '/', and 203 * return the location of this termination character in *ep. 204 * 205 * This function returns an equivalent hash to the MI hash32_strn(). 206 * The latter isn't used because in the *ep == NULL case, determining 207 * the length of the string to the first NUL or `/' and then calling 208 * hash32_strn() involves unnecessary double-handling of the data. 209 */ 210 uint32_t 211 namei_hash(const char *name, const char **ep) 212 { 213 uint32_t hash; 214 215 hash = HASH32_STR_INIT; 216 if (*ep != NULL) { 217 for (; name < *ep; name++) 218 hash = hash * 33 + *(const uint8_t *)name; 219 } else { 220 for (; *name != '\0' && *name != '/'; name++) 221 hash = hash * 33 + *(const uint8_t *)name; 222 *ep = name; 223 } 224 return (hash + (hash >> 5)); 225 } 226 227 /* 228 * Find the end of the first path component in NAME and return its 229 * length. 230 */ 231 static size_t 232 namei_getcomponent(const char *name) 233 { 234 size_t pos; 235 236 pos = 0; 237 while (name[pos] != '\0' && name[pos] != '/') { 238 pos++; 239 } 240 return pos; 241 } 242 243 //////////////////////////////////////////////////////////// 244 245 /* 246 * Sealed abstraction for pathnames. 247 * 248 * System-call-layer level code that is going to call namei should 249 * first create a pathbuf and adjust all the bells and whistles on it 250 * as needed by context. 251 */ 252 253 struct pathbuf { 254 char *pb_path; 255 char *pb_pathcopy; 256 unsigned pb_pathcopyuses; 257 }; 258 259 static struct pathbuf * 260 pathbuf_create_raw(void) 261 { 262 struct pathbuf *pb; 263 264 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 265 if (pb == NULL) { 266 return NULL; 267 } 268 pb->pb_path = PNBUF_GET(); 269 if (pb->pb_path == NULL) { 270 kmem_free(pb, sizeof(*pb)); 271 return NULL; 272 } 273 pb->pb_pathcopy = NULL; 274 pb->pb_pathcopyuses = 0; 275 return pb; 276 } 277 278 void 279 pathbuf_destroy(struct pathbuf *pb) 280 { 281 KASSERT(pb->pb_pathcopyuses == 0); 282 KASSERT(pb->pb_pathcopy == NULL); 283 PNBUF_PUT(pb->pb_path); 284 kmem_free(pb, sizeof(*pb)); 285 } 286 287 struct pathbuf * 288 pathbuf_assimilate(char *pnbuf) 289 { 290 struct pathbuf *pb; 291 292 pb = kmem_alloc(sizeof(*pb), KM_SLEEP); 293 if (pb == NULL) { 294 return NULL; 295 } 296 pb->pb_path = pnbuf; 297 pb->pb_pathcopy = NULL; 298 pb->pb_pathcopyuses = 0; 299 return pb; 300 } 301 302 struct pathbuf * 303 pathbuf_create(const char *path) 304 { 305 struct pathbuf *pb; 306 int error; 307 308 pb = pathbuf_create_raw(); 309 if (pb == NULL) { 310 return NULL; 311 } 312 error = copystr(path, pb->pb_path, PATH_MAX, NULL); 313 if (error != 0) { 314 KASSERT(!"kernel path too long in pathbuf_create"); 315 /* make sure it's null-terminated, just in case */ 316 pb->pb_path[PATH_MAX-1] = '\0'; 317 } 318 return pb; 319 } 320 321 int 322 pathbuf_copyin(const char *userpath, struct pathbuf **ret) 323 { 324 struct pathbuf *pb; 325 int error; 326 327 pb = pathbuf_create_raw(); 328 if (pb == NULL) { 329 return ENOMEM; 330 } 331 error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL); 332 if (error) { 333 pathbuf_destroy(pb); 334 return error; 335 } 336 *ret = pb; 337 return 0; 338 } 339 340 /* 341 * XXX should not exist: 342 * 1. whether a pointer is kernel or user should be statically checkable. 343 * 2. copyin should be handled by the upper part of the syscall layer, 344 * not in here. 345 */ 346 int 347 pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret) 348 { 349 if (seg == UIO_USERSPACE) { 350 return pathbuf_copyin(path, ret); 351 } else { 352 *ret = pathbuf_create(path); 353 if (*ret == NULL) { 354 return ENOMEM; 355 } 356 return 0; 357 } 358 } 359 360 /* 361 * Get a copy of the path buffer as it currently exists. If this is 362 * called after namei starts the results may be arbitrary. 363 */ 364 void 365 pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen) 366 { 367 strlcpy(buf, pb->pb_path, maxlen); 368 } 369 370 /* 371 * These two functions allow access to a saved copy of the original 372 * path string. The first copy should be gotten before namei is 373 * called. Each copy that is gotten should be put back. 374 */ 375 376 const char * 377 pathbuf_stringcopy_get(struct pathbuf *pb) 378 { 379 if (pb->pb_pathcopyuses == 0) { 380 pb->pb_pathcopy = PNBUF_GET(); 381 strcpy(pb->pb_pathcopy, pb->pb_path); 382 } 383 pb->pb_pathcopyuses++; 384 return pb->pb_pathcopy; 385 } 386 387 void 388 pathbuf_stringcopy_put(struct pathbuf *pb, const char *str) 389 { 390 KASSERT(str == pb->pb_pathcopy); 391 KASSERT(pb->pb_pathcopyuses > 0); 392 pb->pb_pathcopyuses--; 393 if (pb->pb_pathcopyuses == 0) { 394 PNBUF_PUT(pb->pb_pathcopy); 395 pb->pb_pathcopy = NULL; 396 } 397 } 398 399 400 //////////////////////////////////////////////////////////// 401 402 /* 403 * namei: convert a pathname into a pointer to a (maybe-locked) vnode, 404 * and maybe also its parent directory vnode, and assorted other guff. 405 * See namei(9) for the interface documentation. 406 * 407 * 408 * The FOLLOW flag is set when symbolic links are to be followed 409 * when they occur at the end of the name translation process. 410 * Symbolic links are always followed for all other pathname 411 * components other than the last. 412 * 413 * The segflg defines whether the name is to be copied from user 414 * space or kernel space. 415 * 416 * Overall outline of namei: 417 * 418 * copy in name 419 * get starting directory 420 * while (!done && !error) { 421 * call lookup to search path. 422 * if symbolic link, massage name in buffer and continue 423 * } 424 */ 425 426 /* 427 * Search a pathname. 428 * This is a very central and rather complicated routine. 429 * 430 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 431 * The starting directory is passed in. The pathname is descended 432 * until done, or a symbolic link is encountered. The variable ni_more 433 * is clear if the path is completed; it is set to one if a symbolic 434 * link needing interpretation is encountered. 435 * 436 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 437 * whether the name is to be looked up, created, renamed, or deleted. 438 * When CREATE, RENAME, or DELETE is specified, information usable in 439 * creating, renaming, or deleting a directory entry may be calculated. 440 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 441 * locked. Otherwise the parent directory is not returned. If the target 442 * of the pathname exists and LOCKLEAF is or'ed into the flag the target 443 * is returned locked, otherwise it is returned unlocked. When creating 444 * or renaming and LOCKPARENT is specified, the target may not be ".". 445 * When deleting and LOCKPARENT is specified, the target may be ".". 446 * 447 * Overall outline of lookup: 448 * 449 * dirloop: 450 * identify next component of name at ndp->ni_ptr 451 * handle degenerate case where name is null string 452 * if .. and crossing mount points and on mounted filesys, find parent 453 * call VOP_LOOKUP routine for next component name 454 * directory vnode returned in ni_dvp, locked. 455 * component vnode returned in ni_vp (if it exists), locked. 456 * if result vnode is mounted on and crossing mount points, 457 * find mounted on vnode 458 * if more components of name, do next level at dirloop 459 * return the answer in ni_vp, locked if LOCKLEAF set 460 * if LOCKPARENT set, return locked parent in ni_dvp 461 */ 462 463 464 /* 465 * Internal state for a namei operation. 466 * 467 * cnp is always equal to &ndp->ni_cnp. 468 */ 469 struct namei_state { 470 struct nameidata *ndp; 471 struct componentname *cnp; 472 473 int docache; /* == 0 do not cache last component */ 474 int rdonly; /* lookup read-only flag bit */ 475 int slashes; 476 477 unsigned attempt_retry:1; /* true if error allows emul retry */ 478 }; 479 480 481 /* 482 * Initialize the namei working state. 483 */ 484 static void 485 namei_init(struct namei_state *state, struct nameidata *ndp) 486 { 487 488 state->ndp = ndp; 489 state->cnp = &ndp->ni_cnd; 490 491 state->docache = 0; 492 state->rdonly = 0; 493 state->slashes = 0; 494 495 #ifdef DIAGNOSTIC 496 if (!state->cnp->cn_cred) 497 panic("namei: bad cred/proc"); 498 if (state->cnp->cn_nameiop & (~OPMASK)) 499 panic("namei: nameiop contaminated with flags"); 500 if (state->cnp->cn_flags & OPMASK) 501 panic("namei: flags contaminated with nameiops"); 502 #endif 503 504 /* 505 * The buffer for name translation shall be the one inside the 506 * pathbuf. 507 */ 508 state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path; 509 } 510 511 /* 512 * Clean up the working namei state, leaving things ready for return 513 * from namei. 514 */ 515 static void 516 namei_cleanup(struct namei_state *state) 517 { 518 KASSERT(state->cnp == &state->ndp->ni_cnd); 519 520 /* nothing for now */ 521 (void)state; 522 } 523 524 ////////////////////////////// 525 526 /* 527 * Get the directory context. 528 * Initializes the rootdir and erootdir state and returns a reference 529 * to the starting dir. 530 */ 531 static struct vnode * 532 namei_getstartdir(struct namei_state *state) 533 { 534 struct nameidata *ndp = state->ndp; 535 struct componentname *cnp = state->cnp; 536 struct cwdinfo *cwdi; /* pointer to cwd state */ 537 struct lwp *self = curlwp; /* thread doing namei() */ 538 struct vnode *rootdir, *erootdir, *curdir, *startdir; 539 540 cwdi = self->l_proc->p_cwdi; 541 rw_enter(&cwdi->cwdi_lock, RW_READER); 542 543 /* root dir */ 544 if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { 545 rootdir = rootvnode; 546 } else { 547 rootdir = cwdi->cwdi_rdir; 548 } 549 550 /* emulation root dir, if any */ 551 if ((cnp->cn_flags & TRYEMULROOT) == 0) { 552 /* if we don't want it, don't fetch it */ 553 erootdir = NULL; 554 } else if (cnp->cn_flags & EMULROOTSET) { 555 /* explicitly set emulroot; "/../" doesn't override this */ 556 erootdir = ndp->ni_erootdir; 557 } else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) { 558 /* explicit reference to real rootdir */ 559 erootdir = NULL; 560 } else { 561 /* may be null */ 562 erootdir = cwdi->cwdi_edir; 563 } 564 565 /* current dir */ 566 curdir = cwdi->cwdi_cdir; 567 568 if (ndp->ni_pnbuf[0] != '/') { 569 if (ndp->ni_atdir != NULL) { 570 startdir = ndp->ni_atdir; 571 } else { 572 startdir = curdir; 573 } 574 erootdir = NULL; 575 } else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) { 576 startdir = erootdir; 577 } else { 578 startdir = rootdir; 579 erootdir = NULL; 580 } 581 582 state->ndp->ni_rootdir = rootdir; 583 state->ndp->ni_erootdir = erootdir; 584 585 /* 586 * Get a reference to the start dir so we can safely unlock cwdi. 587 * 588 * XXX: should we hold references to rootdir and erootdir while 589 * we're running? What happens if a multithreaded process chroots 590 * during namei? 591 */ 592 vref(startdir); 593 594 rw_exit(&cwdi->cwdi_lock); 595 return startdir; 596 } 597 598 /* 599 * Get the directory context for the nfsd case, in parallel to 600 * getstartdir. Initializes the rootdir and erootdir state and 601 * returns a reference to the passed-in starting dir. 602 */ 603 static struct vnode * 604 namei_getstartdir_for_nfsd(struct namei_state *state) 605 { 606 KASSERT(state->ndp->ni_atdir != NULL); 607 608 /* always use the real root, and never set an emulation root */ 609 state->ndp->ni_rootdir = rootvnode; 610 state->ndp->ni_erootdir = NULL; 611 612 vref(state->ndp->ni_atdir); 613 return state->ndp->ni_atdir; 614 } 615 616 617 /* 618 * Ktrace the namei operation. 619 */ 620 static void 621 namei_ktrace(struct namei_state *state) 622 { 623 struct nameidata *ndp = state->ndp; 624 struct componentname *cnp = state->cnp; 625 struct lwp *self = curlwp; /* thread doing namei() */ 626 const char *emul_path; 627 628 if (ktrpoint(KTR_NAMEI)) { 629 if (ndp->ni_erootdir != NULL) { 630 /* 631 * To make any sense, the trace entry need to have the 632 * text of the emulation path prepended. 633 * Usually we can get this from the current process, 634 * but when called from emul_find_interp() it is only 635 * in the exec_package - so we get it passed in ni_next 636 * (this is a hack). 637 */ 638 if (cnp->cn_flags & EMULROOTSET) 639 emul_path = ndp->ni_next; 640 else 641 emul_path = self->l_proc->p_emul->e_path; 642 ktrnamei2(emul_path, strlen(emul_path), 643 ndp->ni_pnbuf, ndp->ni_pathlen); 644 } else 645 ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen); 646 } 647 } 648 649 /* 650 * Start up namei. Find the root dir and cwd, establish the starting 651 * directory for lookup, and lock it. Also calls ktrace when 652 * appropriate. 653 */ 654 static int 655 namei_start(struct namei_state *state, int isnfsd, 656 struct vnode **startdir_ret) 657 { 658 struct nameidata *ndp = state->ndp; 659 struct vnode *startdir; 660 661 /* length includes null terminator (was originally from copyinstr) */ 662 ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1; 663 664 /* 665 * POSIX.1 requirement: "" is not a valid file name. 666 */ 667 if (ndp->ni_pathlen == 1) { 668 return ENOENT; 669 } 670 671 ndp->ni_loopcnt = 0; 672 673 /* Get starting directory, set up root, and ktrace. */ 674 if (isnfsd) { 675 startdir = namei_getstartdir_for_nfsd(state); 676 /* no ktrace */ 677 } else { 678 startdir = namei_getstartdir(state); 679 namei_ktrace(state); 680 } 681 682 /* NDAT may feed us with a non directory namei_getstartdir */ 683 if (startdir->v_type != VDIR) 684 return ENOTDIR; 685 686 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 687 688 *startdir_ret = startdir; 689 return 0; 690 } 691 692 /* 693 * Check for being at a symlink that we're going to follow. 694 */ 695 static inline int 696 namei_atsymlink(struct namei_state *state, struct vnode *foundobj) 697 { 698 return (foundobj->v_type == VLNK) && 699 (state->cnp->cn_flags & (FOLLOW|REQUIREDIR)); 700 } 701 702 /* 703 * Follow a symlink. 704 * 705 * Updates searchdir. inhibitmagic causes magic symlinks to not be 706 * interpreted; this is used by nfsd. 707 * 708 * Unlocks foundobj on success (ugh) 709 */ 710 static inline int 711 namei_follow(struct namei_state *state, int inhibitmagic, 712 struct vnode *searchdir, struct vnode *foundobj, 713 struct vnode **newsearchdir_ret) 714 { 715 struct nameidata *ndp = state->ndp; 716 struct componentname *cnp = state->cnp; 717 718 struct lwp *self = curlwp; /* thread doing namei() */ 719 struct iovec aiov; /* uio for reading symbolic links */ 720 struct uio auio; 721 char *cp; /* pointer into pathname argument */ 722 size_t linklen; 723 int error; 724 725 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 726 KASSERT(VOP_ISLOCKED(foundobj) == LK_EXCLUSIVE); 727 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 728 return ELOOP; 729 } 730 if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) { 731 error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred); 732 if (error != 0) 733 return error; 734 } 735 736 /* FUTURE: fix this to not use a second buffer */ 737 cp = PNBUF_GET(); 738 aiov.iov_base = cp; 739 aiov.iov_len = MAXPATHLEN; 740 auio.uio_iov = &aiov; 741 auio.uio_iovcnt = 1; 742 auio.uio_offset = 0; 743 auio.uio_rw = UIO_READ; 744 auio.uio_resid = MAXPATHLEN; 745 UIO_SETUP_SYSSPACE(&auio); 746 error = VOP_READLINK(foundobj, &auio, cnp->cn_cred); 747 if (error) { 748 PNBUF_PUT(cp); 749 return error; 750 } 751 linklen = MAXPATHLEN - auio.uio_resid; 752 if (linklen == 0) { 753 PNBUF_PUT(cp); 754 return ENOENT; 755 } 756 757 /* 758 * Do symlink substitution, if appropriate, and 759 * check length for potential overflow. 760 * 761 * Inhibit symlink substitution for nfsd. 762 * XXX: This is how it was before; is that a bug or a feature? 763 */ 764 if ((!inhibitmagic && vfs_magiclinks && 765 symlink_magic(self->l_proc, cp, &linklen)) || 766 (linklen + ndp->ni_pathlen >= MAXPATHLEN)) { 767 PNBUF_PUT(cp); 768 return ENAMETOOLONG; 769 } 770 if (ndp->ni_pathlen > 1) { 771 /* includes a null-terminator */ 772 memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); 773 } else { 774 cp[linklen] = '\0'; 775 } 776 ndp->ni_pathlen += linklen; 777 memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen); 778 PNBUF_PUT(cp); 779 780 /* we're now starting from the beginning of the buffer again */ 781 cnp->cn_nameptr = ndp->ni_pnbuf; 782 783 /* must unlock this before relocking searchdir */ 784 VOP_UNLOCK(foundobj); 785 786 /* 787 * Check if root directory should replace current directory. 788 */ 789 if (ndp->ni_pnbuf[0] == '/') { 790 vput(searchdir); 791 /* Keep absolute symbolic links inside emulation root */ 792 searchdir = ndp->ni_erootdir; 793 if (searchdir == NULL || 794 (ndp->ni_pnbuf[1] == '.' 795 && ndp->ni_pnbuf[2] == '.' 796 && ndp->ni_pnbuf[3] == '/')) { 797 ndp->ni_erootdir = NULL; 798 searchdir = ndp->ni_rootdir; 799 } 800 vref(searchdir); 801 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 802 while (cnp->cn_nameptr[0] == '/') { 803 cnp->cn_nameptr++; 804 ndp->ni_pathlen--; 805 } 806 } 807 808 *newsearchdir_ret = searchdir; 809 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 810 return 0; 811 } 812 813 ////////////////////////////// 814 815 /* 816 * Inspect the leading path component and update the state accordingly. 817 */ 818 static int 819 lookup_parsepath(struct namei_state *state) 820 { 821 const char *cp; /* pointer into pathname argument */ 822 823 struct componentname *cnp = state->cnp; 824 struct nameidata *ndp = state->ndp; 825 826 KASSERT(cnp == &ndp->ni_cnd); 827 828 /* 829 * Search a new directory. 830 * 831 * The last component of the filename is left accessible via 832 * cnp->cn_nameptr for callers that need the name. Callers needing 833 * the name set the SAVENAME flag. When done, they assume 834 * responsibility for freeing the pathname buffer. 835 * 836 * At this point, our only vnode state is that the search dir 837 * is held and locked. 838 */ 839 cnp->cn_consume = 0; 840 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 841 cp = cnp->cn_nameptr + cnp->cn_namelen; 842 if (cnp->cn_namelen > KERNEL_NAME_MAX) { 843 return ENAMETOOLONG; 844 } 845 #ifdef NAMEI_DIAGNOSTIC 846 { char c = *cp; 847 *(char *)cp = '\0'; 848 printf("{%s}: ", cnp->cn_nameptr); 849 *(char *)cp = c; } 850 #endif /* NAMEI_DIAGNOSTIC */ 851 ndp->ni_pathlen -= cnp->cn_namelen; 852 ndp->ni_next = cp; 853 /* 854 * If this component is followed by a slash, then move the pointer to 855 * the next component forward, and remember that this component must be 856 * a directory. 857 */ 858 if (*cp == '/') { 859 do { 860 cp++; 861 } while (*cp == '/'); 862 state->slashes = cp - ndp->ni_next; 863 ndp->ni_pathlen -= state->slashes; 864 ndp->ni_next = cp; 865 cnp->cn_flags |= REQUIREDIR; 866 } else { 867 state->slashes = 0; 868 cnp->cn_flags &= ~REQUIREDIR; 869 } 870 /* 871 * We do special processing on the last component, whether or not it's 872 * a directory. Cache all intervening lookups, but not the final one. 873 */ 874 if (*cp == '\0') { 875 if (state->docache) 876 cnp->cn_flags |= MAKEENTRY; 877 else 878 cnp->cn_flags &= ~MAKEENTRY; 879 cnp->cn_flags |= ISLASTCN; 880 } else { 881 cnp->cn_flags |= MAKEENTRY; 882 cnp->cn_flags &= ~ISLASTCN; 883 } 884 if (cnp->cn_namelen == 2 && 885 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 886 cnp->cn_flags |= ISDOTDOT; 887 else 888 cnp->cn_flags &= ~ISDOTDOT; 889 890 return 0; 891 } 892 893 /* 894 * Call VOP_LOOKUP for a single lookup; return a new search directory 895 * (used when crossing mountpoints up or searching union mounts down) and 896 * the found object, which for create operations may be NULL on success. 897 */ 898 static int 899 lookup_once(struct namei_state *state, 900 struct vnode *searchdir, 901 struct vnode **newsearchdir_ret, 902 struct vnode **foundobj_ret) 903 { 904 struct vnode *tmpvn; /* scratch vnode */ 905 struct vnode *foundobj; /* result */ 906 struct mount *mp; /* mount table entry */ 907 struct lwp *l = curlwp; 908 int error; 909 910 struct componentname *cnp = state->cnp; 911 struct nameidata *ndp = state->ndp; 912 913 KASSERT(cnp == &ndp->ni_cnd); 914 KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); 915 *newsearchdir_ret = searchdir; 916 917 /* 918 * Handle "..": two special cases. 919 * 1. If at root directory (e.g. after chroot) 920 * or at absolute root directory 921 * then ignore it so can't get out. 922 * 1a. If at the root of the emulation filesystem go to the real 923 * root. So "/../<path>" is always absolute. 924 * 1b. If we have somehow gotten out of a jail, warn 925 * and also ignore it so we can't get farther out. 926 * 2. If this vnode is the root of a mounted 927 * filesystem, then replace it with the 928 * vnode which was mounted on so we take the 929 * .. in the other file system. 930 */ 931 if (cnp->cn_flags & ISDOTDOT) { 932 struct proc *p = l->l_proc; 933 934 for (;;) { 935 if (searchdir == ndp->ni_rootdir || 936 searchdir == rootvnode) { 937 foundobj = searchdir; 938 vref(foundobj); 939 *foundobj_ret = foundobj; 940 error = 0; 941 goto done; 942 } 943 if (ndp->ni_rootdir != rootvnode) { 944 int retval; 945 946 VOP_UNLOCK(searchdir); 947 retval = vn_isunder(searchdir, ndp->ni_rootdir, l); 948 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 949 if (!retval) { 950 /* Oops! We got out of jail! */ 951 log(LOG_WARNING, 952 "chrooted pid %d uid %d (%s) " 953 "detected outside of its chroot\n", 954 p->p_pid, kauth_cred_geteuid(l->l_cred), 955 p->p_comm); 956 /* Put us at the jail root. */ 957 vput(searchdir); 958 searchdir = NULL; 959 foundobj = ndp->ni_rootdir; 960 vref(foundobj); 961 vref(foundobj); 962 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 963 *newsearchdir_ret = foundobj; 964 *foundobj_ret = foundobj; 965 error = 0; 966 goto done; 967 } 968 } 969 if ((searchdir->v_vflag & VV_ROOT) == 0 || 970 (cnp->cn_flags & NOCROSSMOUNT)) 971 break; 972 tmpvn = searchdir; 973 searchdir = searchdir->v_mount->mnt_vnodecovered; 974 vref(searchdir); 975 vput(tmpvn); 976 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 977 *newsearchdir_ret = searchdir; 978 } 979 } 980 981 /* 982 * We now have a segment name to search for, and a directory to search. 983 * Our vnode state here is that "searchdir" is held and locked. 984 */ 985 unionlookup: 986 foundobj = NULL; 987 error = VOP_LOOKUP(searchdir, &foundobj, cnp); 988 989 if (error != 0) { 990 #ifdef DIAGNOSTIC 991 if (foundobj != NULL) 992 panic("leaf `%s' should be empty", cnp->cn_nameptr); 993 #endif /* DIAGNOSTIC */ 994 #ifdef NAMEI_DIAGNOSTIC 995 printf("not found\n"); 996 #endif /* NAMEI_DIAGNOSTIC */ 997 if ((error == ENOENT) && 998 (searchdir->v_vflag & VV_ROOT) && 999 (searchdir->v_mount->mnt_flag & MNT_UNION)) { 1000 tmpvn = searchdir; 1001 searchdir = searchdir->v_mount->mnt_vnodecovered; 1002 vref(searchdir); 1003 vput(tmpvn); 1004 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1005 *newsearchdir_ret = searchdir; 1006 goto unionlookup; 1007 } 1008 1009 if (error != EJUSTRETURN) 1010 goto done; 1011 1012 /* 1013 * If this was not the last component, or there were trailing 1014 * slashes, and we are not going to create a directory, 1015 * then the name must exist. 1016 */ 1017 if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) { 1018 error = ENOENT; 1019 goto done; 1020 } 1021 1022 /* 1023 * If creating and at end of pathname, then can consider 1024 * allowing file to be created. 1025 */ 1026 if (state->rdonly) { 1027 error = EROFS; 1028 goto done; 1029 } 1030 1031 /* 1032 * We return success and a NULL foundobj to indicate 1033 * that the entry doesn't currently exist, leaving a 1034 * pointer to the (normally, locked) directory vnode 1035 * as searchdir. 1036 */ 1037 *foundobj_ret = NULL; 1038 error = 0; 1039 goto done; 1040 } 1041 #ifdef NAMEI_DIAGNOSTIC 1042 printf("found\n"); 1043 #endif /* NAMEI_DIAGNOSTIC */ 1044 1045 /* 1046 * Take into account any additional components consumed by the 1047 * underlying filesystem. This will include any trailing slashes after 1048 * the last component consumed. 1049 */ 1050 if (cnp->cn_consume > 0) { 1051 ndp->ni_pathlen -= cnp->cn_consume - state->slashes; 1052 ndp->ni_next += cnp->cn_consume - state->slashes; 1053 cnp->cn_consume = 0; 1054 if (ndp->ni_next[0] == '\0') 1055 cnp->cn_flags |= ISLASTCN; 1056 } 1057 1058 /* 1059 * "searchdir" is locked and held, "foundobj" is held, 1060 * they may be the same vnode. 1061 */ 1062 if (searchdir != foundobj) { 1063 if (cnp->cn_flags & ISDOTDOT) 1064 VOP_UNLOCK(searchdir); 1065 error = vn_lock(foundobj, LK_EXCLUSIVE); 1066 if (cnp->cn_flags & ISDOTDOT) 1067 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1068 if (error != 0) { 1069 vrele(foundobj); 1070 goto done; 1071 } 1072 } 1073 1074 /* 1075 * Check to see if the vnode has been mounted on; 1076 * if so find the root of the mounted file system. 1077 */ 1078 while (foundobj->v_type == VDIR && 1079 (mp = foundobj->v_mountedhere) != NULL && 1080 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 1081 error = vfs_busy(mp, NULL); 1082 if (error != 0) { 1083 if (searchdir != foundobj) { 1084 vput(foundobj); 1085 } else { 1086 vrele(foundobj); 1087 } 1088 goto done; 1089 } 1090 if (searchdir != foundobj) { 1091 VOP_UNLOCK(searchdir); 1092 } 1093 vput(foundobj); 1094 error = VFS_ROOT(mp, &foundobj); 1095 vfs_unbusy(mp, false, NULL); 1096 if (error) { 1097 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1098 goto done; 1099 } 1100 /* 1101 * avoid locking vnodes from two filesystems because it's 1102 * prune to deadlock. eg. when using puffs. 1103 * also, it isn't a good idea to propagate slowness of a 1104 * filesystem up to the root directory. 1105 * for now, only handle the common case. (ie. foundobj is VDIR) 1106 */ 1107 if (foundobj->v_type == VDIR) { 1108 vrele(searchdir); 1109 *newsearchdir_ret = searchdir = foundobj; 1110 vref(searchdir); 1111 } else { 1112 VOP_UNLOCK(foundobj); 1113 vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); 1114 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 1115 } 1116 } 1117 1118 *foundobj_ret = foundobj; 1119 error = 0; 1120 done: 1121 KASSERT(VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE); 1122 /* 1123 * *foundobj_ret is valid only if error == 0. 1124 */ 1125 KASSERT(error != 0 || *foundobj_ret == NULL || 1126 VOP_ISLOCKED(*foundobj_ret) == LK_EXCLUSIVE); 1127 return error; 1128 } 1129 1130 ////////////////////////////// 1131 1132 /* 1133 * Do a complete path search from a single root directory. 1134 * (This is called up to twice if TRYEMULROOT is in effect.) 1135 */ 1136 static int 1137 namei_oneroot(struct namei_state *state, 1138 int neverfollow, int inhibitmagic, int isnfsd) 1139 { 1140 struct nameidata *ndp = state->ndp; 1141 struct componentname *cnp = state->cnp; 1142 struct vnode *searchdir, *foundobj; 1143 int error; 1144 1145 error = namei_start(state, isnfsd, &searchdir); 1146 if (error) { 1147 ndp->ni_dvp = NULL; 1148 ndp->ni_vp = NULL; 1149 return error; 1150 } 1151 KASSERT(searchdir->v_type == VDIR); 1152 1153 /* 1154 * Setup: break out flag bits into variables. 1155 */ 1156 state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 1157 if (cnp->cn_nameiop == DELETE) 1158 state->docache = 0; 1159 state->rdonly = cnp->cn_flags & RDONLY; 1160 1161 /* 1162 * Keep going until we run out of path components. 1163 */ 1164 cnp->cn_nameptr = ndp->ni_pnbuf; 1165 1166 /* drop leading slashes (already used them to choose startdir) */ 1167 while (cnp->cn_nameptr[0] == '/') { 1168 cnp->cn_nameptr++; 1169 ndp->ni_pathlen--; 1170 } 1171 /* was it just "/"? */ 1172 if (cnp->cn_nameptr[0] == '\0') { 1173 foundobj = searchdir; 1174 searchdir = NULL; 1175 cnp->cn_flags |= ISLASTCN; 1176 1177 /* bleh */ 1178 goto skiploop; 1179 } 1180 1181 for (;;) { 1182 1183 /* 1184 * If the directory we're on is unmounted, bail out. 1185 * XXX: should this also check if it's unlinked? 1186 * XXX: yes it should... but how? 1187 */ 1188 if (searchdir->v_mount == NULL) { 1189 vput(searchdir); 1190 ndp->ni_dvp = NULL; 1191 ndp->ni_vp = NULL; 1192 return (ENOENT); 1193 } 1194 1195 /* 1196 * Look up the next path component. 1197 * (currently, this may consume more than one) 1198 */ 1199 1200 /* There should be no slashes here. */ 1201 KASSERT(cnp->cn_nameptr[0] != '/'); 1202 1203 /* and we shouldn't have looped around if we were done */ 1204 KASSERT(cnp->cn_nameptr[0] != '\0'); 1205 1206 error = lookup_parsepath(state); 1207 if (error) { 1208 vput(searchdir); 1209 ndp->ni_dvp = NULL; 1210 ndp->ni_vp = NULL; 1211 state->attempt_retry = 1; 1212 return (error); 1213 } 1214 1215 error = lookup_once(state, searchdir, &searchdir, &foundobj); 1216 if (error) { 1217 vput(searchdir); 1218 ndp->ni_dvp = NULL; 1219 ndp->ni_vp = NULL; 1220 /* 1221 * Note that if we're doing TRYEMULROOT we can 1222 * retry with the normal root. Where this is 1223 * currently set matches previous practice, 1224 * but the previous practice didn't make much 1225 * sense and somebody should sit down and 1226 * figure out which cases should cause retry 1227 * and which shouldn't. XXX. 1228 */ 1229 state->attempt_retry = 1; 1230 return (error); 1231 } 1232 1233 if (foundobj == NULL) { 1234 /* 1235 * Success with no object returned means we're 1236 * creating something and it isn't already 1237 * there. Break out of the main loop now so 1238 * the code below doesn't have to test for 1239 * foundobj == NULL. 1240 */ 1241 break; 1242 } 1243 1244 /* 1245 * Check for symbolic link. If we've reached one, 1246 * follow it, unless we aren't supposed to. Back up 1247 * over any slashes that we skipped, as we will need 1248 * them again. 1249 */ 1250 if (namei_atsymlink(state, foundobj)) { 1251 ndp->ni_pathlen += state->slashes; 1252 ndp->ni_next -= state->slashes; 1253 if (neverfollow) { 1254 error = EINVAL; 1255 } else { 1256 /* 1257 * dholland 20110410: if we're at a 1258 * union mount it might make sense to 1259 * use the top of the union stack here 1260 * rather than the layer we found the 1261 * symlink in. (FUTURE) 1262 */ 1263 error = namei_follow(state, inhibitmagic, 1264 searchdir, foundobj, 1265 &searchdir); 1266 } 1267 if (error) { 1268 KASSERT(searchdir != foundobj); 1269 vput(searchdir); 1270 vput(foundobj); 1271 ndp->ni_dvp = NULL; 1272 ndp->ni_vp = NULL; 1273 return error; 1274 } 1275 /* namei_follow unlocks it (ugh) so rele, not put */ 1276 vrele(foundobj); 1277 foundobj = NULL; 1278 1279 /* 1280 * If we followed a symlink to `/' and there 1281 * are no more components after the symlink, 1282 * we're done with the loop and what we found 1283 * is the searchdir. 1284 */ 1285 if (cnp->cn_nameptr[0] == '\0') { 1286 foundobj = searchdir; 1287 searchdir = NULL; 1288 cnp->cn_flags |= ISLASTCN; 1289 break; 1290 } 1291 1292 continue; 1293 } 1294 1295 /* 1296 * Not a symbolic link. 1297 * 1298 * Check for directory, if the component was 1299 * followed by a series of slashes. 1300 */ 1301 if ((foundobj->v_type != VDIR) && 1302 (cnp->cn_flags & REQUIREDIR)) { 1303 if (searchdir == foundobj) { 1304 vrele(searchdir); 1305 } else { 1306 vput(searchdir); 1307 } 1308 vput(foundobj); 1309 ndp->ni_dvp = NULL; 1310 ndp->ni_vp = NULL; 1311 state->attempt_retry = 1; 1312 return ENOTDIR; 1313 } 1314 1315 /* 1316 * Stop if we've reached the last component. 1317 */ 1318 if (cnp->cn_flags & ISLASTCN) { 1319 break; 1320 } 1321 1322 /* 1323 * Continue with the next component. 1324 */ 1325 cnp->cn_nameptr = ndp->ni_next; 1326 if (searchdir == foundobj) { 1327 vrele(searchdir); 1328 } else { 1329 vput(searchdir); 1330 } 1331 searchdir = foundobj; 1332 foundobj = NULL; 1333 } 1334 1335 skiploop: 1336 1337 if (foundobj != NULL) { 1338 if (foundobj == ndp->ni_erootdir) { 1339 /* 1340 * We are about to return the emulation root. 1341 * This isn't a good idea because code might 1342 * repeatedly lookup ".." until the file 1343 * matches that returned for "/" and loop 1344 * forever. So convert it to the real root. 1345 */ 1346 if (searchdir != NULL) { 1347 if (searchdir == foundobj) 1348 vrele(searchdir); 1349 else 1350 vput(searchdir); 1351 searchdir = NULL; 1352 } 1353 vput(foundobj); 1354 foundobj = ndp->ni_rootdir; 1355 vref(foundobj); 1356 vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); 1357 } 1358 1359 /* 1360 * If the caller requested the parent node (i.e. it's 1361 * a CREATE, DELETE, or RENAME), and we don't have one 1362 * (because this is the root directory, or we crossed 1363 * a mount point), then we must fail. 1364 */ 1365 if (cnp->cn_nameiop != LOOKUP && 1366 (searchdir == NULL || 1367 searchdir->v_mount != foundobj->v_mount)) { 1368 if (searchdir) { 1369 vput(searchdir); 1370 } 1371 vput(foundobj); 1372 foundobj = NULL; 1373 ndp->ni_dvp = NULL; 1374 ndp->ni_vp = NULL; 1375 state->attempt_retry = 1; 1376 1377 switch (cnp->cn_nameiop) { 1378 case CREATE: 1379 return EEXIST; 1380 case DELETE: 1381 case RENAME: 1382 return EBUSY; 1383 default: 1384 break; 1385 } 1386 panic("Invalid nameiop\n"); 1387 } 1388 1389 /* 1390 * Disallow directory write attempts on read-only lookups. 1391 * Prefers EEXIST over EROFS for the CREATE case. 1392 */ 1393 if (state->rdonly && 1394 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1395 if (searchdir) { 1396 if (foundobj != searchdir) { 1397 vput(searchdir); 1398 } else { 1399 vrele(searchdir); 1400 } 1401 searchdir = NULL; 1402 } 1403 vput(foundobj); 1404 foundobj = NULL; 1405 ndp->ni_dvp = NULL; 1406 ndp->ni_vp = NULL; 1407 state->attempt_retry = 1; 1408 return EROFS; 1409 } 1410 if ((cnp->cn_flags & LOCKLEAF) == 0) { 1411 /* 1412 * Note: if LOCKPARENT but not LOCKLEAF is 1413 * set, and searchdir == foundobj, this code 1414 * necessarily unlocks the parent as well as 1415 * the leaf. That is, just because you specify 1416 * LOCKPARENT doesn't mean you necessarily get 1417 * a locked parent vnode. The code in 1418 * vfs_syscalls.c, and possibly elsewhere, 1419 * that uses this combination "knows" this, so 1420 * it can't be safely changed. Feh. XXX 1421 */ 1422 VOP_UNLOCK(foundobj); 1423 } 1424 } 1425 1426 /* 1427 * Done. 1428 */ 1429 1430 /* 1431 * If LOCKPARENT is not set, the parent directory isn't returned. 1432 */ 1433 if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) { 1434 if (searchdir == foundobj) { 1435 vrele(searchdir); 1436 } else { 1437 vput(searchdir); 1438 } 1439 searchdir = NULL; 1440 } 1441 1442 ndp->ni_dvp = searchdir; 1443 ndp->ni_vp = foundobj; 1444 return 0; 1445 } 1446 1447 /* 1448 * Do namei; wrapper layer that handles TRYEMULROOT. 1449 */ 1450 static int 1451 namei_tryemulroot(struct namei_state *state, 1452 int neverfollow, int inhibitmagic, int isnfsd) 1453 { 1454 int error; 1455 1456 struct nameidata *ndp = state->ndp; 1457 struct componentname *cnp = state->cnp; 1458 const char *savepath = NULL; 1459 1460 KASSERT(cnp == &ndp->ni_cnd); 1461 1462 if (cnp->cn_flags & TRYEMULROOT) { 1463 savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf); 1464 } 1465 1466 emul_retry: 1467 state->attempt_retry = 0; 1468 1469 error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd); 1470 if (error) { 1471 /* 1472 * Once namei has started up, the existence of ni_erootdir 1473 * tells us whether we're working from an emulation root. 1474 * The TRYEMULROOT flag isn't necessarily authoritative. 1475 */ 1476 if (ndp->ni_erootdir != NULL && state->attempt_retry) { 1477 /* Retry the whole thing using the normal root */ 1478 cnp->cn_flags &= ~TRYEMULROOT; 1479 state->attempt_retry = 0; 1480 1481 /* kinda gross */ 1482 strcpy(ndp->ni_pathbuf->pb_path, savepath); 1483 pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath); 1484 savepath = NULL; 1485 1486 goto emul_retry; 1487 } 1488 } 1489 if (savepath != NULL) { 1490 pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath); 1491 } 1492 return error; 1493 } 1494 1495 /* 1496 * External interface. 1497 */ 1498 int 1499 namei(struct nameidata *ndp) 1500 { 1501 struct namei_state state; 1502 int error; 1503 1504 namei_init(&state, ndp); 1505 error = namei_tryemulroot(&state, 1506 0/*!neverfollow*/, 0/*!inhibitmagic*/, 1507 0/*isnfsd*/); 1508 namei_cleanup(&state); 1509 1510 if (error) { 1511 /* make sure no stray refs leak out */ 1512 KASSERT(ndp->ni_dvp == NULL); 1513 KASSERT(ndp->ni_vp == NULL); 1514 } 1515 1516 return error; 1517 } 1518 1519 //////////////////////////////////////////////////////////// 1520 1521 /* 1522 * External interface used by nfsd. This is basically different from 1523 * namei only in that it has the ability to pass in the "current 1524 * directory", and uses an extra flag "neverfollow" for which there's 1525 * no physical flag defined in namei.h. (There used to be a cut&paste 1526 * copy of about half of namei in nfsd to allow these minor 1527 * adjustments to exist.) 1528 * 1529 * XXX: the namei interface should be adjusted so nfsd can just use 1530 * ordinary namei(). 1531 */ 1532 int 1533 lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow) 1534 { 1535 struct namei_state state; 1536 int error; 1537 1538 KASSERT(ndp->ni_atdir == NULL); 1539 ndp->ni_atdir = forcecwd; 1540 1541 namei_init(&state, ndp); 1542 error = namei_tryemulroot(&state, 1543 neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/); 1544 namei_cleanup(&state); 1545 1546 if (error) { 1547 /* make sure no stray refs leak out */ 1548 KASSERT(ndp->ni_dvp == NULL); 1549 KASSERT(ndp->ni_vp == NULL); 1550 } 1551 1552 return error; 1553 } 1554 1555 /* 1556 * A second external interface used by nfsd. This turns out to be a 1557 * single lookup used by the WebNFS code (ha!) to get "index.html" or 1558 * equivalent when asked for a directory. It should eventually evolve 1559 * into some kind of namei_once() call; for the time being it's kind 1560 * of a mess. XXX. 1561 * 1562 * dholland 20110109: I don't think it works, and I don't think it 1563 * worked before I started hacking and slashing either, and I doubt 1564 * anyone will ever notice. 1565 */ 1566 1567 /* 1568 * Internals. This calls lookup_once() after setting up the assorted 1569 * pieces of state the way they ought to be. 1570 */ 1571 static int 1572 do_lookup_for_nfsd_index(struct namei_state *state) 1573 { 1574 int error = 0; 1575 1576 struct componentname *cnp = state->cnp; 1577 struct nameidata *ndp = state->ndp; 1578 struct vnode *startdir; 1579 struct vnode *foundobj; 1580 const char *cp; /* pointer into pathname argument */ 1581 1582 KASSERT(cnp == &ndp->ni_cnd); 1583 1584 startdir = state->ndp->ni_atdir; 1585 1586 cnp->cn_nameptr = ndp->ni_pnbuf; 1587 state->docache = 1; 1588 state->rdonly = cnp->cn_flags & RDONLY; 1589 ndp->ni_dvp = NULL; 1590 1591 cnp->cn_consume = 0; 1592 cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); 1593 cp = cnp->cn_nameptr + cnp->cn_namelen; 1594 KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX); 1595 ndp->ni_pathlen -= cnp->cn_namelen; 1596 ndp->ni_next = cp; 1597 state->slashes = 0; 1598 cnp->cn_flags &= ~REQUIREDIR; 1599 cnp->cn_flags |= MAKEENTRY|ISLASTCN; 1600 1601 if (cnp->cn_namelen == 2 && 1602 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 1603 cnp->cn_flags |= ISDOTDOT; 1604 else 1605 cnp->cn_flags &= ~ISDOTDOT; 1606 1607 /* 1608 * Because lookup_once can change the startdir, we need our 1609 * own reference to it to avoid consuming the caller's. 1610 */ 1611 vref(startdir); 1612 vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); 1613 error = lookup_once(state, startdir, &startdir, &foundobj); 1614 if (error == 0 && startdir == foundobj) { 1615 vrele(startdir); 1616 } else { 1617 vput(startdir); 1618 } 1619 if (error) { 1620 goto bad; 1621 } 1622 ndp->ni_vp = foundobj; 1623 1624 if (foundobj == NULL) { 1625 return 0; 1626 } 1627 1628 KASSERT((cnp->cn_flags & LOCKPARENT) == 0); 1629 if ((cnp->cn_flags & LOCKLEAF) == 0) { 1630 VOP_UNLOCK(foundobj); 1631 } 1632 return (0); 1633 1634 bad: 1635 ndp->ni_vp = NULL; 1636 return (error); 1637 } 1638 1639 /* 1640 * External interface. The partitioning between this function and the 1641 * above isn't very clear - the above function exists mostly so code 1642 * that uses "state->" can be shuffled around without having to change 1643 * it to "state.". 1644 */ 1645 int 1646 lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir) 1647 { 1648 struct namei_state state; 1649 int error; 1650 1651 KASSERT(ndp->ni_atdir == NULL); 1652 ndp->ni_atdir = startdir; 1653 1654 /* 1655 * Note: the name sent in here (is not|should not be) allowed 1656 * to contain a slash. 1657 */ 1658 if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) { 1659 return ENAMETOOLONG; 1660 } 1661 if (strchr(ndp->ni_pathbuf->pb_path, '/')) { 1662 return EINVAL; 1663 } 1664 1665 ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1; 1666 ndp->ni_pnbuf = NULL; 1667 ndp->ni_cnd.cn_nameptr = NULL; 1668 1669 namei_init(&state, ndp); 1670 error = do_lookup_for_nfsd_index(&state); 1671 namei_cleanup(&state); 1672 1673 return error; 1674 } 1675 1676 //////////////////////////////////////////////////////////// 1677 1678 /* 1679 * Reacquire a path name component. 1680 * dvp is locked on entry and exit. 1681 * *vpp is locked on exit unless it's NULL. 1682 */ 1683 int 1684 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy) 1685 { 1686 int rdonly; /* lookup read-only flag bit */ 1687 int error = 0; 1688 #ifdef DEBUG 1689 size_t newlen; /* DEBUG: check name len */ 1690 const char *cp; /* DEBUG: check name ptr */ 1691 #endif /* DEBUG */ 1692 1693 (void)dummy; 1694 1695 /* 1696 * Setup: break out flag bits into variables. 1697 */ 1698 rdonly = cnp->cn_flags & RDONLY; 1699 1700 /* 1701 * Search a new directory. 1702 * 1703 * The cn_hash value is for use by vfs_cache. 1704 * The last component of the filename is left accessible via 1705 * cnp->cn_nameptr for callers that need the name. Callers needing 1706 * the name set the SAVENAME flag. When done, they assume 1707 * responsibility for freeing the pathname buffer. 1708 */ 1709 #ifdef DEBUG 1710 #if 0 1711 cp = NULL; 1712 newhash = namei_hash(cnp->cn_nameptr, &cp); 1713 if ((uint32_t)newhash != (uint32_t)cnp->cn_hash) 1714 panic("relookup: bad hash"); 1715 #endif 1716 newlen = namei_getcomponent(cnp->cn_nameptr); 1717 if (cnp->cn_namelen != newlen) 1718 panic("relookup: bad len"); 1719 cp = cnp->cn_nameptr + cnp->cn_namelen; 1720 while (*cp == '/') 1721 cp++; 1722 if (*cp != 0) 1723 panic("relookup: not last component"); 1724 #endif /* DEBUG */ 1725 1726 /* 1727 * Check for degenerate name (e.g. / or "") 1728 * which is a way of talking about a directory, 1729 * e.g. like "/." or ".". 1730 */ 1731 if (cnp->cn_nameptr[0] == '\0') 1732 panic("relookup: null name"); 1733 1734 if (cnp->cn_flags & ISDOTDOT) 1735 panic("relookup: lookup on dot-dot"); 1736 1737 /* 1738 * We now have a segment name to search for, and a directory to search. 1739 */ 1740 *vpp = NULL; 1741 error = VOP_LOOKUP(dvp, vpp, cnp); 1742 if ((error) != 0) { 1743 #ifdef DIAGNOSTIC 1744 if (*vpp != NULL) 1745 panic("leaf `%s' should be empty", cnp->cn_nameptr); 1746 #endif 1747 if (error != EJUSTRETURN) 1748 goto bad; 1749 } 1750 1751 #ifdef DIAGNOSTIC 1752 /* 1753 * Check for symbolic link 1754 */ 1755 if (*vpp && (*vpp)->v_type == VLNK && (cnp->cn_flags & FOLLOW)) 1756 panic("relookup: symlink found"); 1757 #endif 1758 1759 /* 1760 * Check for read-only lookups. 1761 */ 1762 if (rdonly && cnp->cn_nameiop != LOOKUP) { 1763 error = EROFS; 1764 if (*vpp) { 1765 vrele(*vpp); 1766 } 1767 goto bad; 1768 } 1769 /* 1770 * Lock result. 1771 */ 1772 if (*vpp && *vpp != dvp) { 1773 error = vn_lock(*vpp, LK_EXCLUSIVE); 1774 if (error != 0) { 1775 vrele(*vpp); 1776 goto bad; 1777 } 1778 } 1779 return (0); 1780 1781 bad: 1782 *vpp = NULL; 1783 return (error); 1784 } 1785 1786 /* 1787 * namei_simple - simple forms of namei. 1788 * 1789 * These are wrappers to allow the simple case callers of namei to be 1790 * left alone while everything else changes under them. 1791 */ 1792 1793 /* Flags */ 1794 struct namei_simple_flags_type { 1795 int dummy; 1796 }; 1797 static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft; 1798 const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn; 1799 const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt; 1800 const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn; 1801 const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft; 1802 1803 static 1804 int 1805 namei_simple_convert_flags(namei_simple_flags_t sflags) 1806 { 1807 if (sflags == NSM_NOFOLLOW_NOEMULROOT) 1808 return NOFOLLOW | 0; 1809 if (sflags == NSM_NOFOLLOW_TRYEMULROOT) 1810 return NOFOLLOW | TRYEMULROOT; 1811 if (sflags == NSM_FOLLOW_NOEMULROOT) 1812 return FOLLOW | 0; 1813 if (sflags == NSM_FOLLOW_TRYEMULROOT) 1814 return FOLLOW | TRYEMULROOT; 1815 panic("namei_simple_convert_flags: bogus sflags\n"); 1816 return 0; 1817 } 1818 1819 int 1820 namei_simple_kernel(const char *path, namei_simple_flags_t sflags, 1821 struct vnode **vp_ret) 1822 { 1823 return nameiat_simple_kernel(NULL, path, sflags, vp_ret); 1824 } 1825 1826 int 1827 nameiat_simple_kernel(struct vnode *dvp, const char *path, 1828 namei_simple_flags_t sflags, struct vnode **vp_ret) 1829 { 1830 struct nameidata nd; 1831 struct pathbuf *pb; 1832 int err; 1833 1834 pb = pathbuf_create(path); 1835 if (pb == NULL) { 1836 return ENOMEM; 1837 } 1838 1839 NDINIT(&nd, 1840 LOOKUP, 1841 namei_simple_convert_flags(sflags), 1842 pb); 1843 1844 if (dvp != NULL) 1845 NDAT(&nd, dvp); 1846 1847 err = namei(&nd); 1848 if (err != 0) { 1849 pathbuf_destroy(pb); 1850 return err; 1851 } 1852 *vp_ret = nd.ni_vp; 1853 pathbuf_destroy(pb); 1854 return 0; 1855 } 1856 1857 int 1858 namei_simple_user(const char *path, namei_simple_flags_t sflags, 1859 struct vnode **vp_ret) 1860 { 1861 return nameiat_simple_user(NULL, path, sflags, vp_ret); 1862 } 1863 1864 int 1865 nameiat_simple_user(struct vnode *dvp, const char *path, 1866 namei_simple_flags_t sflags, struct vnode **vp_ret) 1867 { 1868 struct pathbuf *pb; 1869 struct nameidata nd; 1870 int err; 1871 1872 err = pathbuf_copyin(path, &pb); 1873 if (err) { 1874 return err; 1875 } 1876 1877 NDINIT(&nd, 1878 LOOKUP, 1879 namei_simple_convert_flags(sflags), 1880 pb); 1881 1882 if (dvp != NULL) 1883 NDAT(&nd, dvp); 1884 1885 err = namei(&nd); 1886 if (err != 0) { 1887 pathbuf_destroy(pb); 1888 return err; 1889 } 1890 *vp_ret = nd.ni_vp; 1891 pathbuf_destroy(pb); 1892 return 0; 1893 } 1894