1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)vfs_lookup.c 7.6 (Berkeley) 05/01/89 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "inode.h" 12 #include "fs.h" 13 #include "mount.h" 14 #include "dir.h" 15 #include "user.h" 16 #include "buf.h" 17 #include "conf.h" 18 #include "uio.h" 19 #include "kernel.h" 20 #include "malloc.h" 21 #ifdef KTRACE 22 #include "proc.h" 23 #include "ktrace.h" 24 #endif 25 26 struct buf *blkatoff(); 27 int dirchk = 1; 28 29 /* 30 * Structures associated with name cacheing. 31 */ 32 #define NCHHASH 32 /* size of hash table */ 33 34 #if ((NCHHASH)&((NCHHASH)-1)) != 0 35 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 36 #else 37 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 38 #endif 39 40 union nchash { 41 union nchash *nch_head[2]; 42 struct namecache *nch_chain[2]; 43 } nchash[NCHHASH]; 44 #define nch_forw nch_chain[0] 45 #define nch_back nch_chain[1] 46 47 struct namecache *nchhead, **nchtail; /* LRU chain pointers */ 48 struct nchstats nchstats; /* cache effectiveness statistics */ 49 50 /* 51 * Convert a pathname into a pointer to a locked inode. 52 * This is a very central and rather complicated routine. 53 * If the file system is not maintained in a strict tree hierarchy, 54 * this can result in a deadlock situation (see comments in code below). 55 * 56 * The flag argument is LOOKUP, CREATE, or DELETE depending on whether 57 * the name is to be looked up, created, or deleted. When CREATE or 58 * DELETE is specified, information usable in creating or deleteing a 59 * directory entry is also calculated. If flag has LOCKPARENT or'ed 60 * into it and the target of the pathname exists, namei returns both 61 * the target and its parent directory locked. When creating and 62 * LOCKPARENT is specified, the target may not be ".". When deleting 63 * and LOCKPARENT is specified, the target may be ".", but the caller 64 * must check to insure it does an irele and iput instead of two iputs. 65 * 66 * The FOLLOW flag is set when symbolic links are to be followed 67 * when they occur at the end of the name translation process. 68 * Symbolic links are always followed for all other pathname 69 * components other than the last. 70 * 71 * The segflg defines whether the name is to be copied from user 72 * space or kernel space. 73 * 74 * Name caching works as follows: 75 * 76 * Names found by directory scans are retained in a cache 77 * for future reference. It is managed LRU, so frequently 78 * used names will hang around. Cache is indexed by hash value 79 * obtained from (ino,dev,name) where ino & dev refer to the 80 * directory containing name. 81 * 82 * For simplicity (and economy of storage), names longer than 83 * a maximum length of NCHNAMLEN are not cached; they occur 84 * infrequently in any case, and are almost never of interest. 85 * 86 * Upon reaching the last segment of a path, if the reference 87 * is for DELETE, or NOCACHE is set (rewrite), and the 88 * name is located in the cache, it will be dropped. 89 * 90 * Overall outline of namei: 91 * 92 * copy in name 93 * get starting directory 94 * dirloop: 95 * check accessibility of directory 96 * dirloop2: 97 * copy next component of name to ndp->ni_dent 98 * handle degenerate case where name is null string 99 * look for name in cache, if found, then if at end of path 100 * and deleting or creating, drop it, else to haveino 101 * search for name in directory, to found or notfound 102 * notfound: 103 * if creating, return locked directory, leaving info on avail. slots 104 * else return error 105 * found: 106 * if at end of path and deleting, return information to allow delete 107 * if at end of path and rewriting (CREATE and LOCKPARENT), lock target 108 * inode and return info to allow rewrite 109 * if .. and on mounted filesys, look in mount table for parent 110 * if not at end, add name to cache; if at end and neither creating 111 * nor deleting, add name to cache 112 * haveino: 113 * if symbolic link, massage name in buffer and continue at dirloop 114 * if more components of name, do next level at dirloop 115 * return the answer as locked inode 116 * 117 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 118 * but unlocked. 119 */ 120 struct inode * 121 namei(ndp) 122 register struct nameidata *ndp; 123 { 124 register char *cp; /* pointer into pathname argument */ 125 /* these variables refer to things which must be freed or unlocked */ 126 register struct inode *dp = 0; /* the directory we are searching */ 127 register struct namecache *ncp; /* cache slot for entry */ 128 register struct fs *fs; /* file system that directory is in */ 129 register struct buf *bp = 0; /* a buffer of directory entries */ 130 register struct direct *ep; /* the current directory entry */ 131 int entryoffsetinblock; /* offset of ep in bp's buffer */ 132 register caddr_t nbp; /* buffer storing path name argument */ 133 /* these variables hold information about the search for a slot */ 134 enum {NONE, COMPACT, FOUND} slotstatus; 135 int slotoffset = -1; /* offset of area with free space */ 136 int slotsize; /* size of area at slotoffset */ 137 int slotfreespace; /* amount of space free in slot */ 138 int slotneeded; /* size of the entry we're seeking */ 139 /* */ 140 int numdirpasses; /* strategy for directory search */ 141 int endsearch; /* offset to end directory search */ 142 int prevoff; /* ndp->ni_offset of previous entry */ 143 int nlink = 0; /* number of symbolic links taken */ 144 struct inode *pdp; /* saved dp during symlink work */ 145 int error, i; 146 int lockparent; 147 int docache; /* == 0 do not cache last component */ 148 int makeentry; /* != 0 if name to be added to cache */ 149 unsigned hash; /* value of name hash for entry */ 150 union nchash *nhp; /* cache chain head for entry */ 151 int isdotdot; /* != 0 if current name is ".." */ 152 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 153 off_t enduseful; /* pointer past last used dir slot */ 154 155 lockparent = ndp->ni_nameiop & LOCKPARENT; 156 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 157 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 158 if (flag == DELETE || lockparent) 159 docache = 0; 160 /* 161 * Get a buffer for the name to be translated, and copy the 162 * name into the buffer. 163 */ 164 MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); 165 if (ndp->ni_segflg == UIO_SYSSPACE) 166 error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 167 else 168 error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 169 if (error) { 170 u.u_error = error; 171 goto bad; 172 } 173 #ifdef KTRACE 174 if (KTRPOINT(u.u_procp, KTR_NAMEI)) 175 ktrnamei(u.u_procp->p_tracep, nbp); 176 #endif 177 178 /* 179 * Get starting directory. 180 */ 181 cp = nbp; 182 if (*cp == '/') { 183 while (*cp == '/') 184 cp++; 185 if ((dp = u.u_rdir) == NULL) 186 dp = rootdir; 187 } else 188 dp = u.u_cdir; 189 fs = dp->i_fs; 190 ILOCK(dp); 191 dp->i_count++; 192 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 193 ndp->ni_endoff = 0; 194 195 /* 196 * We come to dirloop to search a new directory. 197 * The directory must be locked so that it can be 198 * iput, and fs must be already set to dp->i_fs. 199 */ 200 dirloop: 201 /* 202 * Check accessiblity of directory. 203 */ 204 if ((dp->i_mode&IFMT) != IFDIR) { 205 u.u_error = ENOTDIR; 206 goto bad; 207 } 208 if (access(dp, IEXEC)) 209 goto bad; 210 211 dirloop2: 212 /* 213 * Copy next component of name to ndp->ni_dent. 214 */ 215 hash = 0; 216 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 217 if (i >= MAXNAMLEN) { 218 u.u_error = ENAMETOOLONG; 219 goto bad; 220 } 221 if (*cp & 0200) 222 if ((*cp&0377) == ('/'|0200) || flag != DELETE) { 223 u.u_error = EINVAL; 224 goto bad; 225 } 226 ndp->ni_dent.d_name[i++] = *cp; 227 hash += (unsigned char)*cp * i; 228 } 229 ndp->ni_dent.d_namlen = i; 230 ndp->ni_dent.d_name[i] = '\0'; 231 isdotdot = (i == 2 && 232 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 233 makeentry = 1; 234 if (*cp == '\0' && docache == 0) 235 makeentry = 0; 236 237 /* 238 * Check for degenerate name (e.g. / or "") 239 * which is a way of talking about a directory, 240 * e.g. like "/." or ".". 241 */ 242 if (ndp->ni_dent.d_name[0] == '\0') { 243 if (flag != LOOKUP || lockparent) { 244 u.u_error = EISDIR; 245 goto bad; 246 } 247 FREE(nbp, M_NAMEI); 248 return (dp); 249 } 250 251 /* 252 * Special handling for ".." allowing chdir out of mounted 253 * file system: indirect .. in root inode to reevaluate 254 * in directory file system was mounted on. 255 */ 256 if (isdotdot) { 257 for (;;) { 258 if (dp == u.u_rdir || dp == rootdir) { 259 ndp->ni_dent.d_ino = dp->i_number; 260 pdp = dp; 261 dp->i_count++; 262 goto haveino; 263 } 264 if (dp->i_number != ROOTINO) 265 break; 266 for (i = 1; i < NMOUNT; i++) { 267 if (mount[i].m_fs != NULL && 268 mount[i].m_dev == dp->i_dev) { 269 iput(dp); 270 dp = mount[i].m_inodp; 271 ILOCK(dp); 272 dp->i_count++; 273 fs = dp->i_fs; 274 break; 275 } 276 } 277 } 278 } 279 280 /* 281 * We now have a segment name to search for, and a directory to search. 282 * 283 * Before tediously performing a linear scan of the directory, 284 * check the name cache to see if the directory/name pair 285 * we are looking for is known already. We don't do this 286 * if the segment name is long, simply so the cache can avoid 287 * holding long names (which would either waste space, or 288 * add greatly to the complexity). 289 */ 290 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 291 nchstats.ncs_long++; 292 makeentry = 0; 293 } else { 294 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 295 for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp; 296 ncp = ncp->nc_forw) { 297 if (ncp->nc_ino == dp->i_number && 298 ncp->nc_dev == dp->i_dev && 299 ncp->nc_nlen == ndp->ni_dent.d_namlen && 300 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 301 (unsigned)ncp->nc_nlen)) 302 break; 303 } 304 if (ncp == (struct namecache *)nhp) { 305 nchstats.ncs_miss++; 306 ncp = NULL; 307 } else { 308 if (ncp->nc_id != ncp->nc_ip->i_id) 309 nchstats.ncs_falsehits++; 310 else if (!makeentry) 311 nchstats.ncs_badhits++; 312 else { 313 /* 314 * move this slot to end of LRU 315 * chain, if not already there 316 */ 317 if (ncp->nc_nxt) { 318 /* remove from LRU chain */ 319 *ncp->nc_prev = ncp->nc_nxt; 320 ncp->nc_nxt->nc_prev = ncp->nc_prev; 321 322 /* and replace at end of it */ 323 ncp->nc_nxt = NULL; 324 ncp->nc_prev = nchtail; 325 *nchtail = ncp; 326 nchtail = &ncp->nc_nxt; 327 } 328 329 /* 330 * Get the next inode in the path. 331 * See comment above other `IUNLOCK' code for 332 * an explaination of the locking protocol. 333 */ 334 pdp = dp; 335 if (!isdotdot || dp != u.u_rdir) 336 dp = ncp->nc_ip; 337 if (dp == NULL) 338 panic("namei: null cache ino"); 339 if (pdp == dp) 340 dp->i_count++; 341 else if (isdotdot) { 342 IUNLOCK(pdp); 343 igrab(dp); 344 } else { 345 igrab(dp); 346 IUNLOCK(pdp); 347 } 348 349 /* 350 * Verify that the inode that we got 351 * did not change while we were waiting 352 * for it to be locked. 353 */ 354 if (ncp->nc_id != ncp->nc_ip->i_id) { 355 iput(dp); 356 ILOCK(pdp); 357 dp = pdp; 358 nchstats.ncs_falsehits++; 359 } else { 360 ndp->ni_dent.d_ino = dp->i_number; 361 /* ni_dent.d_reclen is garbage ... */ 362 nchstats.ncs_goodhits++; 363 goto haveino; 364 } 365 } 366 367 /* 368 * Last component and we are renaming or deleting, 369 * the cache entry is invalid, or otherwise don't 370 * want cache entry to exist. 371 */ 372 /* remove from LRU chain */ 373 *ncp->nc_prev = ncp->nc_nxt; 374 if (ncp->nc_nxt) 375 ncp->nc_nxt->nc_prev = ncp->nc_prev; 376 else 377 nchtail = ncp->nc_prev; 378 remque(ncp); /* remove from hash chain */ 379 /* insert at head of LRU list (first to grab) */ 380 ncp->nc_nxt = nchhead; 381 ncp->nc_prev = &nchhead; 382 nchhead->nc_prev = &ncp->nc_nxt; 383 nchhead = ncp; 384 /* and make a dummy hash chain */ 385 ncp->nc_forw = ncp; 386 ncp->nc_back = ncp; 387 ncp = NULL; 388 } 389 } 390 391 /* 392 * Suppress search for slots unless creating 393 * file and at end of pathname, in which case 394 * we watch for a place to put the new file in 395 * case it doesn't already exist. 396 */ 397 slotstatus = FOUND; 398 if (flag == CREATE && *cp == 0) { 399 slotstatus = NONE; 400 slotfreespace = 0; 401 slotneeded = DIRSIZ(&ndp->ni_dent); 402 } 403 /* 404 * If this is the same directory that this process 405 * previously searched, pick up where we last left off. 406 * We cache only lookups as these are the most common 407 * and have the greatest payoff. Caching CREATE has little 408 * benefit as it usually must search the entire directory 409 * to determine that the entry does not exist. Caching the 410 * location of the last DELETE has not reduced profiling time 411 * and hence has been removed in the interest of simplicity. 412 */ 413 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 414 dp->i_dev != u.u_ncache.nc_dev) { 415 ndp->ni_offset = 0; 416 numdirpasses = 1; 417 } else { 418 if (u.u_ncache.nc_prevoffset > dp->i_size) 419 u.u_ncache.nc_prevoffset = 0; 420 ndp->ni_offset = u.u_ncache.nc_prevoffset; 421 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 422 if (entryoffsetinblock != 0) { 423 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 424 if (bp == 0) 425 goto bad; 426 } 427 numdirpasses = 2; 428 nchstats.ncs_2passes++; 429 } 430 endsearch = roundup(dp->i_size, DIRBLKSIZ); 431 enduseful = 0; 432 433 #ifdef NAMEI_DIAGNOSTIC 434 printf("{%s}:\n", ndp->ni_dent.d_name); 435 #endif 436 searchloop: 437 while (ndp->ni_offset < endsearch) { 438 /* 439 * If offset is on a block boundary, 440 * read the next directory block. 441 * Release previous if it exists. 442 */ 443 if (blkoff(fs, ndp->ni_offset) == 0) { 444 if (bp != NULL) 445 brelse(bp); 446 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 447 if (bp == 0) 448 goto bad; 449 entryoffsetinblock = 0; 450 } 451 /* 452 * If still looking for a slot, and at a DIRBLKSIZE 453 * boundary, have to start looking for free space again. 454 */ 455 if (slotstatus == NONE && 456 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 457 slotoffset = -1; 458 slotfreespace = 0; 459 } 460 /* 461 * Get pointer to next entry. 462 * Full validation checks are slow, so we only check 463 * enough to insure forward progress through the 464 * directory. Complete checks can be run by patching 465 * "dirchk" to be true. 466 */ 467 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 468 if (ep->d_reclen == 0 || 469 dirchk && dirbadentry(ep, entryoffsetinblock)) { 470 dirbad(dp, ndp->ni_offset, "mangled entry"); 471 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 472 ndp->ni_offset += i; 473 entryoffsetinblock += i; 474 continue; 475 } 476 477 /* 478 * If an appropriate sized slot has not yet been found, 479 * check to see if one is available. Also accumulate space 480 * in the current block so that we can determine if 481 * compaction is viable. 482 */ 483 if (slotstatus != FOUND) { 484 int size = ep->d_reclen; 485 486 if (ep->d_ino != 0) 487 size -= DIRSIZ(ep); 488 if (size > 0) { 489 if (size >= slotneeded) { 490 slotstatus = FOUND; 491 slotoffset = ndp->ni_offset; 492 slotsize = ep->d_reclen; 493 } else if (slotstatus == NONE) { 494 slotfreespace += size; 495 if (slotoffset == -1) 496 slotoffset = ndp->ni_offset; 497 if (slotfreespace >= slotneeded) { 498 slotstatus = COMPACT; 499 slotsize = ndp->ni_offset + 500 ep->d_reclen - slotoffset; 501 } 502 } 503 } 504 } 505 506 /* 507 * Check for a name match. 508 */ 509 if (ep->d_ino) { 510 #ifdef NAMEI_DIAGNOSTIC 511 printf("{%s} ", ep->d_name); 512 #endif 513 if (ep->d_namlen == ndp->ni_dent.d_namlen && 514 !bcmp(ndp->ni_dent.d_name, ep->d_name, 515 (unsigned)ep->d_namlen)) 516 goto found; 517 } 518 prevoff = ndp->ni_offset; 519 ndp->ni_offset += ep->d_reclen; 520 entryoffsetinblock += ep->d_reclen; 521 if (ep->d_ino) 522 enduseful = ndp->ni_offset; 523 } 524 #ifdef NAMEI_DIAGNOSTIC 525 printf("\nnotfound\n"); 526 #endif 527 /* notfound: */ 528 /* 529 * If we started in the middle of the directory and failed 530 * to find our target, we must check the beginning as well. 531 */ 532 if (numdirpasses == 2) { 533 numdirpasses--; 534 ndp->ni_offset = 0; 535 endsearch = u.u_ncache.nc_prevoffset; 536 goto searchloop; 537 } 538 /* 539 * If creating, and at end of pathname and current 540 * directory has not been removed, then can consider 541 * allowing file to be created. 542 */ 543 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 544 /* 545 * Access for write is interpreted as allowing 546 * creation of files in the directory. 547 */ 548 if (access(dp, IWRITE)) 549 goto bad; 550 /* 551 * Return an indication of where the new directory 552 * entry should be put. If we didn't find a slot, 553 * then set ndp->ni_count to 0 indicating that the new 554 * slot belongs at the end of the directory. If we found 555 * a slot, then the new entry can be put in the range 556 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 557 */ 558 if (slotstatus == NONE) { 559 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 560 ndp->ni_count = 0; 561 enduseful = ndp->ni_offset; 562 } else { 563 ndp->ni_offset = slotoffset; 564 ndp->ni_count = slotsize; 565 if (enduseful < slotoffset + slotsize) 566 enduseful = slotoffset + slotsize; 567 } 568 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ); 569 dp->i_flag |= IUPD|ICHG; 570 if (bp) 571 brelse(bp); 572 FREE(nbp, M_NAMEI); 573 /* 574 * We return with the directory locked, so that 575 * the parameters we set up above will still be 576 * valid if we actually decide to do a direnter(). 577 * We return NULL to indicate that the entry doesn't 578 * currently exist, leaving a pointer to the (locked) 579 * directory inode in ndp->ni_pdir. 580 */ 581 ndp->ni_pdir = dp; 582 return (NULL); 583 } 584 u.u_error = ENOENT; 585 goto bad; 586 found: 587 #ifdef NAMEI_DIAGNOSTIC 588 printf("\nfound\n"); 589 #endif 590 if (numdirpasses == 2) 591 nchstats.ncs_pass2++; 592 /* 593 * Check that directory length properly reflects presence 594 * of this entry. 595 */ 596 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 597 dirbad(dp, ndp->ni_offset, "i_size too small"); 598 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 599 dp->i_flag |= IUPD|ICHG; 600 } 601 602 /* 603 * Found component in pathname. 604 * If the final component of path name, save information 605 * in the cache as to where the entry was found. 606 */ 607 if (*cp == '\0' && flag == LOOKUP) { 608 u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1); 609 u.u_ncache.nc_inumber = dp->i_number; 610 u.u_ncache.nc_dev = dp->i_dev; 611 } 612 /* 613 * Save directory entry's inode number and reclen in ndp->ni_dent, 614 * and release directory buffer. 615 */ 616 ndp->ni_dent.d_ino = ep->d_ino; 617 ndp->ni_dent.d_reclen = ep->d_reclen; 618 brelse(bp); 619 bp = NULL; 620 621 /* 622 * If deleting, and at end of pathname, return 623 * parameters which can be used to remove file. 624 * If the lockparent flag isn't set, we return only 625 * the directory (in ndp->ni_pdir), otherwise we go 626 * on and lock the inode, being careful with ".". 627 */ 628 if (flag == DELETE && *cp == 0) { 629 /* 630 * Write access to directory required to delete files. 631 */ 632 if (access(dp, IWRITE)) 633 goto bad; 634 ndp->ni_pdir = dp; /* for dirremove() */ 635 /* 636 * Return pointer to current entry in ndp->ni_offset, 637 * and distance past previous entry (if there 638 * is a previous entry in this block) in ndp->ni_count. 639 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 640 */ 641 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 642 ndp->ni_count = 0; 643 else 644 ndp->ni_count = ndp->ni_offset - prevoff; 645 if (lockparent) { 646 if (dp->i_number == ndp->ni_dent.d_ino) 647 dp->i_count++; 648 else { 649 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 650 if (dp == NULL) { 651 iput(ndp->ni_pdir); 652 goto bad; 653 } 654 /* 655 * If directory is "sticky", then user must own 656 * the directory, or the file in it, else he 657 * may not delete it (unless he's root). This 658 * implements append-only directories. 659 */ 660 if ((ndp->ni_pdir->i_mode & ISVTX) && 661 u.u_uid != 0 && 662 u.u_uid != ndp->ni_pdir->i_uid && 663 dp->i_uid != u.u_uid) { 664 iput(ndp->ni_pdir); 665 u.u_error = EPERM; 666 goto bad; 667 } 668 } 669 } 670 FREE(nbp, M_NAMEI); 671 return (dp); 672 } 673 674 /* 675 * If rewriting (rename), return the inode and the 676 * information required to rewrite the present directory 677 * Must get inode of directory entry to verify it's a 678 * regular file, or empty directory. 679 */ 680 if ((flag == CREATE && lockparent) && *cp == 0) { 681 if (access(dp, IWRITE)) 682 goto bad; 683 ndp->ni_pdir = dp; /* for dirrewrite() */ 684 /* 685 * Careful about locking second inode. 686 * This can only occur if the target is ".". 687 */ 688 if (dp->i_number == ndp->ni_dent.d_ino) { 689 u.u_error = EISDIR; /* XXX */ 690 goto bad; 691 } 692 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 693 if (dp == NULL) { 694 iput(ndp->ni_pdir); 695 goto bad; 696 } 697 FREE(nbp, M_NAMEI); 698 return (dp); 699 } 700 701 /* 702 * Check for symbolic link, which may require us to massage the 703 * name before we continue translation. We do not `iput' the 704 * directory because we may need it again if the symbolic link 705 * is relative to the current directory. Instead we save it 706 * unlocked as "pdp". We must get the target inode before unlocking 707 * the directory to insure that the inode will not be removed 708 * before we get it. We prevent deadlock by always fetching 709 * inodes from the root, moving down the directory tree. Thus 710 * when following backward pointers ".." we must unlock the 711 * parent directory before getting the requested directory. 712 * There is a potential race condition here if both the current 713 * and parent directories are removed before the `iget' for the 714 * inode associated with ".." returns. We hope that this occurs 715 * infrequently since we cannot avoid this race condition without 716 * implementing a sophisticated deadlock detection algorithm. 717 * Note also that this simple deadlock detection scheme will not 718 * work if the file system has any hard links other than ".." 719 * that point backwards in the directory structure. 720 */ 721 pdp = dp; 722 if (isdotdot) { 723 IUNLOCK(pdp); /* race to get the inode */ 724 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 725 if (dp == NULL) 726 goto bad2; 727 } else if (dp->i_number == ndp->ni_dent.d_ino) { 728 dp->i_count++; /* we want ourself, ie "." */ 729 } else { 730 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 731 IUNLOCK(pdp); 732 if (dp == NULL) 733 goto bad2; 734 } 735 736 /* 737 * Insert name into cache if appropriate. 738 */ 739 if (makeentry) { 740 if (ncp != NULL) 741 panic("namei: duplicating cache"); 742 /* 743 * Free the cache slot at head of lru chain. 744 */ 745 if (ncp = nchhead) { 746 /* remove from lru chain */ 747 *ncp->nc_prev = ncp->nc_nxt; 748 if (ncp->nc_nxt) 749 ncp->nc_nxt->nc_prev = ncp->nc_prev; 750 else 751 nchtail = ncp->nc_prev; 752 remque(ncp); /* remove from old hash chain */ 753 /* grab the inode we just found */ 754 ncp->nc_ip = dp; 755 /* fill in cache info */ 756 ncp->nc_ino = pdp->i_number; /* parents inum */ 757 ncp->nc_dev = pdp->i_dev; /* & device */ 758 ncp->nc_idev = dp->i_dev; /* our device */ 759 ncp->nc_id = dp->i_id; /* identifier */ 760 ncp->nc_nlen = ndp->ni_dent.d_namlen; 761 bcopy(ndp->ni_dent.d_name, ncp->nc_name, 762 (unsigned)ncp->nc_nlen); 763 /* link at end of lru chain */ 764 ncp->nc_nxt = NULL; 765 ncp->nc_prev = nchtail; 766 *nchtail = ncp; 767 nchtail = &ncp->nc_nxt; 768 /* and insert on hash chain */ 769 insque(ncp, nhp); 770 } 771 } 772 773 haveino: 774 fs = dp->i_fs; 775 776 /* 777 * Check for symbolic link 778 */ 779 if ((dp->i_mode & IFMT) == IFLNK && 780 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 781 u_int pathlen = strlen(cp) + 1; 782 783 if (dp->i_size + pathlen >= MAXPATHLEN - 1) { 784 u.u_error = ENAMETOOLONG; 785 goto bad2; 786 } 787 if (++nlink > MAXSYMLINKS) { 788 u.u_error = ELOOP; 789 goto bad2; 790 } 791 ovbcopy(cp, nbp + dp->i_size, pathlen); 792 u.u_error = 793 rdwri(UIO_READ, dp, nbp, (int)dp->i_size, 794 (off_t)0, 1, (int *)0); 795 if (u.u_error) 796 goto bad2; 797 cp = nbp; 798 iput(dp); 799 if (*cp == '/') { 800 irele(pdp); 801 while (*cp == '/') 802 cp++; 803 if ((dp = u.u_rdir) == NULL) 804 dp = rootdir; 805 ILOCK(dp); 806 dp->i_count++; 807 } else { 808 dp = pdp; 809 ILOCK(dp); 810 } 811 fs = dp->i_fs; 812 goto dirloop; 813 } 814 815 /* 816 * Not a symbolic link. If more pathname, 817 * continue at next component, else return. 818 */ 819 if (*cp == '/') { 820 while (*cp == '/') 821 cp++; 822 irele(pdp); 823 goto dirloop; 824 } 825 FREE(nbp, M_NAMEI); 826 if (lockparent) 827 ndp->ni_pdir = pdp; 828 else 829 irele(pdp); 830 return (dp); 831 bad2: 832 irele(pdp); 833 bad: 834 if (bp) 835 brelse(bp); 836 if (dp) 837 iput(dp); 838 FREE(nbp, M_NAMEI); 839 return (NULL); 840 } 841 842 843 dirbad(ip, offset, how) 844 struct inode *ip; 845 off_t offset; 846 char *how; 847 { 848 849 printf("%s: bad dir ino %d at offset %d: %s\n", 850 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 851 } 852 853 /* 854 * Do consistency checking on a directory entry: 855 * record length must be multiple of 4 856 * entry must fit in rest of its DIRBLKSIZ block 857 * record must be large enough to contain entry 858 * name is not longer than MAXNAMLEN 859 * name must be as long as advertised, and null terminated 860 */ 861 dirbadentry(ep, entryoffsetinblock) 862 register struct direct *ep; 863 int entryoffsetinblock; 864 { 865 register int i; 866 867 if ((ep->d_reclen & 0x3) != 0 || 868 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 869 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 870 return (1); 871 for (i = 0; i < ep->d_namlen; i++) 872 if (ep->d_name[i] == '\0') 873 return (1); 874 return (ep->d_name[i]); 875 } 876 877 /* 878 * Write a directory entry after a call to namei, using the parameters 879 * which it left in the u. area. The argument ip is the inode which 880 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 881 * a pointer to the directory to be written, which was left locked by 882 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 883 * how the space for the new entry is to be gotten. 884 */ 885 direnter(ip, ndp) 886 struct inode *ip; 887 register struct nameidata *ndp; 888 { 889 register struct direct *ep, *nep; 890 register struct inode *dp = ndp->ni_pdir; 891 struct buf *bp; 892 int loc, spacefree, error = 0; 893 u_int dsize; 894 int newentrysize; 895 char *dirbuf; 896 897 ndp->ni_dent.d_ino = ip->i_number; 898 newentrysize = DIRSIZ(&ndp->ni_dent); 899 if (ndp->ni_count == 0) { 900 /* 901 * If ndp->ni_count is 0, then namei could find no space in the 902 * directory. In this case ndp->ni_offset will be on a directory 903 * block boundary and we will write the new entry into a fresh 904 * block. 905 */ 906 if (ndp->ni_offset&(DIRBLKSIZ-1)) 907 panic("wdir: newblk"); 908 ndp->ni_dent.d_reclen = DIRBLKSIZ; 909 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 910 newentrysize, ndp->ni_offset, 1, (int *)0); 911 if (DIRBLKSIZ > dp->i_fs->fs_fsize) 912 panic("wdir: blksize"); /* XXX - should grow w/bmap() */ 913 else 914 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 915 iput(dp); 916 return (error); 917 } 918 919 /* 920 * If ndp->ni_count is non-zero, then namei found space for the new 921 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 922 * in the directory. To use this space, we may have to compact 923 * the entries located there, by copying them together towards 924 * the beginning of the block, leaving the free space in 925 * one usable chunk at the end. 926 */ 927 928 /* 929 * Increase size of directory if entry eats into new space. 930 * This should never push the size past a new multiple of 931 * DIRBLKSIZE. 932 * 933 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 934 */ 935 if (ndp->ni_offset + ndp->ni_count > dp->i_size) 936 dp->i_size = ndp->ni_offset + ndp->ni_count; 937 /* 938 * Get the block containing the space for the new directory 939 * entry. Should return error by result instead of u.u_error. 940 */ 941 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf); 942 if (bp == 0) { 943 iput(dp); 944 return (u.u_error); 945 } 946 /* 947 * Find space for the new entry. In the simple case, the 948 * entry at offset base will have the space. If it does 949 * not, then namei arranged that compacting the region 950 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 951 */ 952 ep = (struct direct *)dirbuf; 953 dsize = DIRSIZ(ep); 954 spacefree = ep->d_reclen - dsize; 955 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 956 nep = (struct direct *)(dirbuf + loc); 957 if (ep->d_ino) { 958 /* trim the existing slot */ 959 ep->d_reclen = dsize; 960 ep = (struct direct *)((char *)ep + dsize); 961 } else { 962 /* overwrite; nothing there; header is ours */ 963 spacefree += dsize; 964 } 965 dsize = DIRSIZ(nep); 966 spacefree += nep->d_reclen - dsize; 967 loc += nep->d_reclen; 968 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 969 } 970 /* 971 * Update the pointer fields in the previous entry (if any), 972 * copy in the new entry, and write out the block. 973 */ 974 if (ep->d_ino == 0) { 975 if (spacefree + dsize < newentrysize) 976 panic("wdir: compact1"); 977 ndp->ni_dent.d_reclen = spacefree + dsize; 978 } else { 979 if (spacefree < newentrysize) 980 panic("wdir: compact2"); 981 ndp->ni_dent.d_reclen = spacefree; 982 ep->d_reclen = dsize; 983 ep = (struct direct *)((char *)ep + dsize); 984 } 985 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 986 bwrite(bp); 987 dp->i_flag |= IUPD|ICHG; 988 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size) 989 itrunc(dp, (u_long)ndp->ni_endoff); 990 iput(dp); 991 return (error); 992 } 993 994 /* 995 * Remove a directory entry after a call to namei, using the 996 * parameters which it left in the u. area. The u. entry 997 * ni_offset contains the offset into the directory of the 998 * entry to be eliminated. The ni_count field contains the 999 * size of the previous record in the directory. If this 1000 * is 0, the first entry is being deleted, so we need only 1001 * zero the inode number to mark the entry as free. If the 1002 * entry isn't the first in the directory, we must reclaim 1003 * the space of the now empty record by adding the record size 1004 * to the size of the previous entry. 1005 */ 1006 dirremove(ndp) 1007 register struct nameidata *ndp; 1008 { 1009 register struct inode *dp = ndp->ni_pdir; 1010 register struct buf *bp; 1011 struct direct *ep; 1012 1013 if (ndp->ni_count == 0) { 1014 /* 1015 * First entry in block: set d_ino to zero. 1016 */ 1017 ndp->ni_dent.d_ino = 0; 1018 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1019 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1020 } else { 1021 /* 1022 * Collapse new free space into previous entry. 1023 */ 1024 bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep); 1025 if (bp == 0) 1026 return (0); 1027 ep->d_reclen += ndp->ni_dent.d_reclen; 1028 bwrite(bp); 1029 dp->i_flag |= IUPD|ICHG; 1030 } 1031 return (1); 1032 } 1033 1034 /* 1035 * Rewrite an existing directory entry to point at the inode 1036 * supplied. The parameters describing the directory entry are 1037 * set up by a call to namei. 1038 */ 1039 dirrewrite(dp, ip, ndp) 1040 struct inode *dp, *ip; 1041 struct nameidata *ndp; 1042 { 1043 1044 ndp->ni_dent.d_ino = ip->i_number; 1045 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1046 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1047 iput(dp); 1048 } 1049 1050 /* 1051 * Return buffer with contents of block "offset" 1052 * from the beginning of directory "ip". If "res" 1053 * is non-zero, fill it in with a pointer to the 1054 * remaining space in the directory. 1055 */ 1056 struct buf * 1057 blkatoff(ip, offset, res) 1058 struct inode *ip; 1059 off_t offset; 1060 char **res; 1061 { 1062 register struct fs *fs = ip->i_fs; 1063 daddr_t lbn = lblkno(fs, offset); 1064 int bsize = blksize(fs, ip, lbn); 1065 register struct buf *bp; 1066 daddr_t bn; 1067 1068 bn = bmap(ip, lbn, B_READ, bsize); 1069 if (u.u_error) 1070 return (0); 1071 if (bn == (daddr_t)-1) { 1072 dirbad(ip, offset, "hole in dir"); 1073 return (0); 1074 } 1075 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize); 1076 if (bp->b_flags & B_ERROR) { 1077 brelse(bp); 1078 return (0); 1079 } 1080 if (res) 1081 *res = bp->b_un.b_addr + blkoff(fs, offset); 1082 return (bp); 1083 } 1084 1085 /* 1086 * Check if a directory is empty or not. 1087 * Inode supplied must be locked. 1088 * 1089 * Using a struct dirtemplate here is not precisely 1090 * what we want, but better than using a struct direct. 1091 * 1092 * NB: does not handle corrupted directories. 1093 */ 1094 dirempty(ip, parentino) 1095 register struct inode *ip; 1096 ino_t parentino; 1097 { 1098 register off_t off; 1099 struct dirtemplate dbuf; 1100 register struct direct *dp = (struct direct *)&dbuf; 1101 int error, count; 1102 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1103 1104 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1105 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1106 off, 1, &count); 1107 /* 1108 * Since we read MINDIRSIZ, residual must 1109 * be 0 unless we're at end of file. 1110 */ 1111 if (error || count != 0) 1112 return (0); 1113 /* avoid infinite loops */ 1114 if (dp->d_reclen == 0) 1115 return (0); 1116 /* skip empty entries */ 1117 if (dp->d_ino == 0) 1118 continue; 1119 /* accept only "." and ".." */ 1120 if (dp->d_namlen > 2) 1121 return (0); 1122 if (dp->d_name[0] != '.') 1123 return (0); 1124 /* 1125 * At this point d_namlen must be 1 or 2. 1126 * 1 implies ".", 2 implies ".." if second 1127 * char is also "." 1128 */ 1129 if (dp->d_namlen == 1) 1130 continue; 1131 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1132 continue; 1133 return (0); 1134 } 1135 return (1); 1136 } 1137 1138 /* 1139 * Check if source directory is in the path of the target directory. 1140 * Target is supplied locked, source is unlocked. 1141 * The target is always iput() before returning. 1142 */ 1143 checkpath(source, target) 1144 struct inode *source, *target; 1145 { 1146 struct dirtemplate dirbuf; 1147 register struct inode *ip; 1148 int error = 0; 1149 1150 ip = target; 1151 if (ip->i_number == source->i_number) { 1152 error = EEXIST; 1153 goto out; 1154 } 1155 if (ip->i_number == ROOTINO) 1156 goto out; 1157 1158 for (;;) { 1159 if ((ip->i_mode&IFMT) != IFDIR) { 1160 error = ENOTDIR; 1161 break; 1162 } 1163 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1164 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1165 if (error != 0) 1166 break; 1167 if (dirbuf.dotdot_namlen != 2 || 1168 dirbuf.dotdot_name[0] != '.' || 1169 dirbuf.dotdot_name[1] != '.') { 1170 error = ENOTDIR; 1171 break; 1172 } 1173 if (dirbuf.dotdot_ino == source->i_number) { 1174 error = EINVAL; 1175 break; 1176 } 1177 if (dirbuf.dotdot_ino == ROOTINO) 1178 break; 1179 iput(ip); 1180 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1181 if (ip == NULL) { 1182 error = u.u_error; 1183 break; 1184 } 1185 } 1186 1187 out: 1188 if (error == ENOTDIR) 1189 printf("checkpath: .. not a directory\n"); 1190 if (ip != NULL) 1191 iput(ip); 1192 return (error); 1193 } 1194 1195 /* 1196 * Name cache initialization, from main() when we are booting 1197 */ 1198 nchinit() 1199 { 1200 register union nchash *nchp; 1201 register struct namecache *ncp; 1202 1203 nchhead = 0; 1204 nchtail = &nchhead; 1205 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) { 1206 ncp->nc_forw = ncp; /* hash chain */ 1207 ncp->nc_back = ncp; 1208 ncp->nc_nxt = NULL; /* lru chain */ 1209 *nchtail = ncp; 1210 ncp->nc_prev = nchtail; 1211 nchtail = &ncp->nc_nxt; 1212 /* all else is zero already */ 1213 } 1214 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1215 nchp->nch_head[0] = nchp; 1216 nchp->nch_head[1] = nchp; 1217 } 1218 } 1219 1220 /* 1221 * Cache flush, called when filesys is umounted to 1222 * remove entries that would now be invalid 1223 * 1224 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1225 * if the cache lru chain is modified while we are dumping the 1226 * inode. This makes the algorithm O(n^2), but do you think I care? 1227 */ 1228 nchinval(dev) 1229 register dev_t dev; 1230 { 1231 register struct namecache *ncp, *nxtcp; 1232 1233 for (ncp = nchhead; ncp; ncp = nxtcp) { 1234 nxtcp = ncp->nc_nxt; 1235 if (ncp->nc_ip == NULL || 1236 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1237 continue; 1238 /* free the resources we had */ 1239 ncp->nc_idev = NODEV; 1240 ncp->nc_dev = NODEV; 1241 ncp->nc_id = NULL; 1242 ncp->nc_ino = 0; 1243 ncp->nc_ip = NULL; 1244 remque(ncp); /* remove entry from its hash chain */ 1245 ncp->nc_forw = ncp; /* and make a dummy one */ 1246 ncp->nc_back = ncp; 1247 /* delete this entry from LRU chain */ 1248 *ncp->nc_prev = nxtcp; 1249 if (nxtcp) 1250 nxtcp->nc_prev = ncp->nc_prev; 1251 else 1252 nchtail = ncp->nc_prev; 1253 /* cause rescan of list, it may have altered */ 1254 nxtcp = nchhead; 1255 /* put the now-free entry at head of LRU */ 1256 ncp->nc_nxt = nxtcp; 1257 ncp->nc_prev = &nchhead; 1258 nxtcp->nc_prev = &ncp->nc_nxt; 1259 nchhead = ncp; 1260 } 1261 } 1262 1263 /* 1264 * Name cache invalidation of all entries. 1265 */ 1266 cacheinvalall() 1267 { 1268 register struct namecache *ncp; 1269 1270 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) 1271 ncp->nc_id = 0; 1272 } 1273