1 /* 2 * Copyright (c) 1982 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)vfs_lookup.c 6.25 (Berkeley) 09/04/85 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "inode.h" 12 #include "fs.h" 13 #include "mount.h" 14 #include "dir.h" 15 #include "user.h" 16 #include "buf.h" 17 #include "conf.h" 18 #include "uio.h" 19 #include "kernel.h" 20 21 struct buf *blkatoff(); 22 struct buf *freenamebuf; 23 int dirchk = 0; 24 25 /* 26 * Structures associated with name cacheing. 27 */ 28 #define NCHHASH 32 /* size of hash table */ 29 30 #if ((NCHHASH)&((NCHHASH)-1)) != 0 31 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 32 #else 33 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 34 #endif 35 36 union nchash { 37 union nchash *nch_head[2]; 38 struct nch *nch_chain[2]; 39 } nchash[NCHHASH]; 40 #define nch_forw nch_chain[0] 41 #define nch_back nch_chain[1] 42 43 struct nch *nchhead, **nchtail; /* LRU chain pointers */ 44 struct nchstats nchstats; /* cache effectiveness statistics */ 45 46 /* 47 * Convert a pathname into a pointer to a locked inode, 48 * with side effects usable in creating and removing files. 49 * This is a very central and rather complicated routine. 50 * 51 * The segflg defines whether the name is to be copied from user 52 * space or kernel space. 53 * 54 * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether 55 * the name is to be (looked up, created, deleted). If flag has 56 * LOCKPARENT or'ed into it and the target of the pathname exists, 57 * namei returns both the target and its parent directory locked. 58 * If the file system is not maintained in a strict tree hierarchy, 59 * this can result in a deadlock situation. When creating and 60 * LOCKPARENT is specified, the target may not be ".". When deleting 61 * and LOCKPARENT is specified, the target may be ".", but the caller 62 * must check to insure it does an irele and iput instead of two iputs. 63 * 64 * The FOLLOW flag is set when symbolic links are to be followed 65 * when they occur at the end of the name translation process. 66 * 67 * Name caching works as follows: 68 * 69 * names found by directory scans are retained in a cache 70 * for future reference. It is managed LRU, so frequently 71 * used names will hang around. Cache is indexed by hash value 72 * obtained from (ino,dev,name) where ino & dev refer to the 73 * directory containing name. 74 * 75 * For simplicity (and economy of storage), names longer than 76 * some (small) maximum length are not cached, they occur 77 * infrequently in any case, and are almost never of interest. 78 * 79 * Upon reaching the last segment of a path, if the reference 80 * is for DELETE, or NOCACHE is set (rewrite), and the 81 * name is located in the cache, it will be dropped. 82 * 83 * We must be sure never to enter the name ".." into the cache 84 * because of the extremely kludgey way that rename() alters 85 * ".." in a situation like 86 * mv a/x b/x 87 * where x is a directory, and x/.. is the ".." in question. 88 * 89 * Overall outline of namei: 90 * 91 * copy in name 92 * get starting directory 93 * dirloop: 94 * check accessibility of directory 95 * dirloop2: 96 * copy next component of name to ndp->ni_dent 97 * handle degenerate case where name is null string 98 * look for name in cache, if found, then if at end of path 99 * and deleting or creating, drop it, else to haveino 100 * search for name in directory, to found or notfound 101 * notfound: 102 * if creating, return locked directory, leaving info on avail. slots 103 * else return error 104 * found: 105 * if at end of path and deleting, return information to allow delete 106 * if at end of path and rewriting (create and LOCKPARENT), lock target 107 * inode and return info to allow rewrite 108 * if .. and on mounted filesys, look in mount table for parent 109 * if not at end, if neither creating nor deleting, add name to cache 110 * haveino: 111 * if symbolic link, massage name in buffer and continue at dirloop 112 * if more components of name, do next level at dirloop 113 * return the answer as locked inode 114 * 115 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 116 * but unlocked. 117 */ 118 struct inode * 119 namei(ndp) 120 register struct nameidata *ndp; 121 { 122 register char *cp; /* pointer into pathname argument */ 123 /* these variables refer to things which must be freed or unlocked */ 124 register struct inode *dp = 0; /* the directory we are searching */ 125 register struct nch *ncp; /* cache slot for entry */ 126 register struct fs *fs; /* file system that directory is in */ 127 register struct buf *bp = 0; /* a buffer of directory entries */ 128 register struct direct *ep; /* the current directory entry */ 129 int entryoffsetinblock; /* offset of ep in bp's buffer */ 130 register struct buf *nbp; /* buffer storing path name argument */ 131 /* these variables hold information about the search for a slot */ 132 enum {NONE, COMPACT, FOUND} slotstatus; 133 int slotoffset = -1; /* offset of area with free space */ 134 int slotsize; /* size of area at slotoffset */ 135 int slotfreespace; /* amount of space free in slot */ 136 int slotneeded; /* size of the entry we're seeking */ 137 /* */ 138 int numdirpasses; /* strategy for directory search */ 139 int endsearch; /* offset to end directory search */ 140 int prevoff; /* ndp->ni_offset of previous entry */ 141 int nlink = 0; /* number of symbolic links taken */ 142 struct inode *pdp; /* saved dp during symlink work */ 143 int error, i; 144 int lockparent; 145 int docache; /* == 0 do not cache last component */ 146 int makeentry; /* != 0 if name to be added to cache */ 147 unsigned hash; /* value of name hash for entry */ 148 union nchash *nhp; /* cache chain head for entry */ 149 int isdotdot; /* != 0 if current name is ".." */ 150 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 151 off_t enduseful; /* pointer past last used dir slot */ 152 153 lockparent = ndp->ni_nameiop & LOCKPARENT; 154 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 155 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 156 if (flag == DELETE || lockparent) 157 docache = 0; 158 /* 159 * Get a buffer for the name to be translated, and copy the 160 * name into the buffer. 161 */ 162 nbp = freenamebuf; 163 if (nbp == NULL) 164 nbp = geteblk(MAXPATHLEN); 165 else 166 freenamebuf = nbp->av_forw; 167 if (ndp->ni_segflg == UIO_SYSSPACE) 168 error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 169 (u_int *)0); 170 else 171 error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 172 (u_int *)0); 173 if (error) { 174 u.u_error = error; 175 goto bad; 176 } 177 178 /* 179 * Get starting directory. 180 */ 181 cp = nbp->b_un.b_addr; 182 if (*cp == '/') { 183 while (*cp == '/') 184 cp++; 185 if ((dp = u.u_rdir) == NULL) 186 dp = rootdir; 187 } else 188 dp = u.u_cdir; 189 fs = dp->i_fs; 190 ILOCK(dp); 191 dp->i_count++; 192 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 193 ndp->ni_endoff = 0; 194 195 /* 196 * We come to dirloop to search a new directory. 197 * The directory must be locked so that it can be 198 * iput, and fs must be already set to dp->i_fs. 199 */ 200 dirloop: 201 /* 202 * Check accessiblity of directory. 203 */ 204 if ((dp->i_mode&IFMT) != IFDIR) { 205 u.u_error = ENOTDIR; 206 goto bad; 207 } 208 if (access(dp, IEXEC)) 209 goto bad; 210 211 dirloop2: 212 /* 213 * Copy next component of name to ndp->ni_dent. 214 */ 215 hash = 0; 216 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 217 if (i >= MAXNAMLEN) { 218 u.u_error = ENAMETOOLONG; 219 goto bad; 220 } 221 if (*cp & 0200) 222 if ((*cp&0377) == ('/'|0200) || flag != DELETE) { 223 u.u_error = EINVAL; 224 goto bad; 225 } 226 ndp->ni_dent.d_name[i++] = *cp; 227 hash += (unsigned char)*cp * i; 228 } 229 ndp->ni_dent.d_namlen = i; 230 ndp->ni_dent.d_name[i] = '\0'; 231 isdotdot = (i == 2 && 232 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 233 makeentry = 1; 234 if (*cp == '\0' && docache == 0) 235 makeentry = 0; 236 237 /* 238 * Check for degenerate name (e.g. / or "") 239 * which is a way of talking about a directory, 240 * e.g. like "/." or ".". 241 */ 242 if (ndp->ni_dent.d_name[0] == '\0') { 243 if (flag != LOOKUP || lockparent) { 244 u.u_error = EISDIR; 245 goto bad; 246 } 247 nbp->av_forw = freenamebuf; 248 freenamebuf = nbp; 249 return (dp); 250 } 251 252 /* 253 * We now have a segment name to search for, and a directory to search. 254 * 255 * Before tediously performing a linear scan of the directory, 256 * check the name cache to see if the directory/name pair 257 * we are looking for is known already. We don't do this 258 * if the segment name is long, simply so the cache can avoid 259 * holding long names (which would either waste space, or 260 * add greatly to the complexity). 261 */ 262 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 263 nchstats.ncs_long++; 264 makeentry = 0; 265 } else { 266 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 267 for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp; 268 ncp = ncp->nc_forw) { 269 if (ncp->nc_ino == dp->i_number && 270 ncp->nc_dev == dp->i_dev && 271 ncp->nc_nlen == ndp->ni_dent.d_namlen && 272 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 273 ncp->nc_nlen)) 274 break; 275 } 276 277 if (ncp == (struct nch *)nhp) { 278 nchstats.ncs_miss++; 279 ncp = NULL; 280 } else { 281 if (ncp->nc_id != ncp->nc_ip->i_id) { 282 nchstats.ncs_falsehits++; 283 } else if (!makeentry) { 284 nchstats.ncs_badhits++; 285 } else { 286 287 /* 288 * move this slot to end of LRU 289 * chain, if not already there 290 */ 291 if (ncp->nc_nxt) { 292 /* remove from LRU chain */ 293 *ncp->nc_prev = ncp->nc_nxt; 294 ncp->nc_nxt->nc_prev = ncp->nc_prev; 295 296 /* and replace at end of it */ 297 ncp->nc_nxt = NULL; 298 ncp->nc_prev = nchtail; 299 *nchtail = ncp; 300 nchtail = &ncp->nc_nxt; 301 } 302 303 /* 304 * Get the next inode in the path. 305 * See comment above other `IUNLOCK' code for 306 * an explaination of the locking protocol. 307 */ 308 pdp = dp; 309 if (!isdotdot || dp != u.u_rdir) 310 dp = ncp->nc_ip; 311 if (dp == NULL) 312 panic("nami: null cache ino"); 313 if (pdp == dp) { 314 dp->i_count++; 315 } else if (isdotdot) { 316 IUNLOCK(pdp); 317 igrab(dp); 318 } else { 319 igrab(dp); 320 IUNLOCK(pdp); 321 } 322 323 /* 324 * Verify that the inode that we got 325 * did not change while we were waiting 326 * for it to be locked. 327 */ 328 if (ncp->nc_id != ncp->nc_ip->i_id) { 329 iput(dp); 330 ILOCK(pdp); 331 dp = pdp; 332 nchstats.ncs_falsehits++; 333 } else { 334 ndp->ni_dent.d_ino = dp->i_number; 335 /* ni_dent.d_reclen is garbage ... */ 336 nchstats.ncs_goodhits++; 337 goto haveino; 338 } 339 } 340 341 /* 342 * Last component and we are renaming or deleting, 343 * the cache entry is invalid, or otherwise don't 344 * want cache entry to exist. 345 */ 346 347 /* remove from LRU chain */ 348 *ncp->nc_prev = ncp->nc_nxt; 349 if (ncp->nc_nxt) 350 ncp->nc_nxt->nc_prev = ncp->nc_prev; 351 else 352 nchtail = ncp->nc_prev; 353 354 /* remove from hash chain */ 355 remque(ncp); 356 357 /* insert at head of LRU list (first to grab) */ 358 ncp->nc_nxt = nchhead; 359 ncp->nc_prev = &nchhead; 360 nchhead->nc_prev = &ncp->nc_nxt; 361 nchhead = ncp; 362 363 /* and make a dummy hash chain */ 364 ncp->nc_forw = ncp; 365 ncp->nc_back = ncp; 366 367 ncp = NULL; 368 } 369 } 370 371 /* 372 * Suppress search for slots unless creating 373 * file and at end of pathname, in which case 374 * we watch for a place to put the new file in 375 * case it doesn't already exist. 376 */ 377 slotstatus = FOUND; 378 if (flag == CREATE && *cp == 0) { 379 slotstatus = NONE; 380 slotfreespace = 0; 381 slotneeded = DIRSIZ(&ndp->ni_dent); 382 } 383 /* 384 * If this is the same directory that this process 385 * previously searched, pick up where we last left off. 386 * We cache only lookups as these are the most common 387 * and have the greatest payoff. Caching CREATE has little 388 * benefit as it usually must search the entire directory 389 * to determine that the entry does not exist. Caching the 390 * location of the last DELETE has not reduced profiling time 391 * and hence has been removed in the interest of simplicity. 392 */ 393 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 394 dp->i_dev != u.u_ncache.nc_dev) { 395 ndp->ni_offset = 0; 396 numdirpasses = 1; 397 } else { 398 if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) { 399 if (u.u_ncache.nc_prevoffset > dp->i_size) 400 u.u_ncache.nc_prevoffset = 0; 401 else 402 u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1); 403 u.u_ncache.nc_time = time.tv_sec; 404 } 405 ndp->ni_offset = u.u_ncache.nc_prevoffset; 406 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 407 if (entryoffsetinblock != 0) { 408 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 409 if (bp == 0) 410 goto bad; 411 } 412 numdirpasses = 2; 413 nchstats.ncs_2passes++; 414 } 415 endsearch = roundup(dp->i_size, DIRBLKSIZ); 416 enduseful = 0; 417 418 searchloop: 419 while (ndp->ni_offset < endsearch) { 420 /* 421 * If offset is on a block boundary, 422 * read the next directory block. 423 * Release previous if it exists. 424 */ 425 if (blkoff(fs, ndp->ni_offset) == 0) { 426 if (bp != NULL) 427 brelse(bp); 428 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 429 if (bp == 0) 430 goto bad; 431 entryoffsetinblock = 0; 432 } 433 434 /* 435 * If still looking for a slot, and at a DIRBLKSIZE 436 * boundary, have to start looking for free space again. 437 */ 438 if (slotstatus == NONE && 439 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 440 slotoffset = -1; 441 slotfreespace = 0; 442 } 443 444 /* 445 * Get pointer to next entry. 446 * Full validation checks are slow, so we only check 447 * enough to insure forward progress through the 448 * directory. Complete checks can be run by patching 449 * "dirchk" to be true. 450 */ 451 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 452 if (ep->d_reclen <= 0 || 453 dirchk && dirbadentry(ep, entryoffsetinblock)) { 454 dirbad(dp, ndp->ni_offset, "mangled entry"); 455 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 456 ndp->ni_offset += i; 457 entryoffsetinblock += i; 458 continue; 459 } 460 461 /* 462 * If an appropriate sized slot has not yet been found, 463 * check to see if one is available. Also accumulate space 464 * in the current block so that we can determine if 465 * compaction is viable. 466 */ 467 if (slotstatus != FOUND) { 468 int size = ep->d_reclen; 469 470 if (ep->d_ino != 0) 471 size -= DIRSIZ(ep); 472 if (size > 0) { 473 if (size >= slotneeded) { 474 slotstatus = FOUND; 475 slotoffset = ndp->ni_offset; 476 slotsize = ep->d_reclen; 477 } else if (slotstatus == NONE) { 478 slotfreespace += size; 479 if (slotoffset == -1) 480 slotoffset = ndp->ni_offset; 481 if (slotfreespace >= slotneeded) { 482 slotstatus = COMPACT; 483 slotsize = ndp->ni_offset + 484 ep->d_reclen - slotoffset; 485 } 486 } 487 } 488 } 489 490 /* 491 * Check for a name match. 492 */ 493 if (ep->d_ino) { 494 if (ep->d_namlen == ndp->ni_dent.d_namlen && 495 !bcmp(ndp->ni_dent.d_name, ep->d_name, 496 ep->d_namlen)) 497 goto found; 498 } 499 prevoff = ndp->ni_offset; 500 ndp->ni_offset += ep->d_reclen; 501 entryoffsetinblock += ep->d_reclen; 502 if (ep->d_ino) 503 enduseful = ndp->ni_offset; 504 } 505 /* notfound: */ 506 /* 507 * If we started in the middle of the directory and failed 508 * to find our target, we must check the beginning as well. 509 */ 510 if (numdirpasses == 2) { 511 numdirpasses--; 512 ndp->ni_offset = 0; 513 endsearch = u.u_ncache.nc_prevoffset; 514 goto searchloop; 515 } 516 /* 517 * If creating, and at end of pathname and current 518 * directory has not been removed, then can consider 519 * allowing file to be created. 520 */ 521 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 522 /* 523 * Access for write is interpreted as allowing 524 * creation of files in the directory. 525 */ 526 if (access(dp, IWRITE)) 527 goto bad; 528 /* 529 * Return an indication of where the new directory 530 * entry should be put. If we didn't find a slot, 531 * then set ndp->ni_count to 0 indicating that the new 532 * slot belongs at the end of the directory. If we found 533 * a slot, then the new entry can be put in the range 534 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 535 */ 536 if (slotstatus == NONE) { 537 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 538 ndp->ni_count = 0; 539 enduseful = ndp->ni_offset; 540 } else { 541 ndp->ni_offset = slotoffset; 542 ndp->ni_count = slotsize; 543 if (enduseful < slotoffset + slotsize) 544 enduseful = slotoffset + slotsize; 545 } 546 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ); 547 dp->i_flag |= IUPD|ICHG; 548 if (bp) 549 brelse(bp); 550 nbp->av_forw = freenamebuf; 551 freenamebuf = nbp; 552 /* 553 * We return with the directory locked, so that 554 * the parameters we set up above will still be 555 * valid if we actually decide to do a direnter(). 556 * We return NULL to indicate that the entry doesn't 557 * currently exist, leaving a pointer to the (locked) 558 * directory inode in ndp->ni_pdir. 559 */ 560 ndp->ni_pdir = dp; 561 return (NULL); 562 } 563 u.u_error = ENOENT; 564 goto bad; 565 found: 566 if (numdirpasses == 2) 567 nchstats.ncs_pass2++; 568 /* 569 * Check that directory length properly reflects presence 570 * of this entry. 571 */ 572 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 573 dirbad(dp, ndp->ni_offset, "i_size too small"); 574 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 575 dp->i_flag |= IUPD|ICHG; 576 } 577 578 /* 579 * Found component in pathname. 580 * If the final component of path name, save information 581 * in the cache as to where the entry was found. 582 */ 583 if (*cp == '\0' && flag == LOOKUP) { 584 u.u_ncache.nc_prevoffset = ndp->ni_offset; 585 u.u_ncache.nc_inumber = dp->i_number; 586 u.u_ncache.nc_dev = dp->i_dev; 587 u.u_ncache.nc_time = time.tv_sec; 588 } 589 /* 590 * Save directory entry's inode number and reclen in ndp->ni_dent, 591 * and release directory buffer. 592 */ 593 ndp->ni_dent.d_ino = ep->d_ino; 594 ndp->ni_dent.d_reclen = ep->d_reclen; 595 brelse(bp); 596 bp = NULL; 597 598 /* 599 * If deleting, and at end of pathname, return 600 * parameters which can be used to remove file. 601 * If the lockparent flag isn't set, we return only 602 * the directory (in ndp->ni_pdir), otherwise we go 603 * on and lock the inode, being careful with ".". 604 */ 605 if (flag == DELETE && *cp == 0) { 606 /* 607 * Write access to directory required to delete files. 608 */ 609 if (access(dp, IWRITE)) 610 goto bad; 611 ndp->ni_pdir = dp; /* for dirremove() */ 612 /* 613 * Return pointer to current entry in ndp->ni_offset, 614 * and distance past previous entry (if there 615 * is a previous entry in this block) in ndp->ni_count. 616 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 617 */ 618 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 619 ndp->ni_count = 0; 620 else 621 ndp->ni_count = ndp->ni_offset - prevoff; 622 if (lockparent) { 623 if (dp->i_number == ndp->ni_dent.d_ino) 624 dp->i_count++; 625 else { 626 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 627 if (dp == NULL) { 628 iput(ndp->ni_pdir); 629 goto bad; 630 } 631 /* 632 * If directory is "sticky", then user must own 633 * the directory, or the file in it, else he 634 * may not delete it (unless he's root). This 635 * implements append-only directories. 636 */ 637 if ((ndp->ni_pdir->i_mode & ISVTX) && 638 u.u_uid != 0 && 639 u.u_uid != ndp->ni_pdir->i_uid && 640 dp->i_uid != u.u_uid) { 641 iput(ndp->ni_pdir); 642 u.u_error = EPERM; 643 goto bad; 644 } 645 } 646 } 647 nbp->av_forw = freenamebuf; 648 freenamebuf = nbp; 649 return (dp); 650 } 651 652 /* 653 * Special handling for ".." allowing chdir out of mounted 654 * file system: indirect .. in root inode to reevaluate 655 * in directory file system was mounted on. 656 */ 657 if (isdotdot) { 658 if (dp == u.u_rdir) { 659 ndp->ni_dent.d_ino = dp->i_number; 660 makeentry = 0; 661 } else if (ndp->ni_dent.d_ino == ROOTINO && 662 dp->i_number == ROOTINO) { 663 for (i = 1; i < NMOUNT; i++) 664 if (mount[i].m_bufp != NULL && 665 mount[i].m_dev == dp->i_dev) { 666 iput(dp); 667 dp = mount[i].m_inodp; 668 ILOCK(dp); 669 dp->i_count++; 670 fs = dp->i_fs; 671 cp -= 2; /* back over .. */ 672 goto dirloop2; 673 } 674 } 675 } 676 677 /* 678 * If rewriting (rename), return the inode and the 679 * information required to rewrite the present directory 680 * Must get inode of directory entry to verify it's a 681 * regular file, or empty directory. 682 */ 683 if ((flag == CREATE && lockparent) && *cp == 0) { 684 if (access(dp, IWRITE)) 685 goto bad; 686 ndp->ni_pdir = dp; /* for dirrewrite() */ 687 /* 688 * Careful about locking second inode. 689 * This can only occur if the target is ".". 690 */ 691 if (dp->i_number == ndp->ni_dent.d_ino) { 692 u.u_error = EISDIR; /* XXX */ 693 goto bad; 694 } 695 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 696 if (dp == NULL) { 697 iput(ndp->ni_pdir); 698 goto bad; 699 } 700 nbp->av_forw = freenamebuf; 701 freenamebuf = nbp; 702 return (dp); 703 } 704 705 /* 706 * Check for symbolic link, which may require us to massage the 707 * name before we continue translation. We do not `iput' the 708 * directory because we may need it again if the symbolic link 709 * is relative to the current directory. Instead we save it 710 * unlocked as "pdp". We must get the target inode before unlocking 711 * the directory to insure that the inode will not be removed 712 * before we get it. We prevent deadlock by always fetching 713 * inodes from the root, moving down the directory tree. Thus 714 * when following backward pointers ".." we must unlock the 715 * parent directory before getting the requested directory. 716 * There is a potential race condition here if both the current 717 * and parent directories are removed before the `iget' for the 718 * inode associated with ".." returns. We hope that this occurs 719 * infrequently since we cannot avoid this race condition without 720 * implementing a sophisticated deadlock detection algorithm. 721 * Note also that this simple deadlock detection scheme will not 722 * work if the file system has any hard links other than ".." 723 * that point backwards in the directory structure. 724 */ 725 pdp = dp; 726 if (isdotdot) { 727 IUNLOCK(pdp); /* race to get the inode */ 728 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 729 if (dp == NULL) 730 goto bad2; 731 } else if (dp->i_number == ndp->ni_dent.d_ino) { 732 dp->i_count++; /* we want ourself, ie "." */ 733 } else { 734 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 735 IUNLOCK(pdp); 736 if (dp == NULL) 737 goto bad2; 738 } 739 740 /* 741 * insert name into cache if appropriate 742 */ 743 if (makeentry) { 744 if (ncp != NULL) 745 panic("nami: duplicating cache"); 746 747 /* 748 * free the cache slot at head of lru chain 749 */ 750 if (ncp = nchhead) { 751 /* remove from lru chain */ 752 *ncp->nc_prev = ncp->nc_nxt; 753 if (ncp->nc_nxt) 754 ncp->nc_nxt->nc_prev = ncp->nc_prev; 755 else 756 nchtail = ncp->nc_prev; 757 758 /* remove from old hash chain */ 759 remque(ncp); 760 761 /* grab the inode we just found */ 762 ncp->nc_ip = dp; 763 764 /* fill in cache info */ 765 ncp->nc_ino = pdp->i_number; /* parents inum */ 766 ncp->nc_dev = pdp->i_dev; /* & device */ 767 ncp->nc_idev = dp->i_dev; /* our device */ 768 ncp->nc_id = dp->i_id; /* identifier */ 769 ncp->nc_nlen = ndp->ni_dent.d_namlen; 770 bcopy(ndp->ni_dent.d_name, ncp->nc_name, ncp->nc_nlen); 771 772 /* link at end of lru chain */ 773 ncp->nc_nxt = NULL; 774 ncp->nc_prev = nchtail; 775 *nchtail = ncp; 776 nchtail = &ncp->nc_nxt; 777 778 /* and insert on hash chain */ 779 insque(ncp, nhp); 780 } 781 } 782 783 haveino: 784 fs = dp->i_fs; 785 786 /* 787 * Check for symbolic link 788 */ 789 if ((dp->i_mode & IFMT) == IFLNK && 790 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 791 u_int pathlen = strlen(cp) + 1; 792 793 if (dp->i_size + pathlen >= MAXPATHLEN - 1) { 794 u.u_error = ENAMETOOLONG; 795 goto bad2; 796 } 797 if (++nlink > MAXSYMLINKS) { 798 u.u_error = ELOOP; 799 goto bad2; 800 } 801 ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); 802 u.u_error = 803 rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 804 0, 1, (int *)0); 805 if (u.u_error) 806 goto bad2; 807 cp = nbp->b_un.b_addr; 808 iput(dp); 809 if (*cp == '/') { 810 irele(pdp); 811 while (*cp == '/') 812 cp++; 813 if ((dp = u.u_rdir) == NULL) 814 dp = rootdir; 815 ILOCK(dp); 816 dp->i_count++; 817 } else { 818 dp = pdp; 819 ILOCK(dp); 820 } 821 fs = dp->i_fs; 822 goto dirloop; 823 } 824 825 /* 826 * Not a symbolic link. If more pathname, 827 * continue at next component, else return. 828 */ 829 if (*cp == '/') { 830 while (*cp == '/') 831 cp++; 832 irele(pdp); 833 goto dirloop; 834 } 835 nbp->av_forw = freenamebuf; 836 freenamebuf = nbp; 837 if (lockparent) 838 ndp->ni_pdir = pdp; 839 else 840 irele(pdp); 841 return (dp); 842 bad2: 843 irele(pdp); 844 bad: 845 if (bp) 846 brelse(bp); 847 if (dp) 848 iput(dp); 849 nbp->av_forw = freenamebuf; 850 freenamebuf = nbp; 851 return (NULL); 852 } 853 854 855 dirbad(ip, offset, how) 856 struct inode *ip; 857 off_t offset; 858 char *how; 859 { 860 861 printf("%s: bad dir ino %d at offset %d: %s\n", 862 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 863 } 864 865 /* 866 * Do consistency checking on a directory entry: 867 * record length must be multiple of 4 868 * record length must not be non-negative 869 * entry must fit in rest of its DIRBLKSIZ block 870 * record must be large enough to contain entry 871 * name is not longer than MAXNAMLEN 872 * name must be as long as advertised, and null terminated 873 */ 874 dirbadentry(ep, entryoffsetinblock) 875 register struct direct *ep; 876 int entryoffsetinblock; 877 { 878 register int i; 879 880 if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen <= 0 || 881 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 882 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 883 return (1); 884 for (i = 0; i < ep->d_namlen; i++) 885 if (ep->d_name[i] == '\0') 886 return (1); 887 return (ep->d_name[i]); 888 } 889 890 /* 891 * Write a directory entry after a call to namei, using the parameters 892 * which it left in the u. area. The argument ip is the inode which 893 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 894 * a pointer to the directory to be written, which was left locked by 895 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 896 * how the space for the new entry is to be gotten. 897 */ 898 direnter(ip, ndp) 899 struct inode *ip; 900 register struct nameidata *ndp; 901 { 902 register struct direct *ep, *nep; 903 register struct inode *dp = ndp->ni_pdir; 904 struct buf *bp; 905 int loc, spacefree, error = 0; 906 u_int dsize; 907 int newentrysize; 908 char *dirbuf; 909 910 ndp->ni_dent.d_ino = ip->i_number; 911 newentrysize = DIRSIZ(&ndp->ni_dent); 912 if (ndp->ni_count == 0) { 913 /* 914 * If ndp->ni_count is 0, then namei could find no space in the 915 * directory. In this case ndp->ni_offset will be on a directory 916 * block boundary and we will write the new entry into a fresh 917 * block. 918 */ 919 if (ndp->ni_offset&(DIRBLKSIZ-1)) 920 panic("wdir: newblk"); 921 ndp->ni_dent.d_reclen = DIRBLKSIZ; 922 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 923 newentrysize, ndp->ni_offset, 1, (int *)0); 924 if (DIRBLKSIZ > dp->i_fs->fs_fsize) 925 panic("wdir: blksize"); /* XXX - should grow w/bmap() */ 926 else 927 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 928 iput(dp); 929 return (error); 930 } 931 932 /* 933 * If ndp->ni_count is non-zero, then namei found space for the new 934 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 935 * in the directory. To use this space, we may have to compact 936 * the entries located there, by copying them together towards 937 * the beginning of the block, leaving the free space in 938 * one usable chunk at the end. 939 */ 940 941 /* 942 * Increase size of directory if entry eats into new space. 943 * This should never push the size past a new multiple of 944 * DIRBLKSIZE. 945 * 946 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 947 */ 948 if (ndp->ni_offset + ndp->ni_count > dp->i_size) 949 dp->i_size = ndp->ni_offset + ndp->ni_count; 950 951 /* 952 * Get the block containing the space for the new directory 953 * entry. Should return error by result instead of u.u_error. 954 */ 955 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf); 956 if (bp == 0) { 957 iput(dp); 958 return (u.u_error); 959 } 960 961 /* 962 * Find space for the new entry. In the simple case, the 963 * entry at offset base will have the space. If it does 964 * not, then namei arranged that compacting the region 965 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 966 */ 967 ep = (struct direct *)dirbuf; 968 dsize = DIRSIZ(ep); 969 spacefree = ep->d_reclen - dsize; 970 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 971 nep = (struct direct *)(dirbuf + loc); 972 if (ep->d_ino) { 973 /* trim the existing slot */ 974 ep->d_reclen = dsize; 975 ep = (struct direct *)((char *)ep + dsize); 976 } else { 977 /* overwrite; nothing there; header is ours */ 978 spacefree += dsize; 979 } 980 dsize = DIRSIZ(nep); 981 spacefree += nep->d_reclen - dsize; 982 loc += nep->d_reclen; 983 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 984 } 985 /* 986 * Update the pointer fields in the previous entry (if any), 987 * copy in the new entry, and write out the block. 988 */ 989 if (ep->d_ino == 0) { 990 if (spacefree + dsize < newentrysize) 991 panic("wdir: compact1"); 992 ndp->ni_dent.d_reclen = spacefree + dsize; 993 } else { 994 if (spacefree < newentrysize) 995 panic("wdir: compact2"); 996 ndp->ni_dent.d_reclen = spacefree; 997 ep->d_reclen = dsize; 998 ep = (struct direct *)((char *)ep + dsize); 999 } 1000 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 1001 bwrite(bp); 1002 dp->i_flag |= IUPD|ICHG; 1003 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size) 1004 itrunc(dp, ndp->ni_endoff); 1005 iput(dp); 1006 return (error); 1007 } 1008 1009 /* 1010 * Remove a directory entry after a call to namei, using the 1011 * parameters which it left in the u. area. The u. entry 1012 * ni_offset contains the offset into the directory of the 1013 * entry to be eliminated. The ni_count field contains the 1014 * size of the previous record in the directory. If this 1015 * is 0, the first entry is being deleted, so we need only 1016 * zero the inode number to mark the entry as free. If the 1017 * entry isn't the first in the directory, we must reclaim 1018 * the space of the now empty record by adding the record size 1019 * to the size of the previous entry. 1020 */ 1021 dirremove(ndp) 1022 register struct nameidata *ndp; 1023 { 1024 register struct inode *dp = ndp->ni_pdir; 1025 register struct buf *bp; 1026 struct direct *ep; 1027 1028 if (ndp->ni_count == 0) { 1029 /* 1030 * First entry in block: set d_ino to zero. 1031 */ 1032 ndp->ni_dent.d_ino = 0; 1033 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1034 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1035 } else { 1036 /* 1037 * Collapse new free space into previous entry. 1038 */ 1039 bp = blkatoff(dp, (int)(ndp->ni_offset - ndp->ni_count), 1040 (char **)&ep); 1041 if (bp == 0) 1042 return (0); 1043 ep->d_reclen += ndp->ni_dent.d_reclen; 1044 bwrite(bp); 1045 dp->i_flag |= IUPD|ICHG; 1046 } 1047 return (1); 1048 } 1049 1050 /* 1051 * Rewrite an existing directory entry to point at the inode 1052 * supplied. The parameters describing the directory entry are 1053 * set up by a call to namei. 1054 */ 1055 dirrewrite(dp, ip, ndp) 1056 struct inode *dp, *ip; 1057 struct nameidata *ndp; 1058 { 1059 1060 ndp->ni_dent.d_ino = ip->i_number; 1061 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1062 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1063 iput(dp); 1064 } 1065 1066 /* 1067 * Return buffer with contents of block "offset" 1068 * from the beginning of directory "ip". If "res" 1069 * is non-zero, fill it in with a pointer to the 1070 * remaining space in the directory. 1071 */ 1072 struct buf * 1073 blkatoff(ip, offset, res) 1074 struct inode *ip; 1075 off_t offset; 1076 char **res; 1077 { 1078 register struct fs *fs = ip->i_fs; 1079 daddr_t lbn = lblkno(fs, offset); 1080 int bsize = blksize(fs, ip, lbn); 1081 register struct buf *bp; 1082 daddr_t bn; 1083 1084 bn = bmap(ip, lbn, B_READ, bsize); 1085 if (u.u_error) 1086 return (0); 1087 if (bn == (daddr_t)-1) { 1088 dirbad(ip, offset, "hole in dir"); 1089 return (0); 1090 } 1091 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize); 1092 if (bp->b_flags & B_ERROR) { 1093 brelse(bp); 1094 return (0); 1095 } 1096 if (res) 1097 *res = bp->b_un.b_addr + blkoff(fs, offset); 1098 return (bp); 1099 } 1100 1101 /* 1102 * Check if a directory is empty or not. 1103 * Inode supplied must be locked. 1104 * 1105 * Using a struct dirtemplate here is not precisely 1106 * what we want, but better than using a struct direct. 1107 * 1108 * NB: does not handle corrupted directories. 1109 */ 1110 dirempty(ip, parentino) 1111 register struct inode *ip; 1112 ino_t parentino; 1113 { 1114 register off_t off; 1115 struct dirtemplate dbuf; 1116 register struct direct *dp = (struct direct *)&dbuf; 1117 int error, count; 1118 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1119 1120 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1121 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1122 off, 1, &count); 1123 /* 1124 * Since we read MINDIRSIZ, residual must 1125 * be 0 unless we're at end of file. 1126 */ 1127 if (error || count != 0) 1128 return (0); 1129 /* avoid infinite loops */ 1130 if (dp->d_reclen <= 0) 1131 return (0); 1132 /* skip empty entries */ 1133 if (dp->d_ino == 0) 1134 continue; 1135 /* accept only "." and ".." */ 1136 if (dp->d_namlen > 2) 1137 return (0); 1138 if (dp->d_name[0] != '.') 1139 return (0); 1140 /* 1141 * At this point d_namlen must be 1 or 2. 1142 * 1 implies ".", 2 implies ".." if second 1143 * char is also "." 1144 */ 1145 if (dp->d_namlen == 1) 1146 continue; 1147 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1148 continue; 1149 return (0); 1150 } 1151 return (1); 1152 } 1153 1154 /* 1155 * Check if source directory is in the path of the target directory. 1156 * Target is supplied locked, source is unlocked. 1157 * The target is always iput() before returning. 1158 */ 1159 checkpath(source, target) 1160 struct inode *source, *target; 1161 { 1162 struct dirtemplate dirbuf; 1163 register struct inode *ip; 1164 int error = 0; 1165 1166 ip = target; 1167 if (ip->i_number == source->i_number) { 1168 error = EEXIST; 1169 goto out; 1170 } 1171 if (ip->i_number == ROOTINO) 1172 goto out; 1173 1174 for (;;) { 1175 if ((ip->i_mode&IFMT) != IFDIR) { 1176 error = ENOTDIR; 1177 break; 1178 } 1179 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1180 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1181 if (error != 0) 1182 break; 1183 if (dirbuf.dotdot_namlen != 2 || 1184 dirbuf.dotdot_name[0] != '.' || 1185 dirbuf.dotdot_name[1] != '.') { 1186 error = ENOTDIR; 1187 break; 1188 } 1189 if (dirbuf.dotdot_ino == source->i_number) { 1190 error = EINVAL; 1191 break; 1192 } 1193 if (dirbuf.dotdot_ino == ROOTINO) 1194 break; 1195 iput(ip); 1196 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1197 if (ip == NULL) { 1198 error = u.u_error; 1199 break; 1200 } 1201 } 1202 1203 out: 1204 if (error == ENOTDIR) 1205 printf("checkpath: .. not a directory\n"); 1206 if (ip != NULL) 1207 iput(ip); 1208 return (error); 1209 } 1210 1211 /* 1212 * Name cache initialization, from main() when we are booting 1213 */ 1214 nchinit() 1215 { 1216 register union nchash *nchp; 1217 register struct nch *ncp; 1218 1219 nchhead = 0; 1220 nchtail = &nchhead; 1221 1222 for (ncp = nch; ncp < &nch[nchsize]; ncp++) { 1223 ncp->nc_forw = ncp; /* hash chain */ 1224 ncp->nc_back = ncp; 1225 1226 ncp->nc_nxt = NULL; /* lru chain */ 1227 *nchtail = ncp; 1228 ncp->nc_prev = nchtail; 1229 nchtail = &ncp->nc_nxt; 1230 1231 /* all else is zero already */ 1232 } 1233 1234 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1235 nchp->nch_head[0] = nchp; 1236 nchp->nch_head[1] = nchp; 1237 } 1238 } 1239 1240 /* 1241 * Cache flush, called when filesys is umounted to 1242 * remove entries that would now be invalid 1243 * 1244 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1245 * if the cache lru chain is modified while we are dumping the 1246 * inode. This makes the algorithm O(n^2), but do you think I care? 1247 */ 1248 nchinval(dev) 1249 register dev_t dev; 1250 { 1251 register struct nch *ncp, *nxtcp; 1252 1253 for (ncp = nchhead; ncp; ncp = nxtcp) { 1254 nxtcp = ncp->nc_nxt; 1255 1256 if (ncp->nc_ip == NULL || 1257 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1258 continue; 1259 1260 /* free the resources we had */ 1261 ncp->nc_idev = NODEV; 1262 ncp->nc_dev = NODEV; 1263 ncp->nc_id = NULL; 1264 ncp->nc_ino = 0; 1265 ncp->nc_ip = NULL; 1266 1267 1268 /* remove the entry from its hash chain */ 1269 remque(ncp); 1270 /* and make a dummy one */ 1271 ncp->nc_forw = ncp; 1272 ncp->nc_back = ncp; 1273 1274 /* delete this entry from LRU chain */ 1275 *ncp->nc_prev = nxtcp; 1276 if (nxtcp) 1277 nxtcp->nc_prev = ncp->nc_prev; 1278 else 1279 nchtail = ncp->nc_prev; 1280 1281 /* cause rescan of list, it may have altered */ 1282 nxtcp = nchhead; 1283 /* put the now-free entry at head of LRU */ 1284 ncp->nc_nxt = nxtcp; 1285 ncp->nc_prev = &nchhead; 1286 nxtcp->nc_prev = &ncp->nc_nxt; 1287 nchhead = ncp; 1288 } 1289 } 1290 1291 /* 1292 * Name cache invalidation of all entries. 1293 */ 1294 cacheinvalall() 1295 { 1296 register struct nch *ncp; 1297 1298 for (ncp = nch; ncp < &nch[nchsize]; ncp++) { 1299 ncp->nc_id = 0; 1300 } 1301 } 1302