1 /* vfs_lookup.c 6.17 85/01/10 */ 2 3 #include "param.h" 4 #include "systm.h" 5 #include "inode.h" 6 #include "fs.h" 7 #include "mount.h" 8 #include "dir.h" 9 #include "user.h" 10 #include "buf.h" 11 #include "conf.h" 12 #include "uio.h" 13 #include "kernel.h" 14 15 struct buf *blkatoff(); 16 struct buf *freenamebuf; 17 int dirchk = 0; 18 19 /* 20 * Structures associated with name cacheing. 21 */ 22 #define NCHHASH 32 /* size of hash table */ 23 24 #if ((NCHHASH)&((NCHHASH)-1)) != 0 25 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 26 #else 27 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 28 #endif 29 30 union nchash { 31 union nchash *nch_head[2]; 32 struct nch *nch_chain[2]; 33 } nchash[NCHHASH]; 34 #define nch_forw nch_chain[0] 35 #define nch_back nch_chain[1] 36 37 struct nch *nchhead, **nchtail; /* LRU chain pointers */ 38 struct nchstats nchstats; /* cache effectiveness statistics */ 39 40 /* 41 * Convert a pathname into a pointer to a locked inode, 42 * with side effects usable in creating and removing files. 43 * This is a very central and rather complicated routine. 44 * 45 * The segflg defines whether the name is to be copied from user 46 * space or kernel space. 47 * 48 * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether 49 * the name is to be (looked up, created, deleted). If flag has 50 * LOCKPARENT or'ed into it and the target of the pathname exists, 51 * namei returns both the target and its parent directory locked. 52 * If the file system is not maintained in a strict tree hierarchy, 53 * this can result in a deadlock situation. When creating and 54 * LOCKPARENT is specified, the target may not be ".". When deleting 55 * and LOCKPARENT is specified, the target may be ".", but the caller 56 * must check to insure it does an irele and iput instead of two iputs. 57 * 58 * The FOLLOW flag is set when symbolic links are to be followed 59 * when they occur at the end of the name translation process. 60 * 61 * Name caching works as follows: 62 * 63 * names found by directory scans are retained in a cache 64 * for future reference. It is managed LRU, so frequently 65 * used names will hang around. Cache is indexed by hash value 66 * obtained from (ino,dev,name) where ino & dev refer to the 67 * directory containing name. 68 * 69 * For simplicity (and economy of storage), names longer than 70 * some (small) maximum length are not cached, they occur 71 * infrequently in any case, and are almost never of interest. 72 * 73 * Upon reaching the last segment of a path, if the reference 74 * is for DELETE, or NOCACHE is set (rewrite), and the 75 * name is located in the cache, it will be dropped. 76 * 77 * We must be sure never to enter the name ".." into the cache 78 * because of the extremely kludgey way that rename() alters 79 * ".." in a situation like 80 * mv a/x b/x 81 * where x is a directory, and x/.. is the ".." in question. 82 * 83 * Overall outline of namei: 84 * 85 * copy in name 86 * get starting directory 87 * dirloop: 88 * check accessibility of directory 89 * dirloop2: 90 * copy next component of name to ndp->ni_dent 91 * handle degenerate case where name is null string 92 * look for name in cache, if found, then if at end of path 93 * and deleting or creating, drop it, else to haveino 94 * search for name in directory, to found or notfound 95 * notfound: 96 * if creating, return locked directory, leaving info on avail. slots 97 * else return error 98 * found: 99 * if at end of path and deleting, return information to allow delete 100 * if at end of path and rewriting (create and LOCKPARENT), lock target 101 * inode and return info to allow rewrite 102 * if .. and on mounted filesys, look in mount table for parent 103 * if not at end, if neither creating nor deleting, add name to cache 104 * haveino: 105 * if symbolic link, massage name in buffer and continue at dirloop 106 * if more components of name, do next level at dirloop 107 * return the answer as locked inode 108 * 109 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 110 * but unlocked. 111 */ 112 struct inode * 113 namei(ndp) 114 register struct nameidata *ndp; 115 { 116 register char *cp; /* pointer into pathname argument */ 117 /* these variables refer to things which must be freed or unlocked */ 118 register struct inode *dp = 0; /* the directory we are searching */ 119 register struct nch *ncp; /* cache slot for entry */ 120 register struct fs *fs; /* file system that directory is in */ 121 register struct buf *bp = 0; /* a buffer of directory entries */ 122 register struct direct *ep; /* the current directory entry */ 123 int entryoffsetinblock; /* offset of ep in bp's buffer */ 124 register struct buf *nbp; /* buffer storing path name argument */ 125 /* these variables hold information about the search for a slot */ 126 enum {NONE, COMPACT, FOUND} slotstatus; 127 int slotoffset = -1; /* offset of area with free space */ 128 int slotsize; /* size of area at slotoffset */ 129 int slotfreespace; /* amount of space free in slot */ 130 int slotneeded; /* size of the entry we're seeking */ 131 /* */ 132 int numdirpasses; /* strategy for directory search */ 133 int endsearch; /* offset to end directory search */ 134 int prevoff; /* ndp->ni_offset of previous entry */ 135 int nlink = 0; /* number of symbolic links taken */ 136 struct inode *pdp; /* saved dp during symlink work */ 137 int error, i; 138 int lockparent; 139 int docache; 140 unsigned hash; /* value of name hash for entry */ 141 union nchash *nhp; /* cache chain head for entry */ 142 int isdotdot; /* != 0 if current name is ".." */ 143 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 144 145 lockparent = ndp->ni_nameiop & LOCKPARENT; 146 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 147 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 148 if (flag == DELETE) 149 docache = 0; 150 /* 151 * Get a buffer for the name to be translated, and copy the 152 * name into the buffer. 153 */ 154 nbp = freenamebuf; 155 if (nbp == NULL) 156 nbp = geteblk(MAXPATHLEN); 157 else 158 freenamebuf = nbp->av_forw; 159 if (ndp->ni_segflg == UIO_SYSSPACE) 160 error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 161 (u_int *)0); 162 else 163 error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 164 (u_int *)0); 165 if (error) { 166 u.u_error = error; 167 goto bad; 168 } 169 170 /* 171 * Get starting directory. 172 */ 173 cp = nbp->b_un.b_addr; 174 if (*cp == '/') { 175 while (*cp == '/') 176 cp++; 177 if ((dp = u.u_rdir) == NULL) 178 dp = rootdir; 179 } else 180 dp = u.u_cdir; 181 fs = dp->i_fs; 182 ILOCK(dp); 183 dp->i_count++; 184 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 185 186 /* 187 * We come to dirloop to search a new directory. 188 * The directory must be locked so that it can be 189 * iput, and fs must be already set to dp->i_fs. 190 */ 191 dirloop: 192 /* 193 * Check accessiblity of directory. 194 */ 195 if ((dp->i_mode&IFMT) != IFDIR) { 196 u.u_error = ENOTDIR; 197 goto bad; 198 } 199 if (access(dp, IEXEC)) 200 goto bad; 201 202 dirloop2: 203 /* 204 * Copy next component of name to ndp->ni_dent. 205 */ 206 hash = 0; 207 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 208 if (i >= MAXNAMLEN) { 209 u.u_error = ENOENT; 210 goto bad; 211 } 212 if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != DELETE) { 213 u.u_error = EPERM; 214 goto bad; 215 } 216 ndp->ni_dent.d_name[i++] = *cp; 217 hash += (unsigned char)*cp * i; 218 } 219 ndp->ni_dent.d_namlen = i; 220 ndp->ni_dent.d_name[i] = '\0'; 221 isdotdot = (i == 2 && 222 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 223 224 /* 225 * Check for degenerate name (e.g. / or "") 226 * which is a way of talking about a directory, 227 * e.g. like "/." or ".". 228 */ 229 if (ndp->ni_dent.d_name[0] == '\0') { 230 if (flag != LOOKUP || lockparent) { 231 u.u_error = EISDIR; 232 goto bad; 233 } 234 nbp->av_forw = freenamebuf; 235 freenamebuf = nbp; 236 return (dp); 237 } 238 239 /* 240 * We now have a segment name to search for, and a directory to search. 241 * 242 * Before tediously performing a linear scan of the directory, 243 * check the name cache to see if the directory/name pair 244 * we are looking for is known already. We don't do this 245 * if the segment name is long, simply so the cache can avoid 246 * holding long names (which would either waste space, or 247 * add greatly to the complexity). 248 */ 249 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 250 nchstats.ncs_long++; 251 docache = 0; 252 } else { 253 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 254 for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp; 255 ncp = ncp->nc_forw) { 256 if (ncp->nc_ino == dp->i_number && 257 ncp->nc_dev == dp->i_dev && 258 ncp->nc_nlen == ndp->ni_dent.d_namlen && 259 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 260 ncp->nc_nlen)) 261 break; 262 } 263 264 if (ncp == (struct nch *)nhp) { 265 nchstats.ncs_miss++; 266 ncp = NULL; 267 } else { 268 if (ncp->nc_id != ncp->nc_ip->i_id) { 269 nchstats.ncs_falsehits++; 270 } else if (*cp == '\0' && !docache) { 271 nchstats.ncs_badhits++; 272 } else { 273 274 /* 275 * move this slot to end of LRU 276 * chain, if not already there 277 */ 278 if (ncp->nc_nxt) { 279 /* remove from LRU chain */ 280 *ncp->nc_prev = ncp->nc_nxt; 281 ncp->nc_nxt->nc_prev = ncp->nc_prev; 282 283 /* and replace at end of it */ 284 ncp->nc_nxt = NULL; 285 ncp->nc_prev = nchtail; 286 *nchtail = ncp; 287 nchtail = &ncp->nc_nxt; 288 } 289 290 /* 291 * Get the next inode in the path. 292 * See comment above other `IUNLOCK' code for 293 * an explaination of the locking protocol. 294 */ 295 pdp = dp; 296 dp = ncp->nc_ip; 297 if (dp == NULL) 298 panic("nami: null cache ino"); 299 if (pdp == dp) 300 dp->i_count++; 301 else { 302 if (isdotdot) { 303 IUNLOCK(pdp); 304 igrab(dp); 305 } else { 306 igrab(dp); 307 IUNLOCK(pdp); 308 } 309 } 310 311 /* 312 * Verify that the inode that we got 313 * did not change while we were waiting 314 * for it to be locked. 315 */ 316 if (ncp->nc_id != ncp->nc_ip->i_id) { 317 iput(dp); 318 ILOCK(pdp); 319 dp = pdp; 320 nchstats.ncs_falsehits++; 321 } else { 322 ndp->ni_dent.d_ino = dp->i_number; 323 /* ni_dent.d_reclen is garbage ... */ 324 nchstats.ncs_goodhits++; 325 goto haveino; 326 } 327 } 328 329 /* 330 * Last component and we are renaming or deleting, 331 * the cache entry is invalid, or otherwise don't 332 * want cache entry to exist. 333 */ 334 335 /* remove from LRU chain */ 336 *ncp->nc_prev = ncp->nc_nxt; 337 if (ncp->nc_nxt) 338 ncp->nc_nxt->nc_prev = ncp->nc_prev; 339 else 340 nchtail = ncp->nc_prev; 341 342 /* remove from hash chain */ 343 remque(ncp); 344 345 /* insert at head of LRU list (first to grab) */ 346 ncp->nc_nxt = nchhead; 347 ncp->nc_prev = &nchhead; 348 nchhead->nc_prev = &ncp->nc_nxt; 349 nchhead = ncp; 350 351 /* and make a dummy hash chain */ 352 ncp->nc_forw = ncp; 353 ncp->nc_back = ncp; 354 355 ncp = NULL; 356 } 357 } 358 359 /* 360 * Suppress search for slots unless creating 361 * file and at end of pathname, in which case 362 * we watch for a place to put the new file in 363 * case it doesn't already exist. 364 */ 365 slotstatus = FOUND; 366 if (flag == CREATE && *cp == 0) { 367 slotstatus = NONE; 368 slotfreespace = 0; 369 slotneeded = DIRSIZ(&ndp->ni_dent); 370 } 371 /* 372 * If this is the same directory that this process 373 * previously searched, pick up where we last left off. 374 * We cache only lookups as these are the most common 375 * and have the greatest payoff. Caching CREATE has little 376 * benefit as it usually must search the entire directory 377 * to determine that the entry does not exist. Caching the 378 * location of the last DELETE has not reduced profiling time 379 * and hence has been removed in the interest of simplicity. 380 */ 381 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 382 dp->i_dev != u.u_ncache.nc_dev) { 383 ndp->ni_offset = 0; 384 numdirpasses = 1; 385 } else { 386 if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) { 387 if (u.u_ncache.nc_prevoffset > dp->i_size) 388 u.u_ncache.nc_prevoffset = 0; 389 else 390 u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1); 391 u.u_ncache.nc_time = time.tv_sec; 392 } 393 ndp->ni_offset = u.u_ncache.nc_prevoffset; 394 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 395 if (entryoffsetinblock != 0) { 396 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 397 if (bp == 0) 398 goto bad; 399 } 400 numdirpasses = 2; 401 nchstats.ncs_2passes++; 402 } 403 endsearch = roundup(dp->i_size, DIRBLKSIZ); 404 405 searchloop: 406 while (ndp->ni_offset < endsearch) { 407 /* 408 * If offset is on a block boundary, 409 * read the next directory block. 410 * Release previous if it exists. 411 */ 412 if (blkoff(fs, ndp->ni_offset) == 0) { 413 if (bp != NULL) 414 brelse(bp); 415 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 416 if (bp == 0) 417 goto bad; 418 entryoffsetinblock = 0; 419 } 420 421 /* 422 * If still looking for a slot, and at a DIRBLKSIZE 423 * boundary, have to start looking for free space again. 424 */ 425 if (slotstatus == NONE && 426 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 427 slotoffset = -1; 428 slotfreespace = 0; 429 } 430 431 /* 432 * Get pointer to next entry. 433 * Full validation checks are slow, so we only check 434 * enough to insure forward progress through the 435 * directory. Complete checks can be run by patching 436 * "dirchk" to be true. 437 */ 438 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 439 if (ep->d_reclen <= 0 || 440 dirchk && dirbadentry(ep, entryoffsetinblock)) { 441 dirbad(dp, ndp->ni_offset, "mangled entry"); 442 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 443 ndp->ni_offset += i; 444 entryoffsetinblock += i; 445 continue; 446 } 447 448 /* 449 * If an appropriate sized slot has not yet been found, 450 * check to see if one is available. Also accumulate space 451 * in the current block so that we can determine if 452 * compaction is viable. 453 */ 454 if (slotstatus != FOUND) { 455 int size = ep->d_reclen; 456 457 if (ep->d_ino != 0) 458 size -= DIRSIZ(ep); 459 if (size > 0) { 460 if (size >= slotneeded) { 461 slotstatus = FOUND; 462 slotoffset = ndp->ni_offset; 463 slotsize = ep->d_reclen; 464 } else if (slotstatus == NONE) { 465 slotfreespace += size; 466 if (slotoffset == -1) 467 slotoffset = ndp->ni_offset; 468 if (slotfreespace >= slotneeded) { 469 slotstatus = COMPACT; 470 slotsize = ndp->ni_offset + 471 ep->d_reclen - slotoffset; 472 } 473 } 474 } 475 } 476 477 /* 478 * Check for a name match. 479 */ 480 if (ep->d_ino) { 481 if (ep->d_namlen == ndp->ni_dent.d_namlen && 482 !bcmp(ndp->ni_dent.d_name, ep->d_name, 483 ep->d_namlen)) 484 goto found; 485 } 486 prevoff = ndp->ni_offset; 487 ndp->ni_offset += ep->d_reclen; 488 entryoffsetinblock += ep->d_reclen; 489 } 490 /* notfound: */ 491 /* 492 * If we started in the middle of the directory and failed 493 * to find our target, we must check the beginning as well. 494 */ 495 if (numdirpasses == 2) { 496 numdirpasses--; 497 ndp->ni_offset = 0; 498 endsearch = u.u_ncache.nc_prevoffset; 499 goto searchloop; 500 } 501 /* 502 * If creating, and at end of pathname and current 503 * directory has not been removed, then can consider 504 * allowing file to be created. 505 */ 506 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 507 /* 508 * Access for write is interpreted as allowing 509 * creation of files in the directory. 510 */ 511 if (access(dp, IWRITE)) 512 goto bad; 513 /* 514 * Return an indication of where the new directory 515 * entry should be put. If we didn't find a slot, 516 * then set ndp->ni_count to 0 indicating that the new 517 * slot belongs at the end of the directory. If we found 518 * a slot, then the new entry can be put in the range 519 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 520 */ 521 if (slotstatus == NONE) { 522 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 523 ndp->ni_count = 0; 524 } else { 525 ndp->ni_offset = slotoffset; 526 ndp->ni_count = slotsize; 527 } 528 dp->i_flag |= IUPD|ICHG; 529 if (bp) 530 brelse(bp); 531 nbp->av_forw = freenamebuf; 532 freenamebuf = nbp; 533 /* 534 * We return with the directory locked, so that 535 * the parameters we set up above will still be 536 * valid if we actually decide to do a direnter(). 537 * We return NULL to indicate that the entry doesn't 538 * currently exist, leaving a pointer to the (locked) 539 * directory inode in ndp->ni_pdir. 540 */ 541 ndp->ni_pdir = dp; 542 return (NULL); 543 } 544 u.u_error = ENOENT; 545 goto bad; 546 found: 547 if (numdirpasses == 2) 548 nchstats.ncs_pass2++; 549 /* 550 * Check that directory length properly reflects presence 551 * of this entry. 552 */ 553 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 554 dirbad(dp, ndp->ni_offset, "i_size too small"); 555 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 556 dp->i_flag |= IUPD|ICHG; 557 } 558 559 /* 560 * Found component in pathname. 561 * If the final component of path name, save information 562 * in the cache as to where the entry was found. 563 */ 564 if (*cp == '\0' && flag == LOOKUP) { 565 u.u_ncache.nc_prevoffset = ndp->ni_offset; 566 u.u_ncache.nc_inumber = dp->i_number; 567 u.u_ncache.nc_dev = dp->i_dev; 568 u.u_ncache.nc_time = time.tv_sec; 569 } 570 /* 571 * Save directory entry in ndp->ni_dent, 572 * and release directory buffer. 573 */ 574 bcopy((caddr_t)ep, (caddr_t)&ndp->ni_dent, (u_int)DIRSIZ(ep)); 575 brelse(bp); 576 bp = NULL; 577 578 /* 579 * If deleting, and at end of pathname, return 580 * parameters which can be used to remove file. 581 * If the lockparent flag isn't set, we return only 582 * the directory (in ndp->ni_pdir), otherwise we go 583 * on and lock the inode, being careful with ".". 584 */ 585 if (flag == DELETE && *cp == 0) { 586 /* 587 * Write access to directory required to delete files. 588 */ 589 if (access(dp, IWRITE)) 590 goto bad; 591 ndp->ni_pdir = dp; /* for dirremove() */ 592 /* 593 * Return pointer to current entry in ndp->ni_offset, 594 * and distance past previous entry (if there 595 * is a previous entry in this block) in ndp->ni_count. 596 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 597 */ 598 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 599 ndp->ni_count = 0; 600 else 601 ndp->ni_count = ndp->ni_offset - prevoff; 602 if (lockparent) { 603 if (dp->i_number == ndp->ni_dent.d_ino) 604 dp->i_count++; 605 else { 606 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 607 if (dp == NULL) { 608 iput(ndp->ni_pdir); 609 goto bad; 610 } 611 /* 612 * If directory is "sticky", then user must own 613 * the directory, or the file in it, else he 614 * may not delete it (unless he's root). This 615 * implements append-only directories. 616 */ 617 if ((ndp->ni_pdir->i_mode & ISVTX) && 618 u.u_uid != 0 && 619 u.u_uid != ndp->ni_pdir->i_uid && 620 dp->i_uid != u.u_uid) { 621 iput(ndp->ni_pdir); 622 u.u_error = EPERM; 623 goto bad; 624 } 625 } 626 } 627 nbp->av_forw = freenamebuf; 628 freenamebuf = nbp; 629 return (dp); 630 } 631 632 /* 633 * Special handling for ".." allowing chdir out of mounted 634 * file system: indirect .. in root inode to reevaluate 635 * in directory file system was mounted on. 636 */ 637 if (isdotdot) { 638 if (dp == u.u_rdir) 639 ndp->ni_dent.d_ino = dp->i_number; 640 else if (ndp->ni_dent.d_ino == ROOTINO && 641 dp->i_number == ROOTINO) { 642 for (i = 1; i < NMOUNT; i++) 643 if (mount[i].m_bufp != NULL && 644 mount[i].m_dev == dp->i_dev) { 645 iput(dp); 646 dp = mount[i].m_inodp; 647 ILOCK(dp); 648 dp->i_count++; 649 fs = dp->i_fs; 650 cp -= 2; /* back over .. */ 651 goto dirloop2; 652 } 653 } 654 } 655 656 /* 657 * If rewriting (rename), return the inode and the 658 * information required to rewrite the present directory 659 * Must get inode of directory entry to verify it's a 660 * regular file, or empty directory. 661 */ 662 if ((flag == CREATE && lockparent) && *cp == 0) { 663 if (access(dp, IWRITE)) 664 goto bad; 665 ndp->ni_pdir = dp; /* for dirrewrite() */ 666 /* 667 * Careful about locking second inode. 668 * This can only occur if the target is ".". 669 */ 670 if (dp->i_number == ndp->ni_dent.d_ino) { 671 u.u_error = EISDIR; /* XXX */ 672 goto bad; 673 } 674 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 675 if (dp == NULL) { 676 iput(ndp->ni_pdir); 677 goto bad; 678 } 679 nbp->av_forw = freenamebuf; 680 freenamebuf = nbp; 681 return (dp); 682 } 683 684 /* 685 * Check for symbolic link, which may require us to massage the 686 * name before we continue translation. We do not `iput' the 687 * directory because we may need it again if the symbolic link 688 * is relative to the current directory. Instead we save it 689 * unlocked as "pdp". We must get the target inode before unlocking 690 * the directory to insure that the inode will not be removed 691 * before we get it. We prevent deadlock by always fetching 692 * inodes from the root, moving down the directory tree. Thus 693 * when following backward pointers ".." we must unlock the 694 * parent directory before getting the requested directory. 695 * There is a potential race condition here if both the current 696 * and parent directories are removed before the `iget' for the 697 * inode associated with ".." returns. We hope that this occurs 698 * infrequently since we cannot avoid this race condition without 699 * implementing a sophisticated deadlock detection algorithm. 700 * Note also that this simple deadlock detection scheme will not 701 * work if the file system has any hard links other than ".." 702 * that point backwards in the directory structure. 703 */ 704 pdp = dp; 705 if (isdotdot) { 706 IUNLOCK(pdp); /* race to get the inode */ 707 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 708 if (dp == NULL) 709 goto bad2; 710 } else if (dp->i_number == ndp->ni_dent.d_ino) { 711 dp->i_count++; /* we want ourself, ie "." */ 712 } else { 713 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 714 IUNLOCK(pdp); 715 if (dp == NULL) 716 goto bad2; 717 } 718 719 /* 720 * insert name into cache (if we want it, and it isn't "." or "..") 721 * 722 * all other cases where making a cache entry would be wrong 723 * have already departed from the code sequence somewhere above. 724 */ 725 if (docache) { 726 if (ncp != NULL) 727 panic("nami: duplicating cache"); 728 729 /* 730 * free the cache slot at head of lru chain 731 */ 732 if (ncp = nchhead) { 733 /* remove from lru chain */ 734 *ncp->nc_prev = ncp->nc_nxt; 735 if (ncp->nc_nxt) 736 ncp->nc_nxt->nc_prev = ncp->nc_prev; 737 else 738 nchtail = ncp->nc_prev; 739 740 /* remove from old hash chain */ 741 remque(ncp); 742 743 /* grab the inode we just found */ 744 ncp->nc_ip = dp; 745 746 /* fill in cache info */ 747 ncp->nc_ino = pdp->i_number; /* parents inum */ 748 ncp->nc_dev = pdp->i_dev; /* & device */ 749 ncp->nc_idev = dp->i_dev; /* our device */ 750 ncp->nc_id = dp->i_id; /* identifier */ 751 ncp->nc_nlen = ndp->ni_dent.d_namlen; 752 bcopy(ndp->ni_dent.d_name, ncp->nc_name, ncp->nc_nlen); 753 754 /* link at end of lru chain */ 755 ncp->nc_nxt = NULL; 756 ncp->nc_prev = nchtail; 757 *nchtail = ncp; 758 nchtail = &ncp->nc_nxt; 759 760 /* and insert on hash chain */ 761 insque(ncp, nhp); 762 } 763 } 764 765 haveino: 766 fs = dp->i_fs; 767 768 /* 769 * Check for symbolic link 770 */ 771 if ((dp->i_mode & IFMT) == IFLNK && 772 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 773 u_int pathlen = strlen(cp) + 1; 774 775 if (dp->i_size + pathlen >= MAXPATHLEN - 1 || 776 ++nlink > MAXSYMLINKS) { 777 u.u_error = ELOOP; 778 goto bad2; 779 } 780 ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); 781 u.u_error = 782 rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 783 0, 1, (int *)0); 784 if (u.u_error) 785 goto bad2; 786 cp = nbp->b_un.b_addr; 787 iput(dp); 788 if (*cp == '/') { 789 irele(pdp); 790 while (*cp == '/') 791 cp++; 792 if ((dp = u.u_rdir) == NULL) 793 dp = rootdir; 794 ILOCK(dp); 795 dp->i_count++; 796 } else { 797 dp = pdp; 798 ILOCK(dp); 799 } 800 fs = dp->i_fs; 801 goto dirloop; 802 } 803 804 /* 805 * Not a symbolic link. If more pathname, 806 * continue at next component, else return. 807 */ 808 if (*cp == '/') { 809 while (*cp == '/') 810 cp++; 811 irele(pdp); 812 goto dirloop; 813 } 814 nbp->av_forw = freenamebuf; 815 freenamebuf = nbp; 816 if (lockparent) 817 ndp->ni_pdir = pdp; 818 else 819 irele(pdp); 820 return (dp); 821 bad2: 822 irele(pdp); 823 bad: 824 if (bp) 825 brelse(bp); 826 if (dp) 827 iput(dp); 828 nbp->av_forw = freenamebuf; 829 freenamebuf = nbp; 830 return (NULL); 831 } 832 833 834 dirbad(ip, offset, how) 835 struct inode *ip; 836 off_t offset; 837 char *how; 838 { 839 840 printf("%s: bad dir ino %d at offset %d: %s\n", 841 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 842 } 843 844 /* 845 * Do consistency checking on a directory entry: 846 * record length must be multiple of 4 847 * record length must not be non-negative 848 * entry must fit in rest of its DIRBLKSIZ block 849 * record must be large enough to contain entry 850 * name is not longer than MAXNAMLEN 851 * name must be as long as advertised, and null terminated 852 */ 853 dirbadentry(ep, entryoffsetinblock) 854 register struct direct *ep; 855 int entryoffsetinblock; 856 { 857 register int i; 858 859 if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen <= 0 || 860 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 861 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 862 return (1); 863 for (i = 0; i < ep->d_namlen; i++) 864 if (ep->d_name[i] == '\0') 865 return (1); 866 return (ep->d_name[i]); 867 } 868 869 /* 870 * Write a directory entry after a call to namei, using the parameters 871 * which it left in the u. area. The argument ip is the inode which 872 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 873 * a pointer to the directory to be written, which was left locked by 874 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 875 * how the space for the new entry is to be gotten. 876 */ 877 direnter(ip, ndp) 878 struct inode *ip; 879 register struct nameidata *ndp; 880 { 881 register struct direct *ep, *nep; 882 struct buf *bp; 883 int loc, spacefree, error = 0; 884 u_int dsize; 885 int newentrysize; 886 char *dirbuf; 887 888 ndp->ni_dent.d_ino = ip->i_number; 889 newentrysize = DIRSIZ(&ndp->ni_dent); 890 if (ndp->ni_count == 0) { 891 /* 892 * If ndp->ni_count is 0, then namei could find no space in the 893 * directory. In this case ndp->ni_offset will be on a directory 894 * block boundary and we will write the new entry into a fresh 895 * block. 896 */ 897 if (ndp->ni_offset&(DIRBLKSIZ-1)) 898 panic("wdir: newblk"); 899 ndp->ni_dent.d_reclen = DIRBLKSIZ; 900 error = rdwri(UIO_WRITE, ndp->ni_pdir, (caddr_t)&ndp->ni_dent, 901 newentrysize, ndp->ni_offset, 1, (int *)0); 902 iput(ndp->ni_pdir); 903 return (error); 904 } 905 906 /* 907 * If ndp->ni_count is non-zero, then namei found space for the new 908 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 909 * in the directory. To use this space, we may have to compact 910 * the entries located there, by copying them together towards 911 * the beginning of the block, leaving the free space in 912 * one usable chunk at the end. 913 */ 914 915 /* 916 * Increase size of directory if entry eats into new space. 917 * This should never push the size past a new multiple of 918 * DIRBLKSIZE. 919 */ 920 if (ndp->ni_offset + ndp->ni_count > ndp->ni_pdir->i_size) 921 ndp->ni_pdir->i_size = ndp->ni_offset + ndp->ni_count; 922 923 /* 924 * Get the block containing the space for the new directory 925 * entry. Should return error by result instead of u.u_error. 926 */ 927 bp = blkatoff(ndp->ni_pdir, ndp->ni_offset, (char **)&dirbuf); 928 if (bp == 0) { 929 iput(ndp->ni_pdir); 930 return (u.u_error); 931 } 932 933 /* 934 * Find space for the new entry. In the simple case, the 935 * entry at offset base will have the space. If it does 936 * not, then namei arranged that compacting the region 937 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 938 */ 939 ep = (struct direct *)dirbuf; 940 dsize = DIRSIZ(ep); 941 spacefree = ep->d_reclen - dsize; 942 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 943 nep = (struct direct *)(dirbuf + loc); 944 if (ep->d_ino) { 945 /* trim the existing slot */ 946 ep->d_reclen = dsize; 947 ep = (struct direct *)((char *)ep + dsize); 948 } else { 949 /* overwrite; nothing there; header is ours */ 950 spacefree += dsize; 951 } 952 dsize = DIRSIZ(nep); 953 spacefree += nep->d_reclen - dsize; 954 loc += nep->d_reclen; 955 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 956 } 957 /* 958 * Update the pointer fields in the previous entry (if any), 959 * copy in the new entry, and write out the block. 960 */ 961 if (ep->d_ino == 0) { 962 if (spacefree + dsize < newentrysize) 963 panic("wdir: compact1"); 964 ndp->ni_dent.d_reclen = spacefree + dsize; 965 } else { 966 if (spacefree < newentrysize) 967 panic("wdir: compact2"); 968 ndp->ni_dent.d_reclen = spacefree; 969 ep->d_reclen = dsize; 970 ep = (struct direct *)((char *)ep + dsize); 971 } 972 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 973 bwrite(bp); 974 ndp->ni_pdir->i_flag |= IUPD|ICHG; 975 iput(ndp->ni_pdir); 976 return (error); 977 } 978 979 /* 980 * Remove a directory entry after a call to namei, using the 981 * parameters which it left in the u. area. The u. entry 982 * ni_offset contains the offset into the directory of the 983 * entry to be eliminated. The ni_count field contains the 984 * size of the previous record in the directory. If this 985 * is 0, the first entry is being deleted, so we need only 986 * zero the inode number to mark the entry as free. If the 987 * entry isn't the first in the directory, we must reclaim 988 * the space of the now empty record by adding the record size 989 * to the size of the previous entry. 990 */ 991 dirremove(ndp) 992 register struct nameidata *ndp; 993 { 994 register struct inode *dp = ndp->ni_pdir; 995 register struct buf *bp; 996 struct direct *ep; 997 998 if (ndp->ni_count == 0) { 999 /* 1000 * First entry in block: set d_ino to zero. 1001 */ 1002 ndp->ni_dent.d_ino = 0; 1003 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1004 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1005 } else { 1006 /* 1007 * Collapse new free space into previous entry. 1008 */ 1009 bp = blkatoff(dp, (int)(ndp->ni_offset - ndp->ni_count), 1010 (char **)&ep); 1011 if (bp == 0) 1012 return (0); 1013 ep->d_reclen += ndp->ni_dent.d_reclen; 1014 bwrite(bp); 1015 dp->i_flag |= IUPD|ICHG; 1016 } 1017 return (1); 1018 } 1019 1020 /* 1021 * Rewrite an existing directory entry to point at the inode 1022 * supplied. The parameters describing the directory entry are 1023 * set up by a call to namei. 1024 */ 1025 dirrewrite(dp, ip, ndp) 1026 struct inode *dp, *ip; 1027 struct nameidata *ndp; 1028 { 1029 1030 ndp->ni_dent.d_ino = ip->i_number; 1031 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1032 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1033 iput(dp); 1034 } 1035 1036 /* 1037 * Return buffer with contents of block "offset" 1038 * from the beginning of directory "ip". If "res" 1039 * is non-zero, fill it in with a pointer to the 1040 * remaining space in the directory. 1041 */ 1042 struct buf * 1043 blkatoff(ip, offset, res) 1044 struct inode *ip; 1045 off_t offset; 1046 char **res; 1047 { 1048 register struct fs *fs = ip->i_fs; 1049 daddr_t lbn = lblkno(fs, offset); 1050 int base = blkoff(fs, offset); 1051 int bsize = blksize(fs, ip, lbn); 1052 daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize)); 1053 register struct buf *bp; 1054 1055 if (u.u_error) 1056 return (0); 1057 bp = bread(ip->i_dev, bn, bsize); 1058 if (bp->b_flags & B_ERROR) { 1059 brelse(bp); 1060 return (0); 1061 } 1062 if (res) 1063 *res = bp->b_un.b_addr + base; 1064 return (bp); 1065 } 1066 1067 /* 1068 * Check if a directory is empty or not. 1069 * Inode supplied must be locked. 1070 * 1071 * Using a struct dirtemplate here is not precisely 1072 * what we want, but better than using a struct direct. 1073 * 1074 * NB: does not handle corrupted directories. 1075 */ 1076 dirempty(ip, parentino) 1077 register struct inode *ip; 1078 ino_t parentino; 1079 { 1080 register off_t off; 1081 struct dirtemplate dbuf; 1082 register struct direct *dp = (struct direct *)&dbuf; 1083 int error, count; 1084 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1085 1086 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1087 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1088 off, 1, &count); 1089 /* 1090 * Since we read MINDIRSIZ, residual must 1091 * be 0 unless we're at end of file. 1092 */ 1093 if (error || count != 0) 1094 return (0); 1095 /* skip empty entries */ 1096 if (dp->d_ino == 0) 1097 continue; 1098 /* accept only "." and ".." */ 1099 if (dp->d_namlen > 2) 1100 return (0); 1101 if (dp->d_name[0] != '.') 1102 return (0); 1103 /* 1104 * At this point d_namlen must be 1 or 2. 1105 * 1 implies ".", 2 implies ".." if second 1106 * char is also "." 1107 */ 1108 if (dp->d_namlen == 1) 1109 continue; 1110 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1111 continue; 1112 return (0); 1113 } 1114 return (1); 1115 } 1116 1117 /* 1118 * Check if source directory is in the path of the target directory. 1119 * Target is supplied locked, source is unlocked. 1120 * The target is always iput() before returning. 1121 */ 1122 checkpath(source, target) 1123 struct inode *source, *target; 1124 { 1125 struct dirtemplate dirbuf; 1126 register struct inode *ip; 1127 int error = 0; 1128 1129 ip = target; 1130 if (ip->i_number == source->i_number) { 1131 error = EEXIST; 1132 goto out; 1133 } 1134 if (ip->i_number == ROOTINO) 1135 goto out; 1136 1137 for (;;) { 1138 if ((ip->i_mode&IFMT) != IFDIR) { 1139 error = ENOTDIR; 1140 break; 1141 } 1142 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1143 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1144 if (error != 0) 1145 break; 1146 if (dirbuf.dotdot_namlen != 2 || 1147 dirbuf.dotdot_name[0] != '.' || 1148 dirbuf.dotdot_name[1] != '.') { 1149 error = ENOTDIR; 1150 break; 1151 } 1152 if (dirbuf.dotdot_ino == source->i_number) { 1153 error = EINVAL; 1154 break; 1155 } 1156 if (dirbuf.dotdot_ino == ROOTINO) 1157 break; 1158 iput(ip); 1159 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1160 if (ip == NULL) { 1161 error = u.u_error; 1162 break; 1163 } 1164 } 1165 1166 out: 1167 if (error == ENOTDIR) 1168 printf("checkpath: .. not a directory\n"); 1169 if (ip != NULL) 1170 iput(ip); 1171 return (error); 1172 } 1173 1174 /* 1175 * Name cache initialization, from main() when we are booting 1176 */ 1177 nchinit() 1178 { 1179 register union nchash *nchp; 1180 register struct nch *ncp; 1181 1182 nchhead = 0; 1183 nchtail = &nchhead; 1184 1185 for (ncp = nch; ncp < &nch[nchsize]; ncp++) { 1186 ncp->nc_forw = ncp; /* hash chain */ 1187 ncp->nc_back = ncp; 1188 1189 ncp->nc_nxt = NULL; /* lru chain */ 1190 *nchtail = ncp; 1191 ncp->nc_prev = nchtail; 1192 nchtail = &ncp->nc_nxt; 1193 1194 /* all else is zero already */ 1195 } 1196 1197 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1198 nchp->nch_head[0] = nchp; 1199 nchp->nch_head[1] = nchp; 1200 } 1201 } 1202 1203 /* 1204 * Cache flush, called when filesys is umounted to 1205 * remove entries that would now be invalid 1206 * 1207 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1208 * if the cache lru chain is modified while we are dumping the 1209 * inode. This makes the algorithm O(n^2), but do you think I care? 1210 */ 1211 nchinval(dev) 1212 register dev_t dev; 1213 { 1214 register struct nch *ncp, *nxtcp; 1215 1216 for (ncp = nchhead; ncp; ncp = nxtcp) { 1217 nxtcp = ncp->nc_nxt; 1218 1219 if (ncp->nc_ip == NULL || 1220 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1221 continue; 1222 1223 /* free the resources we had */ 1224 ncp->nc_idev = NODEV; 1225 ncp->nc_dev = NODEV; 1226 ncp->nc_id = NULL; 1227 ncp->nc_ino = 0; 1228 ncp->nc_ip = NULL; 1229 1230 1231 /* remove the entry from its hash chain */ 1232 remque(ncp); 1233 /* and make a dummy one */ 1234 ncp->nc_forw = ncp; 1235 ncp->nc_back = ncp; 1236 1237 /* delete this entry from LRU chain */ 1238 *ncp->nc_prev = nxtcp; 1239 if (nxtcp) 1240 nxtcp->nc_prev = ncp->nc_prev; 1241 else 1242 nchtail = ncp->nc_prev; 1243 1244 /* cause rescan of list, it may have altered */ 1245 nxtcp = nchhead; 1246 /* put the now-free entry at head of LRU */ 1247 ncp->nc_nxt = nxtcp; 1248 ncp->nc_prev = &nchhead; 1249 nxtcp->nc_prev = &ncp->nc_nxt; 1250 nchhead = ncp; 1251 } 1252 } 1253 1254 /* 1255 * Name cache invalidation of all entries. 1256 */ 1257 cacheinvalall() 1258 { 1259 register struct nch *ncp; 1260 1261 for (ncp = nch; ncp < &nch[nchsize]; ncp++) { 1262 ncp->nc_id = 0; 1263 } 1264 } 1265