1 /* 2 * Copyright (c) 1982 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)vfs_lookup.c 6.29 (Berkeley) 04/23/86 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "inode.h" 12 #include "fs.h" 13 #include "mount.h" 14 #include "dir.h" 15 #include "user.h" 16 #include "buf.h" 17 #include "conf.h" 18 #include "uio.h" 19 #include "kernel.h" 20 21 struct buf *blkatoff(); 22 struct buf *freenamebuf; 23 int dirchk = 0; 24 25 /* 26 * Structures associated with name cacheing. 27 */ 28 #define NCHHASH 32 /* size of hash table */ 29 30 #if ((NCHHASH)&((NCHHASH)-1)) != 0 31 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 32 #else 33 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 34 #endif 35 36 union nchash { 37 union nchash *nch_head[2]; 38 struct namecache *nch_chain[2]; 39 } nchash[NCHHASH]; 40 #define nch_forw nch_chain[0] 41 #define nch_back nch_chain[1] 42 43 struct namecache *nchhead, **nchtail; /* LRU chain pointers */ 44 struct nchstats nchstats; /* cache effectiveness statistics */ 45 46 /* 47 * Convert a pathname into a pointer to a locked inode. 48 * This is a very central and rather complicated routine. 49 * If the file system is not maintained in a strict tree hierarchy, 50 * this can result in a deadlock situation (see comments in code below). 51 * 52 * The flag argument is LOOKUP, CREATE, or DELETE depending on whether 53 * the name is to be looked up, created, or deleted. When CREATE or 54 * DELETE is specified, information usable in creating or deleteing a 55 * directory entry is also calculated. If flag has LOCKPARENT or'ed 56 * into it and the target of the pathname exists, namei returns both 57 * the target and its parent directory locked. When creating and 58 * LOCKPARENT is specified, the target may not be ".". When deleting 59 * and LOCKPARENT is specified, the target may be ".", but the caller 60 * must check to insure it does an irele and iput instead of two iputs. 61 * 62 * The FOLLOW flag is set when symbolic links are to be followed 63 * when they occur at the end of the name translation process. 64 * Symbolic links are always followed for all other pathname 65 * components other than the last. 66 * 67 * The segflg defines whether the name is to be copied from user 68 * space or kernel space. 69 * 70 * Name caching works as follows: 71 * 72 * Names found by directory scans are retained in a cache 73 * for future reference. It is managed LRU, so frequently 74 * used names will hang around. Cache is indexed by hash value 75 * obtained from (ino,dev,name) where ino & dev refer to the 76 * directory containing name. 77 * 78 * For simplicity (and economy of storage), names longer than 79 * a maximum length of NCHNAMLEN are not cached; they occur 80 * infrequently in any case, and are almost never of interest. 81 * 82 * Upon reaching the last segment of a path, if the reference 83 * is for DELETE, or NOCACHE is set (rewrite), and the 84 * name is located in the cache, it will be dropped. 85 * 86 * Overall outline of namei: 87 * 88 * copy in name 89 * get starting directory 90 * dirloop: 91 * check accessibility of directory 92 * dirloop2: 93 * copy next component of name to ndp->ni_dent 94 * handle degenerate case where name is null string 95 * look for name in cache, if found, then if at end of path 96 * and deleting or creating, drop it, else to haveino 97 * search for name in directory, to found or notfound 98 * notfound: 99 * if creating, return locked directory, leaving info on avail. slots 100 * else return error 101 * found: 102 * if at end of path and deleting, return information to allow delete 103 * if at end of path and rewriting (CREATE and LOCKPARENT), lock target 104 * inode and return info to allow rewrite 105 * if .. and on mounted filesys, look in mount table for parent 106 * if not at end, add name to cache; if at end and neither creating 107 * nor deleting, add name to cache 108 * haveino: 109 * if symbolic link, massage name in buffer and continue at dirloop 110 * if more components of name, do next level at dirloop 111 * return the answer as locked inode 112 * 113 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 114 * but unlocked. 115 */ 116 struct inode * 117 namei(ndp) 118 register struct nameidata *ndp; 119 { 120 register char *cp; /* pointer into pathname argument */ 121 /* these variables refer to things which must be freed or unlocked */ 122 register struct inode *dp = 0; /* the directory we are searching */ 123 register struct namecache *ncp; /* cache slot for entry */ 124 register struct fs *fs; /* file system that directory is in */ 125 register struct buf *bp = 0; /* a buffer of directory entries */ 126 register struct direct *ep; /* the current directory entry */ 127 int entryoffsetinblock; /* offset of ep in bp's buffer */ 128 register struct buf *nbp; /* buffer storing path name argument */ 129 /* these variables hold information about the search for a slot */ 130 enum {NONE, COMPACT, FOUND} slotstatus; 131 int slotoffset = -1; /* offset of area with free space */ 132 int slotsize; /* size of area at slotoffset */ 133 int slotfreespace; /* amount of space free in slot */ 134 int slotneeded; /* size of the entry we're seeking */ 135 /* */ 136 int numdirpasses; /* strategy for directory search */ 137 int endsearch; /* offset to end directory search */ 138 int prevoff; /* ndp->ni_offset of previous entry */ 139 int nlink = 0; /* number of symbolic links taken */ 140 struct inode *pdp; /* saved dp during symlink work */ 141 int error, i; 142 int lockparent; 143 int docache; /* == 0 do not cache last component */ 144 int makeentry; /* != 0 if name to be added to cache */ 145 unsigned hash; /* value of name hash for entry */ 146 union nchash *nhp; /* cache chain head for entry */ 147 int isdotdot; /* != 0 if current name is ".." */ 148 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 149 off_t enduseful; /* pointer past last used dir slot */ 150 151 lockparent = ndp->ni_nameiop & LOCKPARENT; 152 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 153 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 154 if (flag == DELETE || lockparent) 155 docache = 0; 156 /* 157 * Get a buffer for the name to be translated, and copy the 158 * name into the buffer. 159 */ 160 nbp = freenamebuf; 161 if (nbp == NULL) 162 nbp = geteblk(MAXPATHLEN); 163 else 164 freenamebuf = nbp->av_forw; 165 if (ndp->ni_segflg == UIO_SYSSPACE) 166 error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 167 (u_int *)0); 168 else 169 error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN, 170 (u_int *)0); 171 if (error) { 172 u.u_error = error; 173 goto bad; 174 } 175 176 /* 177 * Get starting directory. 178 */ 179 cp = nbp->b_un.b_addr; 180 if (*cp == '/') { 181 while (*cp == '/') 182 cp++; 183 if ((dp = u.u_rdir) == NULL) 184 dp = rootdir; 185 } else 186 dp = u.u_cdir; 187 fs = dp->i_fs; 188 ILOCK(dp); 189 dp->i_count++; 190 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 191 ndp->ni_endoff = 0; 192 193 /* 194 * We come to dirloop to search a new directory. 195 * The directory must be locked so that it can be 196 * iput, and fs must be already set to dp->i_fs. 197 */ 198 dirloop: 199 /* 200 * Check accessiblity of directory. 201 */ 202 if ((dp->i_mode&IFMT) != IFDIR) { 203 u.u_error = ENOTDIR; 204 goto bad; 205 } 206 if (access(dp, IEXEC)) 207 goto bad; 208 209 dirloop2: 210 /* 211 * Copy next component of name to ndp->ni_dent. 212 */ 213 hash = 0; 214 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 215 if (i >= MAXNAMLEN) { 216 u.u_error = ENAMETOOLONG; 217 goto bad; 218 } 219 if (*cp & 0200) 220 if ((*cp&0377) == ('/'|0200) || flag != DELETE) { 221 u.u_error = EINVAL; 222 goto bad; 223 } 224 ndp->ni_dent.d_name[i++] = *cp; 225 hash += (unsigned char)*cp * i; 226 } 227 ndp->ni_dent.d_namlen = i; 228 ndp->ni_dent.d_name[i] = '\0'; 229 isdotdot = (i == 2 && 230 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 231 makeentry = 1; 232 if (*cp == '\0' && docache == 0) 233 makeentry = 0; 234 235 /* 236 * Check for degenerate name (e.g. / or "") 237 * which is a way of talking about a directory, 238 * e.g. like "/." or ".". 239 */ 240 if (ndp->ni_dent.d_name[0] == '\0') { 241 if (flag != LOOKUP || lockparent) { 242 u.u_error = EISDIR; 243 goto bad; 244 } 245 nbp->av_forw = freenamebuf; 246 freenamebuf = nbp; 247 return (dp); 248 } 249 250 /* 251 * We now have a segment name to search for, and a directory to search. 252 * 253 * Before tediously performing a linear scan of the directory, 254 * check the name cache to see if the directory/name pair 255 * we are looking for is known already. We don't do this 256 * if the segment name is long, simply so the cache can avoid 257 * holding long names (which would either waste space, or 258 * add greatly to the complexity). 259 */ 260 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 261 nchstats.ncs_long++; 262 makeentry = 0; 263 } else { 264 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 265 for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp; 266 ncp = ncp->nc_forw) { 267 if (ncp->nc_ino == dp->i_number && 268 ncp->nc_dev == dp->i_dev && 269 ncp->nc_nlen == ndp->ni_dent.d_namlen && 270 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 271 (unsigned)ncp->nc_nlen)) 272 break; 273 } 274 if (ncp == (struct namecache *)nhp) { 275 nchstats.ncs_miss++; 276 ncp = NULL; 277 } else { 278 if (ncp->nc_id != ncp->nc_ip->i_id) 279 nchstats.ncs_falsehits++; 280 else if (!makeentry) 281 nchstats.ncs_badhits++; 282 else { 283 /* 284 * move this slot to end of LRU 285 * chain, if not already there 286 */ 287 if (ncp->nc_nxt) { 288 /* remove from LRU chain */ 289 *ncp->nc_prev = ncp->nc_nxt; 290 ncp->nc_nxt->nc_prev = ncp->nc_prev; 291 292 /* and replace at end of it */ 293 ncp->nc_nxt = NULL; 294 ncp->nc_prev = nchtail; 295 *nchtail = ncp; 296 nchtail = &ncp->nc_nxt; 297 } 298 299 /* 300 * Get the next inode in the path. 301 * See comment above other `IUNLOCK' code for 302 * an explaination of the locking protocol. 303 */ 304 pdp = dp; 305 if (!isdotdot || dp != u.u_rdir) 306 dp = ncp->nc_ip; 307 if (dp == NULL) 308 panic("nami: null cache ino"); 309 if (pdp == dp) 310 dp->i_count++; 311 else if (isdotdot) { 312 IUNLOCK(pdp); 313 igrab(dp); 314 } else { 315 igrab(dp); 316 IUNLOCK(pdp); 317 } 318 319 /* 320 * Verify that the inode that we got 321 * did not change while we were waiting 322 * for it to be locked. 323 */ 324 if (ncp->nc_id != ncp->nc_ip->i_id) { 325 iput(dp); 326 ILOCK(pdp); 327 dp = pdp; 328 nchstats.ncs_falsehits++; 329 } else { 330 ndp->ni_dent.d_ino = dp->i_number; 331 /* ni_dent.d_reclen is garbage ... */ 332 nchstats.ncs_goodhits++; 333 goto haveino; 334 } 335 } 336 337 /* 338 * Last component and we are renaming or deleting, 339 * the cache entry is invalid, or otherwise don't 340 * want cache entry to exist. 341 */ 342 /* remove from LRU chain */ 343 *ncp->nc_prev = ncp->nc_nxt; 344 if (ncp->nc_nxt) 345 ncp->nc_nxt->nc_prev = ncp->nc_prev; 346 else 347 nchtail = ncp->nc_prev; 348 remque(ncp); /* remove from hash chain */ 349 /* insert at head of LRU list (first to grab) */ 350 ncp->nc_nxt = nchhead; 351 ncp->nc_prev = &nchhead; 352 nchhead->nc_prev = &ncp->nc_nxt; 353 nchhead = ncp; 354 /* and make a dummy hash chain */ 355 ncp->nc_forw = ncp; 356 ncp->nc_back = ncp; 357 ncp = NULL; 358 } 359 } 360 361 /* 362 * Suppress search for slots unless creating 363 * file and at end of pathname, in which case 364 * we watch for a place to put the new file in 365 * case it doesn't already exist. 366 */ 367 slotstatus = FOUND; 368 if (flag == CREATE && *cp == 0) { 369 slotstatus = NONE; 370 slotfreespace = 0; 371 slotneeded = DIRSIZ(&ndp->ni_dent); 372 } 373 /* 374 * If this is the same directory that this process 375 * previously searched, pick up where we last left off. 376 * We cache only lookups as these are the most common 377 * and have the greatest payoff. Caching CREATE has little 378 * benefit as it usually must search the entire directory 379 * to determine that the entry does not exist. Caching the 380 * location of the last DELETE has not reduced profiling time 381 * and hence has been removed in the interest of simplicity. 382 */ 383 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 384 dp->i_dev != u.u_ncache.nc_dev) { 385 ndp->ni_offset = 0; 386 numdirpasses = 1; 387 } else { 388 if ((dp->i_flag & (ICHG|IMOD)) || 389 dp->i_ctime >= u.u_ncache.nc_time) { 390 if (u.u_ncache.nc_prevoffset > dp->i_size) 391 u.u_ncache.nc_prevoffset = 0; 392 else 393 u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1); 394 u.u_ncache.nc_time = time.tv_sec; 395 } 396 ndp->ni_offset = u.u_ncache.nc_prevoffset; 397 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 398 if (entryoffsetinblock != 0) { 399 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 400 if (bp == 0) 401 goto bad; 402 } 403 numdirpasses = 2; 404 nchstats.ncs_2passes++; 405 } 406 endsearch = roundup(dp->i_size, DIRBLKSIZ); 407 enduseful = 0; 408 409 searchloop: 410 while (ndp->ni_offset < endsearch) { 411 /* 412 * If offset is on a block boundary, 413 * read the next directory block. 414 * Release previous if it exists. 415 */ 416 if (blkoff(fs, ndp->ni_offset) == 0) { 417 if (bp != NULL) 418 brelse(bp); 419 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 420 if (bp == 0) 421 goto bad; 422 entryoffsetinblock = 0; 423 } 424 /* 425 * If still looking for a slot, and at a DIRBLKSIZE 426 * boundary, have to start looking for free space again. 427 */ 428 if (slotstatus == NONE && 429 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 430 slotoffset = -1; 431 slotfreespace = 0; 432 } 433 /* 434 * Get pointer to next entry. 435 * Full validation checks are slow, so we only check 436 * enough to insure forward progress through the 437 * directory. Complete checks can be run by patching 438 * "dirchk" to be true. 439 */ 440 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 441 if (ep->d_reclen == 0 || 442 dirchk && dirbadentry(ep, entryoffsetinblock)) { 443 dirbad(dp, ndp->ni_offset, "mangled entry"); 444 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 445 ndp->ni_offset += i; 446 entryoffsetinblock += i; 447 continue; 448 } 449 450 /* 451 * If an appropriate sized slot has not yet been found, 452 * check to see if one is available. Also accumulate space 453 * in the current block so that we can determine if 454 * compaction is viable. 455 */ 456 if (slotstatus != FOUND) { 457 int size = ep->d_reclen; 458 459 if (ep->d_ino != 0) 460 size -= DIRSIZ(ep); 461 if (size > 0) { 462 if (size >= slotneeded) { 463 slotstatus = FOUND; 464 slotoffset = ndp->ni_offset; 465 slotsize = ep->d_reclen; 466 } else if (slotstatus == NONE) { 467 slotfreespace += size; 468 if (slotoffset == -1) 469 slotoffset = ndp->ni_offset; 470 if (slotfreespace >= slotneeded) { 471 slotstatus = COMPACT; 472 slotsize = ndp->ni_offset + 473 ep->d_reclen - slotoffset; 474 } 475 } 476 } 477 } 478 479 /* 480 * Check for a name match. 481 */ 482 if (ep->d_ino) { 483 if (ep->d_namlen == ndp->ni_dent.d_namlen && 484 !bcmp(ndp->ni_dent.d_name, ep->d_name, 485 (unsigned)ep->d_namlen)) 486 goto found; 487 } 488 prevoff = ndp->ni_offset; 489 ndp->ni_offset += ep->d_reclen; 490 entryoffsetinblock += ep->d_reclen; 491 if (ep->d_ino) 492 enduseful = ndp->ni_offset; 493 } 494 /* notfound: */ 495 /* 496 * If we started in the middle of the directory and failed 497 * to find our target, we must check the beginning as well. 498 */ 499 if (numdirpasses == 2) { 500 numdirpasses--; 501 ndp->ni_offset = 0; 502 endsearch = u.u_ncache.nc_prevoffset; 503 goto searchloop; 504 } 505 /* 506 * If creating, and at end of pathname and current 507 * directory has not been removed, then can consider 508 * allowing file to be created. 509 */ 510 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 511 /* 512 * Access for write is interpreted as allowing 513 * creation of files in the directory. 514 */ 515 if (access(dp, IWRITE)) 516 goto bad; 517 /* 518 * Return an indication of where the new directory 519 * entry should be put. If we didn't find a slot, 520 * then set ndp->ni_count to 0 indicating that the new 521 * slot belongs at the end of the directory. If we found 522 * a slot, then the new entry can be put in the range 523 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 524 */ 525 if (slotstatus == NONE) { 526 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 527 ndp->ni_count = 0; 528 enduseful = ndp->ni_offset; 529 } else { 530 ndp->ni_offset = slotoffset; 531 ndp->ni_count = slotsize; 532 if (enduseful < slotoffset + slotsize) 533 enduseful = slotoffset + slotsize; 534 } 535 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ); 536 dp->i_flag |= IUPD|ICHG; 537 if (bp) 538 brelse(bp); 539 nbp->av_forw = freenamebuf; 540 freenamebuf = nbp; 541 /* 542 * We return with the directory locked, so that 543 * the parameters we set up above will still be 544 * valid if we actually decide to do a direnter(). 545 * We return NULL to indicate that the entry doesn't 546 * currently exist, leaving a pointer to the (locked) 547 * directory inode in ndp->ni_pdir. 548 */ 549 ndp->ni_pdir = dp; 550 return (NULL); 551 } 552 u.u_error = ENOENT; 553 goto bad; 554 found: 555 if (numdirpasses == 2) 556 nchstats.ncs_pass2++; 557 /* 558 * Check that directory length properly reflects presence 559 * of this entry. 560 */ 561 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 562 dirbad(dp, ndp->ni_offset, "i_size too small"); 563 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 564 dp->i_flag |= IUPD|ICHG; 565 } 566 567 /* 568 * Found component in pathname. 569 * If the final component of path name, save information 570 * in the cache as to where the entry was found. 571 */ 572 if (*cp == '\0' && flag == LOOKUP) { 573 u.u_ncache.nc_prevoffset = ndp->ni_offset; 574 u.u_ncache.nc_inumber = dp->i_number; 575 u.u_ncache.nc_dev = dp->i_dev; 576 u.u_ncache.nc_time = time.tv_sec; 577 } 578 /* 579 * Save directory entry's inode number and reclen in ndp->ni_dent, 580 * and release directory buffer. 581 */ 582 ndp->ni_dent.d_ino = ep->d_ino; 583 ndp->ni_dent.d_reclen = ep->d_reclen; 584 brelse(bp); 585 bp = NULL; 586 587 /* 588 * If deleting, and at end of pathname, return 589 * parameters which can be used to remove file. 590 * If the lockparent flag isn't set, we return only 591 * the directory (in ndp->ni_pdir), otherwise we go 592 * on and lock the inode, being careful with ".". 593 */ 594 if (flag == DELETE && *cp == 0) { 595 /* 596 * Write access to directory required to delete files. 597 */ 598 if (access(dp, IWRITE)) 599 goto bad; 600 ndp->ni_pdir = dp; /* for dirremove() */ 601 /* 602 * Return pointer to current entry in ndp->ni_offset, 603 * and distance past previous entry (if there 604 * is a previous entry in this block) in ndp->ni_count. 605 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 606 */ 607 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 608 ndp->ni_count = 0; 609 else 610 ndp->ni_count = ndp->ni_offset - prevoff; 611 if (lockparent) { 612 if (dp->i_number == ndp->ni_dent.d_ino) 613 dp->i_count++; 614 else { 615 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 616 if (dp == NULL) { 617 iput(ndp->ni_pdir); 618 goto bad; 619 } 620 /* 621 * If directory is "sticky", then user must own 622 * the directory, or the file in it, else he 623 * may not delete it (unless he's root). This 624 * implements append-only directories. 625 */ 626 if ((ndp->ni_pdir->i_mode & ISVTX) && 627 u.u_uid != 0 && 628 u.u_uid != ndp->ni_pdir->i_uid && 629 dp->i_uid != u.u_uid) { 630 iput(ndp->ni_pdir); 631 u.u_error = EPERM; 632 goto bad; 633 } 634 } 635 } 636 nbp->av_forw = freenamebuf; 637 freenamebuf = nbp; 638 return (dp); 639 } 640 641 /* 642 * Special handling for ".." allowing chdir out of mounted 643 * file system: indirect .. in root inode to reevaluate 644 * in directory file system was mounted on. 645 */ 646 if (isdotdot) { 647 if (dp == u.u_rdir) { 648 ndp->ni_dent.d_ino = dp->i_number; 649 makeentry = 0; 650 } else if (ndp->ni_dent.d_ino == ROOTINO && 651 dp->i_number == ROOTINO) { 652 for (i = 1; i < NMOUNT; i++) 653 if (mount[i].m_bufp != NULL && 654 mount[i].m_dev == dp->i_dev) { 655 iput(dp); 656 dp = mount[i].m_inodp; 657 ILOCK(dp); 658 dp->i_count++; 659 fs = dp->i_fs; 660 cp -= 2; /* back over .. */ 661 goto dirloop2; 662 } 663 } 664 } 665 666 /* 667 * If rewriting (rename), return the inode and the 668 * information required to rewrite the present directory 669 * Must get inode of directory entry to verify it's a 670 * regular file, or empty directory. 671 */ 672 if ((flag == CREATE && lockparent) && *cp == 0) { 673 if (access(dp, IWRITE)) 674 goto bad; 675 ndp->ni_pdir = dp; /* for dirrewrite() */ 676 /* 677 * Careful about locking second inode. 678 * This can only occur if the target is ".". 679 */ 680 if (dp->i_number == ndp->ni_dent.d_ino) { 681 u.u_error = EISDIR; /* XXX */ 682 goto bad; 683 } 684 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 685 if (dp == NULL) { 686 iput(ndp->ni_pdir); 687 goto bad; 688 } 689 nbp->av_forw = freenamebuf; 690 freenamebuf = nbp; 691 return (dp); 692 } 693 694 /* 695 * Check for symbolic link, which may require us to massage the 696 * name before we continue translation. We do not `iput' the 697 * directory because we may need it again if the symbolic link 698 * is relative to the current directory. Instead we save it 699 * unlocked as "pdp". We must get the target inode before unlocking 700 * the directory to insure that the inode will not be removed 701 * before we get it. We prevent deadlock by always fetching 702 * inodes from the root, moving down the directory tree. Thus 703 * when following backward pointers ".." we must unlock the 704 * parent directory before getting the requested directory. 705 * There is a potential race condition here if both the current 706 * and parent directories are removed before the `iget' for the 707 * inode associated with ".." returns. We hope that this occurs 708 * infrequently since we cannot avoid this race condition without 709 * implementing a sophisticated deadlock detection algorithm. 710 * Note also that this simple deadlock detection scheme will not 711 * work if the file system has any hard links other than ".." 712 * that point backwards in the directory structure. 713 */ 714 pdp = dp; 715 if (isdotdot) { 716 IUNLOCK(pdp); /* race to get the inode */ 717 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 718 if (dp == NULL) 719 goto bad2; 720 } else if (dp->i_number == ndp->ni_dent.d_ino) { 721 dp->i_count++; /* we want ourself, ie "." */ 722 } else { 723 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 724 IUNLOCK(pdp); 725 if (dp == NULL) 726 goto bad2; 727 } 728 729 /* 730 * Insert name into cache if appropriate. 731 */ 732 if (makeentry) { 733 if (ncp != NULL) 734 panic("nami: duplicating cache"); 735 /* 736 * Free the cache slot at head of lru chain. 737 */ 738 if (ncp = nchhead) { 739 /* remove from lru chain */ 740 *ncp->nc_prev = ncp->nc_nxt; 741 if (ncp->nc_nxt) 742 ncp->nc_nxt->nc_prev = ncp->nc_prev; 743 else 744 nchtail = ncp->nc_prev; 745 remque(ncp); /* remove from old hash chain */ 746 /* grab the inode we just found */ 747 ncp->nc_ip = dp; 748 /* fill in cache info */ 749 ncp->nc_ino = pdp->i_number; /* parents inum */ 750 ncp->nc_dev = pdp->i_dev; /* & device */ 751 ncp->nc_idev = dp->i_dev; /* our device */ 752 ncp->nc_id = dp->i_id; /* identifier */ 753 ncp->nc_nlen = ndp->ni_dent.d_namlen; 754 bcopy(ndp->ni_dent.d_name, ncp->nc_name, 755 (unsigned)ncp->nc_nlen); 756 /* link at end of lru chain */ 757 ncp->nc_nxt = NULL; 758 ncp->nc_prev = nchtail; 759 *nchtail = ncp; 760 nchtail = &ncp->nc_nxt; 761 /* and insert on hash chain */ 762 insque(ncp, nhp); 763 } 764 } 765 766 haveino: 767 fs = dp->i_fs; 768 769 /* 770 * Check for symbolic link 771 */ 772 if ((dp->i_mode & IFMT) == IFLNK && 773 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 774 u_int pathlen = strlen(cp) + 1; 775 776 if (dp->i_size + pathlen >= MAXPATHLEN - 1) { 777 u.u_error = ENAMETOOLONG; 778 goto bad2; 779 } 780 if (++nlink > MAXSYMLINKS) { 781 u.u_error = ELOOP; 782 goto bad2; 783 } 784 ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); 785 u.u_error = 786 rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 787 (off_t)0, 1, (int *)0); 788 if (u.u_error) 789 goto bad2; 790 cp = nbp->b_un.b_addr; 791 iput(dp); 792 if (*cp == '/') { 793 irele(pdp); 794 while (*cp == '/') 795 cp++; 796 if ((dp = u.u_rdir) == NULL) 797 dp = rootdir; 798 ILOCK(dp); 799 dp->i_count++; 800 } else { 801 dp = pdp; 802 ILOCK(dp); 803 } 804 fs = dp->i_fs; 805 goto dirloop; 806 } 807 808 /* 809 * Not a symbolic link. If more pathname, 810 * continue at next component, else return. 811 */ 812 if (*cp == '/') { 813 while (*cp == '/') 814 cp++; 815 irele(pdp); 816 goto dirloop; 817 } 818 nbp->av_forw = freenamebuf; 819 freenamebuf = nbp; 820 if (lockparent) 821 ndp->ni_pdir = pdp; 822 else 823 irele(pdp); 824 return (dp); 825 bad2: 826 irele(pdp); 827 bad: 828 if (bp) 829 brelse(bp); 830 if (dp) 831 iput(dp); 832 nbp->av_forw = freenamebuf; 833 freenamebuf = nbp; 834 return (NULL); 835 } 836 837 838 dirbad(ip, offset, how) 839 struct inode *ip; 840 off_t offset; 841 char *how; 842 { 843 844 printf("%s: bad dir ino %d at offset %d: %s\n", 845 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 846 } 847 848 /* 849 * Do consistency checking on a directory entry: 850 * record length must be multiple of 4 851 * entry must fit in rest of its DIRBLKSIZ block 852 * record must be large enough to contain entry 853 * name is not longer than MAXNAMLEN 854 * name must be as long as advertised, and null terminated 855 */ 856 dirbadentry(ep, entryoffsetinblock) 857 register struct direct *ep; 858 int entryoffsetinblock; 859 { 860 register int i; 861 862 if ((ep->d_reclen & 0x3) != 0 || 863 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 864 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 865 return (1); 866 for (i = 0; i < ep->d_namlen; i++) 867 if (ep->d_name[i] == '\0') 868 return (1); 869 return (ep->d_name[i]); 870 } 871 872 /* 873 * Write a directory entry after a call to namei, using the parameters 874 * which it left in the u. area. The argument ip is the inode which 875 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 876 * a pointer to the directory to be written, which was left locked by 877 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 878 * how the space for the new entry is to be gotten. 879 */ 880 direnter(ip, ndp) 881 struct inode *ip; 882 register struct nameidata *ndp; 883 { 884 register struct direct *ep, *nep; 885 register struct inode *dp = ndp->ni_pdir; 886 struct buf *bp; 887 int loc, spacefree, error = 0; 888 u_int dsize; 889 int newentrysize; 890 char *dirbuf; 891 892 ndp->ni_dent.d_ino = ip->i_number; 893 newentrysize = DIRSIZ(&ndp->ni_dent); 894 if (ndp->ni_count == 0) { 895 /* 896 * If ndp->ni_count is 0, then namei could find no space in the 897 * directory. In this case ndp->ni_offset will be on a directory 898 * block boundary and we will write the new entry into a fresh 899 * block. 900 */ 901 if (ndp->ni_offset&(DIRBLKSIZ-1)) 902 panic("wdir: newblk"); 903 ndp->ni_dent.d_reclen = DIRBLKSIZ; 904 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 905 newentrysize, ndp->ni_offset, 1, (int *)0); 906 if (DIRBLKSIZ > dp->i_fs->fs_fsize) 907 panic("wdir: blksize"); /* XXX - should grow w/bmap() */ 908 else 909 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 910 iput(dp); 911 return (error); 912 } 913 914 /* 915 * If ndp->ni_count is non-zero, then namei found space for the new 916 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 917 * in the directory. To use this space, we may have to compact 918 * the entries located there, by copying them together towards 919 * the beginning of the block, leaving the free space in 920 * one usable chunk at the end. 921 */ 922 923 /* 924 * Increase size of directory if entry eats into new space. 925 * This should never push the size past a new multiple of 926 * DIRBLKSIZE. 927 * 928 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 929 */ 930 if (ndp->ni_offset + ndp->ni_count > dp->i_size) 931 dp->i_size = ndp->ni_offset + ndp->ni_count; 932 /* 933 * Get the block containing the space for the new directory 934 * entry. Should return error by result instead of u.u_error. 935 */ 936 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf); 937 if (bp == 0) { 938 iput(dp); 939 return (u.u_error); 940 } 941 /* 942 * Find space for the new entry. In the simple case, the 943 * entry at offset base will have the space. If it does 944 * not, then namei arranged that compacting the region 945 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 946 */ 947 ep = (struct direct *)dirbuf; 948 dsize = DIRSIZ(ep); 949 spacefree = ep->d_reclen - dsize; 950 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 951 nep = (struct direct *)(dirbuf + loc); 952 if (ep->d_ino) { 953 /* trim the existing slot */ 954 ep->d_reclen = dsize; 955 ep = (struct direct *)((char *)ep + dsize); 956 } else { 957 /* overwrite; nothing there; header is ours */ 958 spacefree += dsize; 959 } 960 dsize = DIRSIZ(nep); 961 spacefree += nep->d_reclen - dsize; 962 loc += nep->d_reclen; 963 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 964 } 965 /* 966 * Update the pointer fields in the previous entry (if any), 967 * copy in the new entry, and write out the block. 968 */ 969 if (ep->d_ino == 0) { 970 if (spacefree + dsize < newentrysize) 971 panic("wdir: compact1"); 972 ndp->ni_dent.d_reclen = spacefree + dsize; 973 } else { 974 if (spacefree < newentrysize) 975 panic("wdir: compact2"); 976 ndp->ni_dent.d_reclen = spacefree; 977 ep->d_reclen = dsize; 978 ep = (struct direct *)((char *)ep + dsize); 979 } 980 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 981 bwrite(bp); 982 dp->i_flag |= IUPD|ICHG; 983 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size) 984 itrunc(dp, (u_long)ndp->ni_endoff); 985 iput(dp); 986 return (error); 987 } 988 989 /* 990 * Remove a directory entry after a call to namei, using the 991 * parameters which it left in the u. area. The u. entry 992 * ni_offset contains the offset into the directory of the 993 * entry to be eliminated. The ni_count field contains the 994 * size of the previous record in the directory. If this 995 * is 0, the first entry is being deleted, so we need only 996 * zero the inode number to mark the entry as free. If the 997 * entry isn't the first in the directory, we must reclaim 998 * the space of the now empty record by adding the record size 999 * to the size of the previous entry. 1000 */ 1001 dirremove(ndp) 1002 register struct nameidata *ndp; 1003 { 1004 register struct inode *dp = ndp->ni_pdir; 1005 register struct buf *bp; 1006 struct direct *ep; 1007 1008 if (ndp->ni_count == 0) { 1009 /* 1010 * First entry in block: set d_ino to zero. 1011 */ 1012 ndp->ni_dent.d_ino = 0; 1013 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1014 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1015 } else { 1016 /* 1017 * Collapse new free space into previous entry. 1018 */ 1019 bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep); 1020 if (bp == 0) 1021 return (0); 1022 ep->d_reclen += ndp->ni_dent.d_reclen; 1023 bwrite(bp); 1024 dp->i_flag |= IUPD|ICHG; 1025 } 1026 return (1); 1027 } 1028 1029 /* 1030 * Rewrite an existing directory entry to point at the inode 1031 * supplied. The parameters describing the directory entry are 1032 * set up by a call to namei. 1033 */ 1034 dirrewrite(dp, ip, ndp) 1035 struct inode *dp, *ip; 1036 struct nameidata *ndp; 1037 { 1038 1039 ndp->ni_dent.d_ino = ip->i_number; 1040 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1041 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1042 iput(dp); 1043 } 1044 1045 /* 1046 * Return buffer with contents of block "offset" 1047 * from the beginning of directory "ip". If "res" 1048 * is non-zero, fill it in with a pointer to the 1049 * remaining space in the directory. 1050 */ 1051 struct buf * 1052 blkatoff(ip, offset, res) 1053 struct inode *ip; 1054 off_t offset; 1055 char **res; 1056 { 1057 register struct fs *fs = ip->i_fs; 1058 daddr_t lbn = lblkno(fs, offset); 1059 int bsize = blksize(fs, ip, lbn); 1060 register struct buf *bp; 1061 daddr_t bn; 1062 1063 bn = bmap(ip, lbn, B_READ, bsize); 1064 if (u.u_error) 1065 return (0); 1066 if (bn == (daddr_t)-1) { 1067 dirbad(ip, offset, "hole in dir"); 1068 return (0); 1069 } 1070 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize); 1071 if (bp->b_flags & B_ERROR) { 1072 brelse(bp); 1073 return (0); 1074 } 1075 if (res) 1076 *res = bp->b_un.b_addr + blkoff(fs, offset); 1077 return (bp); 1078 } 1079 1080 /* 1081 * Check if a directory is empty or not. 1082 * Inode supplied must be locked. 1083 * 1084 * Using a struct dirtemplate here is not precisely 1085 * what we want, but better than using a struct direct. 1086 * 1087 * NB: does not handle corrupted directories. 1088 */ 1089 dirempty(ip, parentino) 1090 register struct inode *ip; 1091 ino_t parentino; 1092 { 1093 register off_t off; 1094 struct dirtemplate dbuf; 1095 register struct direct *dp = (struct direct *)&dbuf; 1096 int error, count; 1097 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1098 1099 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1100 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1101 off, 1, &count); 1102 /* 1103 * Since we read MINDIRSIZ, residual must 1104 * be 0 unless we're at end of file. 1105 */ 1106 if (error || count != 0) 1107 return (0); 1108 /* avoid infinite loops */ 1109 if (dp->d_reclen == 0) 1110 return (0); 1111 /* skip empty entries */ 1112 if (dp->d_ino == 0) 1113 continue; 1114 /* accept only "." and ".." */ 1115 if (dp->d_namlen > 2) 1116 return (0); 1117 if (dp->d_name[0] != '.') 1118 return (0); 1119 /* 1120 * At this point d_namlen must be 1 or 2. 1121 * 1 implies ".", 2 implies ".." if second 1122 * char is also "." 1123 */ 1124 if (dp->d_namlen == 1) 1125 continue; 1126 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1127 continue; 1128 return (0); 1129 } 1130 return (1); 1131 } 1132 1133 /* 1134 * Check if source directory is in the path of the target directory. 1135 * Target is supplied locked, source is unlocked. 1136 * The target is always iput() before returning. 1137 */ 1138 checkpath(source, target) 1139 struct inode *source, *target; 1140 { 1141 struct dirtemplate dirbuf; 1142 register struct inode *ip; 1143 int error = 0; 1144 1145 ip = target; 1146 if (ip->i_number == source->i_number) { 1147 error = EEXIST; 1148 goto out; 1149 } 1150 if (ip->i_number == ROOTINO) 1151 goto out; 1152 1153 for (;;) { 1154 if ((ip->i_mode&IFMT) != IFDIR) { 1155 error = ENOTDIR; 1156 break; 1157 } 1158 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1159 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1160 if (error != 0) 1161 break; 1162 if (dirbuf.dotdot_namlen != 2 || 1163 dirbuf.dotdot_name[0] != '.' || 1164 dirbuf.dotdot_name[1] != '.') { 1165 error = ENOTDIR; 1166 break; 1167 } 1168 if (dirbuf.dotdot_ino == source->i_number) { 1169 error = EINVAL; 1170 break; 1171 } 1172 if (dirbuf.dotdot_ino == ROOTINO) 1173 break; 1174 iput(ip); 1175 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1176 if (ip == NULL) { 1177 error = u.u_error; 1178 break; 1179 } 1180 } 1181 1182 out: 1183 if (error == ENOTDIR) 1184 printf("checkpath: .. not a directory\n"); 1185 if (ip != NULL) 1186 iput(ip); 1187 return (error); 1188 } 1189 1190 /* 1191 * Name cache initialization, from main() when we are booting 1192 */ 1193 nchinit() 1194 { 1195 register union nchash *nchp; 1196 register struct namecache *ncp; 1197 1198 nchhead = 0; 1199 nchtail = &nchhead; 1200 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) { 1201 ncp->nc_forw = ncp; /* hash chain */ 1202 ncp->nc_back = ncp; 1203 ncp->nc_nxt = NULL; /* lru chain */ 1204 *nchtail = ncp; 1205 ncp->nc_prev = nchtail; 1206 nchtail = &ncp->nc_nxt; 1207 /* all else is zero already */ 1208 } 1209 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1210 nchp->nch_head[0] = nchp; 1211 nchp->nch_head[1] = nchp; 1212 } 1213 } 1214 1215 /* 1216 * Cache flush, called when filesys is umounted to 1217 * remove entries that would now be invalid 1218 * 1219 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1220 * if the cache lru chain is modified while we are dumping the 1221 * inode. This makes the algorithm O(n^2), but do you think I care? 1222 */ 1223 nchinval(dev) 1224 register dev_t dev; 1225 { 1226 register struct namecache *ncp, *nxtcp; 1227 1228 for (ncp = nchhead; ncp; ncp = nxtcp) { 1229 nxtcp = ncp->nc_nxt; 1230 if (ncp->nc_ip == NULL || 1231 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1232 continue; 1233 /* free the resources we had */ 1234 ncp->nc_idev = NODEV; 1235 ncp->nc_dev = NODEV; 1236 ncp->nc_id = NULL; 1237 ncp->nc_ino = 0; 1238 ncp->nc_ip = NULL; 1239 remque(ncp); /* remove entry from its hash chain */ 1240 ncp->nc_forw = ncp; /* and make a dummy one */ 1241 ncp->nc_back = ncp; 1242 /* delete this entry from LRU chain */ 1243 *ncp->nc_prev = nxtcp; 1244 if (nxtcp) 1245 nxtcp->nc_prev = ncp->nc_prev; 1246 else 1247 nchtail = ncp->nc_prev; 1248 /* cause rescan of list, it may have altered */ 1249 nxtcp = nchhead; 1250 /* put the now-free entry at head of LRU */ 1251 ncp->nc_nxt = nxtcp; 1252 ncp->nc_prev = &nchhead; 1253 nxtcp->nc_prev = &ncp->nc_nxt; 1254 nchhead = ncp; 1255 } 1256 } 1257 1258 /* 1259 * Name cache invalidation of all entries. 1260 */ 1261 cacheinvalall() 1262 { 1263 register struct namecache *ncp; 1264 1265 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) 1266 ncp->nc_id = 0; 1267 } 1268