1 /* $NetBSD: vfs_cache.c,v 1.59 2004/05/07 12:05:41 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.59 2004/05/07 12:05:41 yamt Exp $"); 36 37 #include "opt_ddb.h" 38 #include "opt_revcache.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/time.h> 43 #include <sys/mount.h> 44 #include <sys/vnode.h> 45 #include <sys/namei.h> 46 #include <sys/errno.h> 47 #include <sys/malloc.h> 48 #include <sys/pool.h> 49 #include <sys/lock.h> 50 51 /* 52 * Name caching works as follows: 53 * 54 * Names found by directory scans are retained in a cache 55 * for future reference. It is managed LRU, so frequently 56 * used names will hang around. Cache is indexed by hash value 57 * obtained from (dvp, name) where dvp refers to the directory 58 * containing name. 59 * 60 * For simplicity (and economy of storage), names longer than 61 * a maximum length of NCHNAMLEN are not cached; they occur 62 * infrequently in any case, and are almost never of interest. 63 * 64 * Upon reaching the last segment of a path, if the reference 65 * is for DELETE, or NOCACHE is set (rewrite), and the 66 * name is located in the cache, it will be dropped. 67 * The entry is dropped also when it was not possible to lock 68 * the cached vnode, either because vget() failed or the generation 69 * number has changed while waiting for the lock. 70 */ 71 72 /* 73 * Structures associated with name cacheing. 74 */ 75 LIST_HEAD(nchashhead, namecache) *nchashtbl; 76 u_long nchash; /* size of hash table - 1 */ 77 long numcache; /* number of cache entries allocated */ 78 #define NCHASH(cnp, dvp) \ 79 (((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash) 80 81 LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl; 82 u_long ncvhash; /* size of hash table - 1 */ 83 #define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash) 84 85 TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ 86 struct nchstats nchstats; /* cache effectiveness statistics */ 87 88 POOL_INIT(namecache_pool, sizeof(struct namecache), 0, 0, 0, "ncachepl", 89 &pool_allocator_nointr); 90 91 MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries"); 92 93 int doingcache = 1; /* 1 => enable the cache */ 94 95 /* A single lock to protect cache insertion, removal and lookup */ 96 static struct simplelock namecache_slock = SIMPLELOCK_INITIALIZER; 97 98 static void cache_remove(struct namecache *); 99 static void cache_free(struct namecache *); 100 static __inline struct namecache *cache_lookup_entry( 101 const struct vnode *, const struct componentname *); 102 103 static void 104 cache_remove(struct namecache *ncp) 105 { 106 107 LOCK_ASSERT(simple_lock_held(&namecache_slock)); 108 109 ncp->nc_dvp = NULL; 110 ncp->nc_vp = NULL; 111 112 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 113 if (ncp->nc_hash.le_prev != NULL) { 114 LIST_REMOVE(ncp, nc_hash); 115 ncp->nc_hash.le_prev = NULL; 116 } 117 if (ncp->nc_vhash.le_prev != NULL) { 118 LIST_REMOVE(ncp, nc_vhash); 119 ncp->nc_vhash.le_prev = NULL; 120 } 121 if (ncp->nc_vlist.le_prev != NULL) { 122 LIST_REMOVE(ncp, nc_vlist); 123 ncp->nc_vlist.le_prev = NULL; 124 } 125 if (ncp->nc_dvlist.le_prev != NULL) { 126 LIST_REMOVE(ncp, nc_dvlist); 127 ncp->nc_dvlist.le_prev = NULL; 128 } 129 } 130 131 static void 132 cache_free(struct namecache *ncp) 133 { 134 135 pool_put(&namecache_pool, ncp); 136 numcache--; 137 } 138 139 static __inline struct namecache * 140 cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp) 141 { 142 struct nchashhead *ncpp; 143 struct namecache *ncp; 144 145 LOCK_ASSERT(simple_lock_held(&namecache_slock)); 146 147 ncpp = &nchashtbl[NCHASH(cnp, dvp)]; 148 149 LIST_FOREACH(ncp, ncpp, nc_hash) { 150 if (ncp->nc_dvp == dvp && 151 ncp->nc_nlen == cnp->cn_namelen && 152 !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) 153 break; 154 } 155 156 return ncp; 157 } 158 159 /* 160 * Look for a the name in the cache. We don't do this 161 * if the segment name is long, simply so the cache can avoid 162 * holding long names (which would either waste space, or 163 * add greatly to the complexity). 164 * 165 * Lookup is called with ni_dvp pointing to the directory to search, 166 * ni_ptr pointing to the name of the entry being sought, ni_namelen 167 * tells the length of the name, and ni_hash contains a hash of 168 * the name. If the lookup succeeds, the vnode is locked, stored in ni_vp 169 * and a status of zero is returned. If the locking fails for whatever 170 * reason, the vnode is unlocked and the error is returned to caller. 171 * If the lookup determines that the name does not exist (negative cacheing), 172 * a status of ENOENT is returned. If the lookup fails, a status of -1 173 * is returned. 174 */ 175 int 176 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 177 { 178 struct namecache *ncp; 179 struct vnode *vp; 180 int error; 181 182 if (!doingcache) { 183 cnp->cn_flags &= ~MAKEENTRY; 184 *vpp = NULL; 185 return (-1); 186 } 187 188 if (cnp->cn_namelen > NCHNAMLEN) { 189 /* XXXSMP - updating stats without lock; do we care? */ 190 nchstats.ncs_long++; 191 cnp->cn_flags &= ~MAKEENTRY; 192 goto fail; 193 } 194 simple_lock(&namecache_slock); 195 ncp = cache_lookup_entry(dvp, cnp); 196 if (ncp == NULL) { 197 nchstats.ncs_miss++; 198 goto fail_wlock; 199 } 200 if ((cnp->cn_flags & MAKEENTRY) == 0) { 201 nchstats.ncs_badhits++; 202 goto remove; 203 } else if (ncp->nc_vp == NULL) { 204 /* 205 * Restore the ISWHITEOUT flag saved earlier. 206 */ 207 cnp->cn_flags |= ncp->nc_flags; 208 if (cnp->cn_nameiop != CREATE || 209 (cnp->cn_flags & ISLASTCN) == 0) { 210 nchstats.ncs_neghits++; 211 /* 212 * Move this slot to end of LRU chain, 213 * if not already there. 214 */ 215 if (TAILQ_NEXT(ncp, nc_lru) != 0) { 216 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 217 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 218 } 219 simple_unlock(&namecache_slock); 220 return (ENOENT); 221 } else { 222 nchstats.ncs_badhits++; 223 goto remove; 224 } 225 } 226 227 vp = ncp->nc_vp; 228 229 /* 230 * Move this slot to end of LRU chain, if not already there. 231 */ 232 if (TAILQ_NEXT(ncp, nc_lru) != 0) { 233 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 234 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 235 } 236 237 if (vp != dvp) 238 simple_lock(&vp->v_interlock); 239 240 /* Release the name cache mutex while we acquire vnode locks */ 241 simple_unlock(&namecache_slock); 242 243 #ifdef DEBUG 244 /* 245 * since we released namecache_slock, 246 * we can't use this pointer any more. 247 */ 248 ncp = NULL; 249 #endif /* DEBUG */ 250 251 if (vp != dvp && __predict_false(vp->v_flag & VXLOCK)) { 252 /* 253 * this vnode is being cleaned out. 254 */ 255 simple_unlock(&vp->v_interlock); 256 nchstats.ncs_falsehits++; /* XXX badhits? */ 257 goto fail; 258 } 259 260 if (vp == dvp) { /* lookup on "." */ 261 VREF(dvp); 262 error = 0; 263 } else if (cnp->cn_flags & ISDOTDOT) { 264 VOP_UNLOCK(dvp, 0); 265 cnp->cn_flags |= PDIRUNLOCK; 266 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK); 267 /* 268 * If the above vget() succeeded and both LOCKPARENT and 269 * ISLASTCN is set, lock the directory vnode as well. 270 */ 271 if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) { 272 if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) { 273 vput(vp); 274 return (error); 275 } 276 cnp->cn_flags &= ~PDIRUNLOCK; 277 } 278 } else { 279 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK); 280 /* 281 * If the above vget() failed or either of LOCKPARENT or 282 * ISLASTCN is set, unlock the directory vnode. 283 */ 284 if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) { 285 VOP_UNLOCK(dvp, 0); 286 cnp->cn_flags |= PDIRUNLOCK; 287 } 288 } 289 290 /* 291 * Check that the lock succeeded. 292 */ 293 if (error) { 294 /* XXXSMP - updating stats without lock; do we care? */ 295 nchstats.ncs_badhits++; 296 297 /* 298 * The parent needs to be locked when we return to VOP_LOOKUP(). 299 * The `.' case here should be extremely rare (if it can happen 300 * at all), so we don't bother optimizing out the unlock/relock. 301 */ 302 if (vp == dvp || 303 error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) { 304 if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) 305 return (error); 306 cnp->cn_flags &= ~PDIRUNLOCK; 307 } 308 *vpp = NULL; 309 return (-1); 310 } 311 312 /* XXXSMP - updating stats without lock; do we care? */ 313 nchstats.ncs_goodhits++; 314 *vpp = vp; 315 return (0); 316 317 remove: 318 /* 319 * Last component and we are renaming or deleting, 320 * the cache entry is invalid, or otherwise don't 321 * want cache entry to exist. 322 */ 323 cache_remove(ncp); 324 cache_free(ncp); 325 326 fail_wlock: 327 simple_unlock(&namecache_slock); 328 fail: 329 *vpp = NULL; 330 return (-1); 331 } 332 333 /* 334 * Scan cache looking for name of directory entry pointing at vp. 335 * 336 * Fill in dvpp. 337 * 338 * If bufp is non-NULL, also place the name in the buffer which starts 339 * at bufp, immediately before *bpp, and move bpp backwards to point 340 * at the start of it. (Yes, this is a little baroque, but it's done 341 * this way to cater to the whims of getcwd). 342 * 343 * Returns 0 on success, -1 on cache miss, positive errno on failure. 344 */ 345 int 346 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp) 347 { 348 struct namecache *ncp; 349 struct vnode *dvp; 350 struct ncvhashhead *nvcpp; 351 char *bp; 352 353 if (!doingcache) 354 goto out; 355 356 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 357 358 simple_lock(&namecache_slock); 359 LIST_FOREACH(ncp, nvcpp, nc_vhash) { 360 if (ncp->nc_vp == vp && 361 (dvp = ncp->nc_dvp) != NULL && 362 dvp != vp) { /* avoid pesky . entries.. */ 363 364 #ifdef DIAGNOSTIC 365 if (ncp->nc_nlen == 1 && 366 ncp->nc_name[0] == '.') 367 panic("cache_revlookup: found entry for ."); 368 369 if (ncp->nc_nlen == 2 && 370 ncp->nc_name[0] == '.' && 371 ncp->nc_name[1] == '.') 372 panic("cache_revlookup: found entry for .."); 373 #endif 374 nchstats.ncs_revhits++; 375 376 if (bufp) { 377 bp = *bpp; 378 bp -= ncp->nc_nlen; 379 if (bp <= bufp) { 380 *dvpp = NULL; 381 simple_unlock(&namecache_slock); 382 return (ERANGE); 383 } 384 memcpy(bp, ncp->nc_name, ncp->nc_nlen); 385 *bpp = bp; 386 } 387 388 /* XXX MP: how do we know dvp won't evaporate? */ 389 *dvpp = dvp; 390 simple_unlock(&namecache_slock); 391 return (0); 392 } 393 } 394 nchstats.ncs_revmiss++; 395 simple_unlock(&namecache_slock); 396 out: 397 *dvpp = NULL; 398 return (-1); 399 } 400 401 /* 402 * Add an entry to the cache 403 */ 404 void 405 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 406 { 407 struct namecache *ncp; 408 struct namecache *oncp; 409 struct nchashhead *ncpp; 410 struct ncvhashhead *nvcpp; 411 412 #ifdef DIAGNOSTIC 413 if (cnp->cn_namelen > NCHNAMLEN) 414 panic("cache_enter: name too long"); 415 #endif 416 if (!doingcache) 417 return; 418 /* 419 * Free the cache slot at head of lru chain. 420 */ 421 simple_lock(&namecache_slock); 422 423 if (numcache < numvnodes) { 424 numcache++; 425 simple_unlock(&namecache_slock); 426 ncp = pool_get(&namecache_pool, PR_WAITOK); 427 memset(ncp, 0, sizeof(*ncp)); 428 simple_lock(&namecache_slock); 429 } else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) { 430 cache_remove(ncp); 431 } else { 432 simple_unlock(&namecache_slock); 433 return; 434 } 435 436 /* 437 * Concurrent lookups in the same directory may race for a 438 * cache entry. if there's a duplicated entry, free it. 439 */ 440 oncp = cache_lookup_entry(dvp, cnp); 441 if (oncp) { 442 cache_remove(oncp); 443 cache_free(oncp); 444 } 445 KASSERT(cache_lookup_entry(dvp, cnp) == NULL); 446 447 /* Grab the vnode we just found. */ 448 ncp->nc_vp = vp; 449 if (vp == NULL) { 450 /* 451 * For negative hits, save the ISWHITEOUT flag so we can 452 * restore it later when the cache entry is used again. 453 */ 454 ncp->nc_flags = cnp->cn_flags & ISWHITEOUT; 455 } 456 /* Fill in cache info. */ 457 ncp->nc_dvp = dvp; 458 LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist); 459 if (vp) 460 LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist); 461 ncp->nc_nlen = cnp->cn_namelen; 462 memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen); 463 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 464 ncpp = &nchashtbl[NCHASH(cnp, dvp)]; 465 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 466 467 ncp->nc_vhash.le_prev = NULL; 468 ncp->nc_vhash.le_next = NULL; 469 470 /* 471 * Create reverse-cache entries (used in getcwd) for directories. 472 */ 473 if (vp != NULL && 474 vp != dvp && 475 #ifndef NAMECACHE_ENTER_REVERSE 476 vp->v_type == VDIR && 477 #endif 478 (ncp->nc_nlen > 2 || 479 (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') || 480 (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) { 481 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 482 LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash); 483 } 484 simple_unlock(&namecache_slock); 485 } 486 487 /* 488 * Name cache initialization, from vfs_init() when we are booting 489 */ 490 void 491 nchinit(void) 492 { 493 494 TAILQ_INIT(&nclruhead); 495 nchashtbl = 496 hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash); 497 ncvhashtbl = 498 #ifdef NAMECACHE_ENTER_REVERSE 499 hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash); 500 #else 501 hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash); 502 #endif 503 } 504 505 /* 506 * Name cache reinitialization, for when the maximum number of vnodes increases. 507 */ 508 void 509 nchreinit(void) 510 { 511 struct namecache *ncp; 512 struct nchashhead *oldhash1, *hash1; 513 struct ncvhashhead *oldhash2, *hash2; 514 u_long i, oldmask1, oldmask2, mask1, mask2; 515 516 hash1 = hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask1); 517 hash2 = 518 #ifdef NAMECACHE_ENTER_REVERSE 519 hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask2); 520 #else 521 hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2); 522 #endif 523 simple_lock(&namecache_slock); 524 oldhash1 = nchashtbl; 525 oldmask1 = nchash; 526 nchashtbl = hash1; 527 nchash = mask1; 528 oldhash2 = ncvhashtbl; 529 oldmask2 = ncvhash; 530 ncvhashtbl = hash2; 531 ncvhash = mask2; 532 for (i = 0; i <= oldmask1; i++) { 533 while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) { 534 LIST_REMOVE(ncp, nc_hash); 535 ncp->nc_hash.le_prev = NULL; 536 } 537 } 538 for (i = 0; i <= oldmask2; i++) { 539 while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) { 540 LIST_REMOVE(ncp, nc_vhash); 541 ncp->nc_vhash.le_prev = NULL; 542 } 543 } 544 simple_unlock(&namecache_slock); 545 hashdone(oldhash1, M_CACHE); 546 hashdone(oldhash2, M_CACHE); 547 } 548 549 /* 550 * Cache flush, a particular vnode; called when a vnode is renamed to 551 * hide entries that would now be invalid 552 */ 553 void 554 cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags) 555 { 556 struct namecache *ncp, *ncnext; 557 558 simple_lock(&namecache_slock); 559 if (flags & PURGE_PARENTS) { 560 for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL; 561 ncp = ncnext) { 562 ncnext = LIST_NEXT(ncp, nc_vlist); 563 cache_remove(ncp); 564 cache_free(ncp); 565 } 566 } 567 if (flags & PURGE_CHILDREN) { 568 for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL; 569 ncp = ncnext) { 570 ncnext = LIST_NEXT(ncp, nc_dvlist); 571 cache_remove(ncp); 572 cache_free(ncp); 573 } 574 } 575 if (cnp != NULL) { 576 ncp = cache_lookup_entry(vp, cnp); 577 if (ncp) { 578 cache_remove(ncp); 579 cache_free(ncp); 580 } 581 } 582 simple_unlock(&namecache_slock); 583 } 584 585 /* 586 * Cache flush, a whole filesystem; called when filesys is umounted to 587 * remove entries that would now be invalid. 588 */ 589 void 590 cache_purgevfs(struct mount *mp) 591 { 592 struct namecache *ncp, *nxtcp; 593 594 simple_lock(&namecache_slock); 595 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { 596 nxtcp = TAILQ_NEXT(ncp, nc_lru); 597 if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) { 598 continue; 599 } 600 /* Free the resources we had. */ 601 cache_remove(ncp); 602 cache_free(ncp); 603 } 604 simple_unlock(&namecache_slock); 605 } 606 607 #ifdef DDB 608 void 609 namecache_print(struct vnode *vp, void (*pr)(const char *, ...)) 610 { 611 struct vnode *dvp = NULL; 612 struct namecache *ncp; 613 614 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 615 if (ncp->nc_vp == vp) { 616 (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name); 617 dvp = ncp->nc_dvp; 618 } 619 } 620 if (dvp == NULL) { 621 (*pr)("name not found\n"); 622 return; 623 } 624 vp = dvp; 625 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 626 if (ncp->nc_vp == vp) { 627 (*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name); 628 } 629 } 630 } 631 #endif 632