/*	$NetBSD: vfs_cache.c,v 1.91 2012/11/05 17:27:39 dholland Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.91 2012/11/05 17:27:39 dholland Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (dvp, name) where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is dropped also when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number has changed while waiting for the lock.
 */

/*
 * Per-cpu namecache data.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};

/*
 * The type for the hash code. While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is. For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

#define	NCHASH2(hash, dvp)	\
	(((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)

/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

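/*
 * Illustrative sketch (not compiled): how the two hash tables above are
 * indexed.  Forward lookups mix the component-name hash with the directory
 * vnode via NCHASH2; reverse lookups key on the child vnode alone via
 * NCVHASH.  The helper names below are hypothetical; only the macros,
 * nchash_t and namei_hash() come from this file and <sys/namei.h>.
 */
#if 0
static struct nchashhead *
example_forward_chain(struct vnode *dvp, const char *name, size_t namelen)
{
	const char *ep = name + namelen;
	nchash_t hash = namei_hash(name, &ep);

	/* Mix in the directory vnode and mask down to the table size. */
	return &nchashtbl[NCHASH2(hash, dvp)];
}

static struct ncvhashhead *
example_reverse_chain(struct vnode *vp)
{
	/* Reverse (vnode -> name) lookups are keyed on the vnode alone. */
	return &ncvhashtbl[NCVHASH(vp)];
}
#endif
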
/* Cache effectiveness statistics. */
struct nchstats nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)

static const int cache_lowat = 95;
static const int cache_hiwat = 98;
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static void cache_invalidate(struct namecache *);
static struct namecache *cache_lookup_entry(
    const struct vnode *, const char *, size_t);
static void cache_thread(void *);
static void cache_invalidate(struct namecache *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Compute the hash for an entry.
 *
 * (This is for now a wrapper around namei_hash, whose interface is
 * for the time being slightly inconvenient.)
 */
static nchash_t
cache_hash(const char *name, size_t namelen)
{
	const char *endptr;

	endptr = name + namelen;
	return namei_hash(name, &endptr);
}

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;
	long *s, *d, *m;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);

		/* Collate statistics. */
		d = (long *)&nchstats;
		s = (long *)&cpup->cpu_stats;
		m = s + sizeof(nchstats) / sizeof(long);
		for (; s < m; s++, d++) {
			*d += *s;
			*s = 0;
		}
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;
	nchash_t hash;

	KASSERT(dvp != NULL);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != namelen ||
		    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

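/*
 * Illustrative sketch (not compiled): how a file system's lookup routine
 * might consume the three result states of cache_lookup() below.  The
 * function "example_lookup" and its fallback "example_scan_directory" are
 * hypothetical; cache_lookup() and the componentname fields are real.
 */
#if 0
static int
example_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	int iswhiteout;

	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) {
		if (*vpp != NULL)
			return 0;	/* positive hit; vnode returned locked */
		if (iswhiteout)
			cnp->cn_flags |= ISWHITEOUT;
		return ENOENT;		/* negative hit; name does not exist */
	}
	/* Cache miss: nothing is known, scan the directory for real. */
	return example_scan_directory(dvp, vpp, cnp);
}
#endif
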
/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * and CNP providing the name of the entry being sought: cn_nameptr
 * is the name, cn_namelen is its length, and cn_flags is the flags
 * word from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache. Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null. In this case an
 * assertion is made that the whiteout flag is not set. File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is locked, as per the vnode lookup
 * locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
 */
int
cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
	     uint32_t nameiop, uint32_t cnflags,
	     int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default result values */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if ((cnflags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * Last component and we are renaming or deleting,
		 * the cache entry is invalid, or otherwise don't
		 * want cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if (ncp->nc_vp == NULL) {
		if (iswht_ret != NULL) {
			/*
			 * Restore the ISWHITEOUT flag saved earlier.
			 */
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		} else {
			KASSERT(ncp->nc_flags == 0);
		}

		if (__predict_true(nameiop != CREATE ||
		    (cnflags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found neg entry; vn is already null from above */
			return 1;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * Last component and we are renaming or
			 * deleting, the cache entry is invalid,
			 * or otherwise don't want cache entry to
			 * exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found nothing */
			return 0;
		}
	}

	vp = ncp->nc_vp;
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			/* found nothing */
			return 0;
		}
	}

#ifdef DEBUG
	/*
	 * since we released ncp->nc_lock,
	 * we can't use this pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnflags & ISDOTDOT) {
		VOP_UNLOCK(dvp);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* We don't have the right lock, but this is only for stats. */
		COUNT(cpup->cpu_stats, ncs_badhits);

		vrele(vp);
		/* found nothing */
		return 0;
	}

	/* We don't have the right lock, but this is only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);

	/* found it */
	*vn_ret = vp;
	return 1;
}

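/*
 * Variant of cache_lookup() without the vnode-locking protocol: the same
 * hash lookup and statistics, but (as the code below shows) the
 * nameiop/MAKEENTRY handling is skipped and a positive hit returns the
 * vnode referenced rather than locked.
 */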
int
cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
		 uint32_t cnflags,
		 int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default results. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		/* found nothing */
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		if (iswht_ret != NULL) {
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			/*cnp->cn_flags |= ncp->nc_flags;*/
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		}
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found negative entry; vn is already null from above */
		return 1;
	}
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			/* found nothing */
			return 0;
		}
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits); /* XXX can be "badhits" */

	/* found it */
	*vn_ret = vp;
	return 1;
}

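/*
 * Illustrative sketch (not compiled): the getcwd-style calling convention
 * of cache_revlookup() below.  The name is written immediately before
 * *bpp and *bpp is moved back to its first byte, so a caller can walk
 * towards the root while filling one buffer from the end.  All names here
 * except cache_revlookup() itself are hypothetical.
 */
#if 0
static int
example_component_name(struct vnode *vp, char *buf, size_t buflen,
    char **namep, struct vnode **dvpp)
{
	char *bp = buf + buflen;	/* start just past the end */
	int error;

	error = cache_revlookup(vp, dvpp, &bp, buf);
	if (error == 0)
		*namep = bp;	/* name occupies bp .. buf + buflen - 1 */
	return error;		/* 0 = hit, -1 = miss, ERANGE = buffer full */
}
#endif
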
/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds the vnode is referenced and stored in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(nchstats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			if (vtryget(dvp)) {
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
			} else {
				mutex_enter(dvp->v_interlock);
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
				error = vget(dvp, LK_NOWAIT);
				if (error) {
					KASSERT(error == EBUSY);
					if (bufp)
						(*bpp) += nlen;
					*dvpp = NULL;
					return -1;
				}
			}
			*dvpp = dvp;
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(nchstats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp,
	    const char *name, size_t namelen, uint32_t cnflags)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;
	nchash_t hash;

	/* First, check whether we can/should add a cache entry. */
	if ((cnflags & MAKEENTRY) == 0 ||
	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, name, namelen);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Grab the vnode we just found. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnflags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(namelen <= NCHNAMLEN);
	ncp->nc_nlen = namelen;
	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd) for directories.
	 * (and in linux procfs exe node)
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

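/*
 * Illustrative sketch (not compiled): a file system typically calls
 * cache_enter() above at the end of its lookup path, once a directory
 * scan has either produced a vnode (positive entry) or proven the name
 * absent (negative entry, vp == NULL).  The surrounding variables are
 * hypothetical; cache_enter() itself is real.
 */
#if 0
	/* Name was found: remember dvp/name -> vp. */
	cache_enter(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);

	/* Name was not found: remember the negative result. */
	cache_enter(dvp, NULL, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
#endif
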
/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	   "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	   "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	   "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

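/*
 * Note on the allocation in cache_cpu_init() above: it over-allocates by
 * one coherency unit and then rounds the pointer up, so each per-CPU
 * nchcpu starts on its own cache line and cpu_lock does not false-share
 * with another CPU's data.  With hypothetical example values:
 *
 *	coherency_unit = 64, sizeof(struct nchcpu) = 40
 *	sz   = roundup2(40, 64) + 64 = 128
 *	base = 0x...1010 from kmem_zalloc()  ->  roundup2  ->  0x...1040
 */
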
/*
 * Name cache reinitialization, for when the maximum number of vnodes increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid
 */
void
cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (name != NULL) {
		ncp = cache_lookup_entry(vp, name, namelen);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when filesys is umounted to
 * remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

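/*
 * Illustrative sketch (not compiled): typical invalidation calls.  A file
 * system purges the names pointing at a vnode and/or the names cached
 * under a directory when the tree changes; the VFS layer purges a whole
 * mount before unmounting.  The surrounding context is hypothetical; the
 * functions and the PURGE_* flags are the ones handled by cache_purge1()
 * above.
 */
#if 0
	/* "vp" was renamed or removed: drop cached names that point at it. */
	cache_purge1(vp, NULL, 0, PURGE_PARENTS);

	/* "dvp" changed: drop the cached names of entries inside it. */
	cache_purge1(dvp, NULL, 0, PURGE_CHILDREN);

	/* File system "mp" is being unmounted: drop everything it owns. */
	cache_purgevfs(mp);
#endif
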
/*
 * Scan global list invalidating entries until we meet a preset target.
 * Prefer to invalidate entries that have not scored a hit within
 * cache_hottime seconds.  We sort the LRU list only for this routine's
 * benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 *	=> keep number of entries below the high water mark
 *	=> sort pseudo-LRU list
 *	=> garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif
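
/*
 * Illustrative sketch (not compiled): the collated counters in nchstats
 * can be combined to gauge cache effectiveness, e.g. from ddb or a
 * monitoring tool.  The helper below is hypothetical; the ncs_* fields
 * are the ones maintained above.
 */
#if 0
static long
example_hit_rate_percent(const struct nchstats *st)
{
	long hits = st->ncs_goodhits + st->ncs_neghits;
	long total = hits + st->ncs_badhits + st->ncs_falsehits +
	    st->ncs_miss + st->ncs_long;

	return total ? (hits * 100) / total : 0;
}
#endif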