1 /* $NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $");

#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#include "opt_dtrace.h"
#include "opt_revcache.h"
#endif

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/errno.h>
#include <sys/evcnt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode_impl.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (dvp, name) where dvp refers to the directory
 * containing name.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Cache entry lifetime:
 *
 *	nonexistent
 *	---create---> active
 *	---invalidate---> queued
 *	---reclaim---> nonexistent.
 *
 * States:
 * - Nonexistent.  Cache entry does not exist.
 *
 * - Active.  cache_lookup, cache_lookup_raw, cache_revlookup can look
 *   up, acquire references, and hand off references to vnodes,
 *   e.g. via v_interlock.  Marked by nonnull ncp->nc_dvp.
 *
 * - Queued.  Pending destruction by cache_reclaim.  Cannot be used by
 *   cache_lookup, cache_lookup_raw, or cache_revlookup.  May still be
 *   on lists.  Marked by null ncp->nc_dvp.
 *
 * Transitions:
 *
 * - Create: nonexistent--->active
 *
 *   Done by cache_enter(dvp, vp, name, namelen, cnflags), called by
 *   VOP_LOOKUP after the answer is found.  Allocates a struct
 *   namecache object, initializes it with the above fields, and
 *   activates it by inserting it into the forward and reverse tables.
 *
 * - Invalidate: active--->queued
 *
 *   Done by cache_invalidate.  If not already invalidated, nullify
 *   ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue.  Called,
 *   among various other places, in cache_lookup(dvp, name, namelen,
 *   nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from
 *   cnflags.
 *
 * - Reclaim: queued--->nonexistent
 *
 *   Done by cache_reclaim.  Disassociate ncp from any lists it is on
 *   and free memory.
 */

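/*
 * Illustrative sketch (editor's addition, not part of the original
 * source and not compiled): how a file system's lookup routine
 * typically drives the transitions above.  example_fs_lookup() and
 * example_dirscan() are hypothetical; cache_lookup() and
 * cache_enter() are the interfaces defined in this file.
 */
#if 0
static int
example_fs_lookup(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	int error, iswht;

	/* Active entries answer the common case without a scan. */
	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_nameiop, cnp->cn_flags, &iswht, vpp))
		return *vpp != NULL ? 0 : ENOENT;

	/*
	 * Miss: scan the directory, then create an entry (positive or
	 * negative) so the next lookup finds it in the active state.
	 * cache_enter() itself does nothing unless MAKEENTRY is set.
	 */
	error = example_dirscan(dvp, cnp, vpp);
	if (error == 0 || error == ENOENT)
		cache_enter(dvp, error ? NULL : *vpp,
		    cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	return error;
}
#endif
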
/*
 * Locking.
 *
 * L	namecache_lock			Global lock for namecache table and queues.
 * C	struct nchcpu::cpu_lock		Per-CPU lock to reduce read contention.
 * N	struct namecache::nc_lock	Per-entry lock.
 * V	struct vnode::v_interlock	Vnode interlock.
 *
 * Lock order: L -> C -> N -> V
 *
 * Examples:
 * . L->C: cache_reclaim
 * . C->N->V: cache_lookup
 * . L->N->V: cache_purge1, cache_revlookup
 *
 * All use serialized by namecache_lock:
 *
 *	nclruhead / struct namecache::nc_lru
 *	ncvhashtbl / struct namecache::nc_vhash
 *	struct vnode_impl::vi_dnclist / struct namecache::nc_dvlist
 *	struct vnode_impl::vi_nclist / struct namecache::nc_vlist
 *	nchstats
 *
 * - Insertion serialized by namecache_lock,
 * - read protected by per-CPU lock,
 * - insert/read ordering guaranteed by memory barriers, and
 * - deletion allowed only under namecache_lock and *all* per-CPU locks
 *   in CPU_INFO_FOREACH order:
 *
 *	nchashtbl / struct namecache::nc_hash
 *
 *	The per-CPU locks exist only to reduce the probability of
 *	contention between readers.  We do not bind to a CPU, so
 *	contention is still possible.
 *
 * All use serialized by struct namecache::nc_lock:
 *
 *	struct namecache::nc_dvp
 *	struct namecache::nc_vp
 *	struct namecache::nc_gcqueue (*)
 *	struct namecache::nc_hittime (**)
 *
 * (*) Once on the queue, only cache_thread uses nc_gcqueue, unlocked.
 * (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
 *
 * Unlocked because stable after initialization:
 *
 *	struct namecache::nc_dvp
 *	struct namecache::nc_vp
 *	struct namecache::nc_flags
 *	struct namecache::nc_nlen
 *	struct namecache::nc_name
 *
 * Unlocked because approximation is OK:
 *
 *	struct nchcpu::cpu_stats
 *	struct nchcpu::cpu_stats_last
 *
 * Updates under namecache_lock or any per-CPU lock are marked with
 * COUNT, while updates outside those locks are marked with COUNT_UNL.
 *
 * - The theory seems to have been that you could replace COUNT_UNL by
 *   atomic operations -- except that doesn't help unless you also
 *   replace COUNT by atomic operations, because mixing atomics and
 *   nonatomics is a recipe for failure.
 * - We use 32-bit per-CPU counters and 64-bit global counters under
 *   the theory that 32-bit counters are less likely to be hosed by
 *   nonatomic increment.
 */

/*
 * The comment below is preserved for posterity in case it is
 * important, but it is clear that everywhere the namecache_count_*()
 * functions are called, other cache_*() functions that take the same
 * locks are also called, so I can't imagine how this could be a
 * problem:
 *
 *	N.B.: Attempting to protect COUNT_UNL() increments by taking
 *	a per-cpu lock in the namecache_count_*() functions causes
 *	a deadlock.  Don't do that, use atomic increments instead if
 *	the imperfections here bug you.
 */

/*
 * struct nchstats_percpu:
 *
 *	Per-CPU counters.
 */
struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t);

/*
 * struct nchcpu:
 *
 *	Per-CPU namecache state: lock and per-CPU counters.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats_percpu	cpu_stats;
	/* XXX maybe __cacheline_aligned would improve this? */
	struct nchstats_percpu	cpu_stats_last;	/* from last sample */
};

/*
 * The type for the hash code.  While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is.  For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
258 */ 259 260 static kmutex_t *namecache_lock __read_mostly; 261 static pool_cache_t namecache_cache __read_mostly; 262 static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned; 263 264 static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly; 265 static u_long nchash __read_mostly; 266 267 #define NCHASH2(hash, dvp) \ 268 (((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash) 269 270 static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly; 271 static u_long ncvhash __read_mostly; 272 273 #define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash) 274 275 /* Number of cache entries allocated. */ 276 static long numcache __cacheline_aligned; 277 278 /* Garbage collection queue and number of entries pending in it. */ 279 static void *cache_gcqueue; 280 static u_int cache_gcpend; 281 282 /* Cache effectiveness statistics. This holds total from per-cpu stats */ 283 struct nchstats nchstats __cacheline_aligned; 284 285 /* 286 * Macros to count an event, update the central stats with per-cpu 287 * values and add current per-cpu increments to the subsystem total 288 * last collected by cache_reclaim(). 289 */ 290 #define CACHE_STATS_CURRENT /* nothing */ 291 292 #define COUNT(cpup, f) ((cpup)->cpu_stats.f++) 293 294 #define UPDATE(cpup, f) do { \ 295 struct nchcpu *Xcpup = (cpup); \ 296 uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \ 297 nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \ 298 Xcpup->cpu_stats_last.f = Xcnt; \ 299 } while (/* CONSTCOND */ 0) 300 301 #define ADD(stats, cpup, f) do { \ 302 struct nchcpu *Xcpup = (cpup); \ 303 stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \ 304 } while (/* CONSTCOND */ 0) 305 306 /* Do unlocked stats the same way. Use a different name to allow mind changes */ 307 #define COUNT_UNL(cpup, f) COUNT((cpup), f) 308 309 static const int cache_lowat = 95; 310 static const int cache_hiwat = 98; 311 static const int cache_hottime = 5; /* number of seconds */ 312 static int doingcache = 1; /* 1 => enable the cache */ 313 314 static struct evcnt cache_ev_scan; 315 static struct evcnt cache_ev_gc; 316 static struct evcnt cache_ev_over; 317 static struct evcnt cache_ev_under; 318 static struct evcnt cache_ev_forced; 319 320 static struct namecache *cache_lookup_entry( 321 const struct vnode *, const char *, size_t); 322 static void cache_thread(void *); 323 static void cache_invalidate(struct namecache *); 324 static void cache_disassociate(struct namecache *); 325 static void cache_reclaim(void); 326 static int cache_ctor(void *, void *, int); 327 static void cache_dtor(void *, void *); 328 329 static struct sysctllog *sysctllog; 330 static void sysctl_cache_stat_setup(void); 331 332 SDT_PROVIDER_DEFINE(vfs); 333 334 SDT_PROBE_DEFINE1(vfs, namecache, invalidate, done, "struct vnode *"); 335 SDT_PROBE_DEFINE1(vfs, namecache, purge, parents, "struct vnode *"); 336 SDT_PROBE_DEFINE1(vfs, namecache, purge, children, "struct vnode *"); 337 SDT_PROBE_DEFINE2(vfs, namecache, purge, name, "char *", "size_t"); 338 SDT_PROBE_DEFINE1(vfs, namecache, purge, vfs, "struct mount *"); 339 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", 340 "char *", "size_t"); 341 SDT_PROBE_DEFINE3(vfs, namecache, lookup, miss, "struct vnode *", 342 "char *", "size_t"); 343 SDT_PROBE_DEFINE3(vfs, namecache, lookup, toolong, "struct vnode *", 344 "char *", "size_t"); 345 SDT_PROBE_DEFINE2(vfs, namecache, revlookup, success, "struct vnode *", 346 "struct vnode *"); 347 SDT_PROBE_DEFINE2(vfs, namecache, revlookup, fail, "struct vnode *", 348 "int"); 349 
SDT_PROBE_DEFINE2(vfs, namecache, prune, done, "int", "int"); 350 SDT_PROBE_DEFINE3(vfs, namecache, enter, toolong, "struct vnode *", 351 "char *", "size_t"); 352 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", 353 "char *", "size_t"); 354 355 /* 356 * Compute the hash for an entry. 357 * 358 * (This is for now a wrapper around namei_hash, whose interface is 359 * for the time being slightly inconvenient.) 360 */ 361 static nchash_t 362 cache_hash(const char *name, size_t namelen) 363 { 364 const char *endptr; 365 366 endptr = name + namelen; 367 return namei_hash(name, &endptr); 368 } 369 370 /* 371 * Invalidate a cache entry and enqueue it for garbage collection. 372 * The caller needs to hold namecache_lock or a per-cpu lock to hold 373 * off cache_reclaim(). 374 */ 375 static void 376 cache_invalidate(struct namecache *ncp) 377 { 378 void *head; 379 380 KASSERT(mutex_owned(&ncp->nc_lock)); 381 382 if (ncp->nc_dvp != NULL) { 383 SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp, 384 0, 0, 0, 0); 385 386 ncp->nc_vp = NULL; 387 ncp->nc_dvp = NULL; 388 do { 389 head = cache_gcqueue; 390 ncp->nc_gcqueue = head; 391 } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head); 392 atomic_inc_uint(&cache_gcpend); 393 } 394 } 395 396 /* 397 * Disassociate a namecache entry from any vnodes it is attached to, 398 * and remove from the global LRU list. 399 */ 400 static void 401 cache_disassociate(struct namecache *ncp) 402 { 403 404 KASSERT(mutex_owned(namecache_lock)); 405 KASSERT(ncp->nc_dvp == NULL); 406 407 if (ncp->nc_lru.tqe_prev != NULL) { 408 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 409 ncp->nc_lru.tqe_prev = NULL; 410 } 411 if (ncp->nc_vhash.le_prev != NULL) { 412 LIST_REMOVE(ncp, nc_vhash); 413 ncp->nc_vhash.le_prev = NULL; 414 } 415 if (ncp->nc_vlist.le_prev != NULL) { 416 LIST_REMOVE(ncp, nc_vlist); 417 ncp->nc_vlist.le_prev = NULL; 418 } 419 if (ncp->nc_dvlist.le_prev != NULL) { 420 LIST_REMOVE(ncp, nc_dvlist); 421 ncp->nc_dvlist.le_prev = NULL; 422 } 423 } 424 425 /* 426 * Lock all CPUs to prevent any cache lookup activity. Conceptually, 427 * this locks out all "readers". 428 */ 429 static void 430 cache_lock_cpus(void) 431 { 432 CPU_INFO_ITERATOR cii; 433 struct cpu_info *ci; 434 struct nchcpu *cpup; 435 436 /* 437 * Lock out all CPUs first, then harvest per-cpu stats. This 438 * is probably not quite as cache-efficient as doing the lock 439 * and harvest at the same time, but allows cache_stat_sysctl() 440 * to make do with a per-cpu lock. 441 */ 442 for (CPU_INFO_FOREACH(cii, ci)) { 443 cpup = ci->ci_data.cpu_nch; 444 mutex_enter(&cpup->cpu_lock); 445 } 446 for (CPU_INFO_FOREACH(cii, ci)) { 447 cpup = ci->ci_data.cpu_nch; 448 UPDATE(cpup, ncs_goodhits); 449 UPDATE(cpup, ncs_neghits); 450 UPDATE(cpup, ncs_badhits); 451 UPDATE(cpup, ncs_falsehits); 452 UPDATE(cpup, ncs_miss); 453 UPDATE(cpup, ncs_long); 454 UPDATE(cpup, ncs_pass2); 455 UPDATE(cpup, ncs_2passes); 456 UPDATE(cpup, ncs_revhits); 457 UPDATE(cpup, ncs_revmiss); 458 } 459 } 460 461 /* 462 * Release all CPU locks. 463 */ 464 static void 465 cache_unlock_cpus(void) 466 { 467 CPU_INFO_ITERATOR cii; 468 struct cpu_info *ci; 469 struct nchcpu *cpup; 470 471 for (CPU_INFO_FOREACH(cii, ci)) { 472 cpup = ci->ci_data.cpu_nch; 473 mutex_exit(&cpup->cpu_lock); 474 } 475 } 476 477 /* 478 * Find a single cache entry and return it locked. 479 * The caller needs to hold namecache_lock or a per-cpu lock to hold 480 * off cache_reclaim(). 
481 */ 482 static struct namecache * 483 cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen) 484 { 485 struct nchashhead *ncpp; 486 struct namecache *ncp; 487 nchash_t hash; 488 489 KASSERT(dvp != NULL); 490 hash = cache_hash(name, namelen); 491 ncpp = &nchashtbl[NCHASH2(hash, dvp)]; 492 493 LIST_FOREACH(ncp, ncpp, nc_hash) { 494 membar_datadep_consumer(); /* for Alpha... */ 495 if (ncp->nc_dvp != dvp || 496 ncp->nc_nlen != namelen || 497 memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen)) 498 continue; 499 mutex_enter(&ncp->nc_lock); 500 if (__predict_true(ncp->nc_dvp == dvp)) { 501 ncp->nc_hittime = hardclock_ticks; 502 SDT_PROBE(vfs, namecache, lookup, hit, dvp, 503 name, namelen, 0, 0); 504 return ncp; 505 } 506 /* Raced: entry has been nullified. */ 507 mutex_exit(&ncp->nc_lock); 508 } 509 510 SDT_PROBE(vfs, namecache, lookup, miss, dvp, 511 name, namelen, 0, 0); 512 return NULL; 513 } 514 515 /* 516 * Look for a the name in the cache. We don't do this 517 * if the segment name is long, simply so the cache can avoid 518 * holding long names (which would either waste space, or 519 * add greatly to the complexity). 520 * 521 * Lookup is called with DVP pointing to the directory to search, 522 * and CNP providing the name of the entry being sought: cn_nameptr 523 * is the name, cn_namelen is its length, and cn_flags is the flags 524 * word from the namei operation. 525 * 526 * DVP must be locked. 527 * 528 * There are three possible non-error return states: 529 * 1. Nothing was found in the cache. Nothing is known about 530 * the requested name. 531 * 2. A negative entry was found in the cache, meaning that the 532 * requested name definitely does not exist. 533 * 3. A positive entry was found in the cache, meaning that the 534 * requested name does exist and that we are providing the 535 * vnode. 536 * In these cases the results are: 537 * 1. 0 returned; VN is set to NULL. 538 * 2. 1 returned; VN is set to NULL. 539 * 3. 1 returned; VN is set to the vnode found. 540 * 541 * The additional result argument ISWHT is set to zero, unless a 542 * negative entry is found that was entered as a whiteout, in which 543 * case ISWHT is set to one. 544 * 545 * The ISWHT_RET argument pointer may be null. In this case an 546 * assertion is made that the whiteout flag is not set. File systems 547 * that do not support whiteouts can/should do this. 548 * 549 * Filesystems that do support whiteouts should add ISWHITEOUT to 550 * cnp->cn_flags if ISWHT comes back nonzero. 551 * 552 * When a vnode is returned, it is locked, as per the vnode lookup 553 * locking protocol. 554 * 555 * There is no way for this function to fail, in the sense of 556 * generating an error that requires aborting the namei operation. 557 * 558 * (Prior to October 2012, this function returned an integer status, 559 * and a vnode, and mucked with the flags word in CNP for whiteouts. 560 * The integer status was -1 for "nothing found", ENOENT for "a 561 * negative entry found", 0 for "a positive entry found", and possibly 562 * other errors, and the value of VN might or might not have been set 563 * depending on what error occurred.) 
564 */ 565 bool 566 cache_lookup(struct vnode *dvp, const char *name, size_t namelen, 567 uint32_t nameiop, uint32_t cnflags, 568 int *iswht_ret, struct vnode **vn_ret) 569 { 570 struct namecache *ncp; 571 struct vnode *vp; 572 struct nchcpu *cpup; 573 int error; 574 bool hit; 575 576 577 /* Establish default result values */ 578 if (iswht_ret != NULL) { 579 *iswht_ret = 0; 580 } 581 *vn_ret = NULL; 582 583 if (__predict_false(!doingcache)) { 584 return false; 585 } 586 587 cpup = curcpu()->ci_data.cpu_nch; 588 mutex_enter(&cpup->cpu_lock); 589 if (__predict_false(namelen > USHRT_MAX)) { 590 SDT_PROBE(vfs, namecache, lookup, toolong, dvp, 591 name, namelen, 0, 0); 592 COUNT(cpup, ncs_long); 593 mutex_exit(&cpup->cpu_lock); 594 /* found nothing */ 595 return false; 596 } 597 598 ncp = cache_lookup_entry(dvp, name, namelen); 599 if (__predict_false(ncp == NULL)) { 600 COUNT(cpup, ncs_miss); 601 mutex_exit(&cpup->cpu_lock); 602 /* found nothing */ 603 return false; 604 } 605 if ((cnflags & MAKEENTRY) == 0) { 606 COUNT(cpup, ncs_badhits); 607 /* 608 * Last component and we are renaming or deleting, 609 * the cache entry is invalid, or otherwise don't 610 * want cache entry to exist. 611 */ 612 cache_invalidate(ncp); 613 mutex_exit(&ncp->nc_lock); 614 mutex_exit(&cpup->cpu_lock); 615 /* found nothing */ 616 return false; 617 } 618 if (ncp->nc_vp == NULL) { 619 if (iswht_ret != NULL) { 620 /* 621 * Restore the ISWHITEOUT flag saved earlier. 622 */ 623 KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); 624 *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; 625 } else { 626 KASSERT(ncp->nc_flags == 0); 627 } 628 629 if (__predict_true(nameiop != CREATE || 630 (cnflags & ISLASTCN) == 0)) { 631 COUNT(cpup, ncs_neghits); 632 /* found neg entry; vn is already null from above */ 633 hit = true; 634 } else { 635 COUNT(cpup, ncs_badhits); 636 /* 637 * Last component and we are preparing to create 638 * the named object, so flush the negative cache 639 * entry. 640 */ 641 cache_invalidate(ncp); 642 /* found nothing */ 643 hit = false; 644 } 645 mutex_exit(&ncp->nc_lock); 646 mutex_exit(&cpup->cpu_lock); 647 return hit; 648 } 649 650 vp = ncp->nc_vp; 651 mutex_enter(vp->v_interlock); 652 mutex_exit(&ncp->nc_lock); 653 mutex_exit(&cpup->cpu_lock); 654 655 /* 656 * Unlocked except for the vnode interlock. Call vcache_tryvget(). 657 */ 658 error = vcache_tryvget(vp); 659 if (error) { 660 KASSERT(error == EBUSY); 661 /* 662 * This vnode is being cleaned out. 663 * XXX badhits? 664 */ 665 COUNT_UNL(cpup, ncs_falsehits); 666 /* found nothing */ 667 return false; 668 } 669 670 COUNT_UNL(cpup, ncs_goodhits); 671 /* found it */ 672 *vn_ret = vp; 673 return true; 674 } 675 676 677 /* 678 * Cut-'n-pasted version of the above without the nameiop argument. 679 */ 680 bool 681 cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen, 682 uint32_t cnflags, 683 int *iswht_ret, struct vnode **vn_ret) 684 { 685 struct namecache *ncp; 686 struct vnode *vp; 687 struct nchcpu *cpup; 688 int error; 689 690 /* Establish default results. 
*/ 691 if (iswht_ret != NULL) { 692 *iswht_ret = 0; 693 } 694 *vn_ret = NULL; 695 696 if (__predict_false(!doingcache)) { 697 /* found nothing */ 698 return false; 699 } 700 701 cpup = curcpu()->ci_data.cpu_nch; 702 mutex_enter(&cpup->cpu_lock); 703 if (__predict_false(namelen > USHRT_MAX)) { 704 COUNT(cpup, ncs_long); 705 mutex_exit(&cpup->cpu_lock); 706 /* found nothing */ 707 return false; 708 } 709 ncp = cache_lookup_entry(dvp, name, namelen); 710 if (__predict_false(ncp == NULL)) { 711 COUNT(cpup, ncs_miss); 712 mutex_exit(&cpup->cpu_lock); 713 /* found nothing */ 714 return false; 715 } 716 vp = ncp->nc_vp; 717 if (vp == NULL) { 718 /* 719 * Restore the ISWHITEOUT flag saved earlier. 720 */ 721 if (iswht_ret != NULL) { 722 KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); 723 /*cnp->cn_flags |= ncp->nc_flags;*/ 724 *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; 725 } 726 COUNT(cpup, ncs_neghits); 727 mutex_exit(&ncp->nc_lock); 728 mutex_exit(&cpup->cpu_lock); 729 /* found negative entry; vn is already null from above */ 730 return true; 731 } 732 mutex_enter(vp->v_interlock); 733 mutex_exit(&ncp->nc_lock); 734 mutex_exit(&cpup->cpu_lock); 735 736 /* 737 * Unlocked except for the vnode interlock. Call vcache_tryvget(). 738 */ 739 error = vcache_tryvget(vp); 740 if (error) { 741 KASSERT(error == EBUSY); 742 /* 743 * This vnode is being cleaned out. 744 * XXX badhits? 745 */ 746 COUNT_UNL(cpup, ncs_falsehits); 747 /* found nothing */ 748 return false; 749 } 750 751 COUNT_UNL(cpup, ncs_goodhits); /* XXX can be "badhits" */ 752 /* found it */ 753 *vn_ret = vp; 754 return true; 755 } 756 757 /* 758 * Scan cache looking for name of directory entry pointing at vp. 759 * 760 * If the lookup succeeds the vnode is referenced and stored in dvpp. 761 * 762 * If bufp is non-NULL, also place the name in the buffer which starts 763 * at bufp, immediately before *bpp, and move bpp backwards to point 764 * at the start of it. (Yes, this is a little baroque, but it's done 765 * this way to cater to the whims of getcwd). 766 * 767 * Returns 0 on success, -1 on cache miss, positive errno on failure. 768 */ 769 int 770 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp) 771 { 772 struct namecache *ncp; 773 struct vnode *dvp; 774 struct ncvhashhead *nvcpp; 775 struct nchcpu *cpup; 776 char *bp; 777 int error, nlen; 778 779 if (!doingcache) 780 goto out; 781 782 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 783 784 /* 785 * We increment counters in the local CPU's per-cpu stats. 786 * We don't take the per-cpu lock, however, since this function 787 * is the only place these counters are incremented so no one 788 * will be racing with us to increment them. 789 */ 790 cpup = curcpu()->ci_data.cpu_nch; 791 mutex_enter(namecache_lock); 792 LIST_FOREACH(ncp, nvcpp, nc_vhash) { 793 mutex_enter(&ncp->nc_lock); 794 if (ncp->nc_vp == vp && 795 (dvp = ncp->nc_dvp) != NULL && 796 dvp != vp) { /* avoid pesky . entries.. */ 797 798 #ifdef DIAGNOSTIC 799 if (ncp->nc_nlen == 1 && 800 ncp->nc_name[0] == '.') 801 panic("cache_revlookup: found entry for ."); 802 803 if (ncp->nc_nlen == 2 && 804 ncp->nc_name[0] == '.' 
&& 805 ncp->nc_name[1] == '.') 806 panic("cache_revlookup: found entry for .."); 807 #endif 808 COUNT(cpup, ncs_revhits); 809 nlen = ncp->nc_nlen; 810 811 if (bufp) { 812 bp = *bpp; 813 bp -= nlen; 814 if (bp <= bufp) { 815 *dvpp = NULL; 816 mutex_exit(&ncp->nc_lock); 817 mutex_exit(namecache_lock); 818 SDT_PROBE(vfs, namecache, revlookup, 819 fail, vp, ERANGE, 0, 0, 0); 820 return (ERANGE); 821 } 822 memcpy(bp, ncp->nc_name, nlen); 823 *bpp = bp; 824 } 825 826 mutex_enter(dvp->v_interlock); 827 mutex_exit(&ncp->nc_lock); 828 mutex_exit(namecache_lock); 829 error = vcache_tryvget(dvp); 830 if (error) { 831 KASSERT(error == EBUSY); 832 if (bufp) 833 (*bpp) += nlen; 834 *dvpp = NULL; 835 SDT_PROBE(vfs, namecache, revlookup, fail, vp, 836 error, 0, 0, 0); 837 return -1; 838 } 839 *dvpp = dvp; 840 SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp, 841 0, 0, 0); 842 return (0); 843 } 844 mutex_exit(&ncp->nc_lock); 845 } 846 COUNT(cpup, ncs_revmiss); 847 mutex_exit(namecache_lock); 848 out: 849 *dvpp = NULL; 850 return (-1); 851 } 852 853 /* 854 * Add an entry to the cache 855 */ 856 void 857 cache_enter(struct vnode *dvp, struct vnode *vp, 858 const char *name, size_t namelen, uint32_t cnflags) 859 { 860 struct namecache *ncp; 861 struct namecache *oncp; 862 struct nchashhead *ncpp; 863 struct ncvhashhead *nvcpp; 864 nchash_t hash; 865 866 /* First, check whether we can/should add a cache entry. */ 867 if ((cnflags & MAKEENTRY) == 0 || 868 __predict_false(namelen > USHRT_MAX || !doingcache)) { 869 SDT_PROBE(vfs, namecache, enter, toolong, vp, name, namelen, 870 0, 0); 871 return; 872 } 873 874 SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0); 875 if (numcache > desiredvnodes) { 876 mutex_enter(namecache_lock); 877 cache_ev_forced.ev_count++; 878 cache_reclaim(); 879 mutex_exit(namecache_lock); 880 } 881 882 if (namelen > NCHNAMLEN) { 883 ncp = kmem_alloc(sizeof(*ncp) + namelen, KM_SLEEP); 884 cache_ctor(NULL, ncp, 0); 885 } else 886 ncp = pool_cache_get(namecache_cache, PR_WAITOK); 887 888 mutex_enter(namecache_lock); 889 numcache++; 890 891 /* 892 * Concurrent lookups in the same directory may race for a 893 * cache entry. if there's a duplicated entry, free it. 894 */ 895 oncp = cache_lookup_entry(dvp, name, namelen); 896 if (oncp) { 897 cache_invalidate(oncp); 898 mutex_exit(&oncp->nc_lock); 899 } 900 901 /* Grab the vnode we just found. */ 902 mutex_enter(&ncp->nc_lock); 903 ncp->nc_vp = vp; 904 ncp->nc_flags = 0; 905 ncp->nc_hittime = 0; 906 ncp->nc_gcqueue = NULL; 907 if (vp == NULL) { 908 /* 909 * For negative hits, save the ISWHITEOUT flag so we can 910 * restore it later when the cache entry is used again. 911 */ 912 ncp->nc_flags = cnflags & ISWHITEOUT; 913 } 914 915 /* Fill in cache info. */ 916 ncp->nc_dvp = dvp; 917 LIST_INSERT_HEAD(&VNODE_TO_VIMPL(dvp)->vi_dnclist, ncp, nc_dvlist); 918 if (vp) 919 LIST_INSERT_HEAD(&VNODE_TO_VIMPL(vp)->vi_nclist, ncp, nc_vlist); 920 else { 921 ncp->nc_vlist.le_prev = NULL; 922 ncp->nc_vlist.le_next = NULL; 923 } 924 KASSERT(namelen <= USHRT_MAX); 925 ncp->nc_nlen = namelen; 926 memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen); 927 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 928 hash = cache_hash(name, namelen); 929 ncpp = &nchashtbl[NCHASH2(hash, dvp)]; 930 931 /* 932 * Flush updates before making visible in table. No need for a 933 * memory barrier on the other side: to see modifications the 934 * list must be followed, meaning a dependent pointer load. 
935 * The below is LIST_INSERT_HEAD() inlined, with the memory 936 * barrier included in the correct place. 937 */ 938 if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL) 939 ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next; 940 ncp->nc_hash.le_prev = &ncpp->lh_first; 941 membar_producer(); 942 ncpp->lh_first = ncp; 943 944 ncp->nc_vhash.le_prev = NULL; 945 ncp->nc_vhash.le_next = NULL; 946 947 /* 948 * Create reverse-cache entries (used in getcwd) for directories. 949 * (and in linux procfs exe node) 950 */ 951 if (vp != NULL && 952 vp != dvp && 953 #ifndef NAMECACHE_ENTER_REVERSE 954 vp->v_type == VDIR && 955 #endif 956 (ncp->nc_nlen > 2 || 957 (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') || 958 (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) { 959 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 960 LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash); 961 } 962 mutex_exit(&ncp->nc_lock); 963 mutex_exit(namecache_lock); 964 } 965 966 /* 967 * Name cache initialization, from vfs_init() when we are booting 968 */ 969 void 970 nchinit(void) 971 { 972 int error; 973 974 TAILQ_INIT(&nclruhead); 975 namecache_cache = pool_cache_init(sizeof(struct namecache) + NCHNAMLEN, 976 coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor, 977 cache_dtor, NULL); 978 KASSERT(namecache_cache != NULL); 979 980 namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 981 982 nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash); 983 ncvhashtbl = 984 #ifdef NAMECACHE_ENTER_REVERSE 985 hashinit(desiredvnodes, HASH_LIST, true, &ncvhash); 986 #else 987 hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash); 988 #endif 989 990 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread, 991 NULL, NULL, "cachegc"); 992 if (error != 0) 993 panic("nchinit %d", error); 994 995 evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL, 996 "namecache", "entries scanned"); 997 evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL, 998 "namecache", "entries collected"); 999 evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL, 1000 "namecache", "over scan target"); 1001 evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL, 1002 "namecache", "under scan target"); 1003 evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL, 1004 "namecache", "forced reclaims"); 1005 1006 sysctl_cache_stat_setup(); 1007 } 1008 1009 static int 1010 cache_ctor(void *arg, void *obj, int flag) 1011 { 1012 struct namecache *ncp; 1013 1014 ncp = obj; 1015 mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE); 1016 1017 return 0; 1018 } 1019 1020 static void 1021 cache_dtor(void *arg, void *obj) 1022 { 1023 struct namecache *ncp; 1024 1025 ncp = obj; 1026 mutex_destroy(&ncp->nc_lock); 1027 } 1028 1029 /* 1030 * Called once for each CPU in the system as attached. 1031 */ 1032 void 1033 cache_cpu_init(struct cpu_info *ci) 1034 { 1035 struct nchcpu *cpup; 1036 size_t sz; 1037 1038 sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit; 1039 cpup = kmem_zalloc(sz, KM_SLEEP); 1040 cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit); 1041 mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE); 1042 ci->ci_data.cpu_nch = cpup; 1043 } 1044 1045 /* 1046 * Name cache reinitialization, for when the maximum number of vnodes increases. 
1047 */ 1048 void 1049 nchreinit(void) 1050 { 1051 struct namecache *ncp; 1052 struct nchashhead *oldhash1, *hash1; 1053 struct ncvhashhead *oldhash2, *hash2; 1054 u_long i, oldmask1, oldmask2, mask1, mask2; 1055 1056 hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1); 1057 hash2 = 1058 #ifdef NAMECACHE_ENTER_REVERSE 1059 hashinit(desiredvnodes, HASH_LIST, true, &mask2); 1060 #else 1061 hashinit(desiredvnodes/8, HASH_LIST, true, &mask2); 1062 #endif 1063 mutex_enter(namecache_lock); 1064 cache_lock_cpus(); 1065 oldhash1 = nchashtbl; 1066 oldmask1 = nchash; 1067 nchashtbl = hash1; 1068 nchash = mask1; 1069 oldhash2 = ncvhashtbl; 1070 oldmask2 = ncvhash; 1071 ncvhashtbl = hash2; 1072 ncvhash = mask2; 1073 for (i = 0; i <= oldmask1; i++) { 1074 while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) { 1075 LIST_REMOVE(ncp, nc_hash); 1076 ncp->nc_hash.le_prev = NULL; 1077 } 1078 } 1079 for (i = 0; i <= oldmask2; i++) { 1080 while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) { 1081 LIST_REMOVE(ncp, nc_vhash); 1082 ncp->nc_vhash.le_prev = NULL; 1083 } 1084 } 1085 cache_unlock_cpus(); 1086 mutex_exit(namecache_lock); 1087 hashdone(oldhash1, HASH_LIST, oldmask1); 1088 hashdone(oldhash2, HASH_LIST, oldmask2); 1089 } 1090 1091 /* 1092 * Cache flush, a particular vnode; called when a vnode is renamed to 1093 * hide entries that would now be invalid 1094 */ 1095 void 1096 cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags) 1097 { 1098 struct namecache *ncp, *ncnext; 1099 1100 mutex_enter(namecache_lock); 1101 if (flags & PURGE_PARENTS) { 1102 SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0); 1103 1104 for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist); 1105 ncp != NULL; ncp = ncnext) { 1106 ncnext = LIST_NEXT(ncp, nc_vlist); 1107 mutex_enter(&ncp->nc_lock); 1108 cache_invalidate(ncp); 1109 mutex_exit(&ncp->nc_lock); 1110 cache_disassociate(ncp); 1111 } 1112 } 1113 if (flags & PURGE_CHILDREN) { 1114 SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0); 1115 for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_dnclist); 1116 ncp != NULL; ncp = ncnext) { 1117 ncnext = LIST_NEXT(ncp, nc_dvlist); 1118 mutex_enter(&ncp->nc_lock); 1119 cache_invalidate(ncp); 1120 mutex_exit(&ncp->nc_lock); 1121 cache_disassociate(ncp); 1122 } 1123 } 1124 if (name != NULL) { 1125 SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0); 1126 ncp = cache_lookup_entry(vp, name, namelen); 1127 if (ncp) { 1128 cache_invalidate(ncp); 1129 mutex_exit(&ncp->nc_lock); 1130 cache_disassociate(ncp); 1131 } 1132 } 1133 mutex_exit(namecache_lock); 1134 } 1135 1136 /* 1137 * Cache flush, a whole filesystem; called when filesys is umounted to 1138 * remove entries that would now be invalid. 1139 */ 1140 void 1141 cache_purgevfs(struct mount *mp) 1142 { 1143 struct namecache *ncp, *nxtcp; 1144 1145 SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0); 1146 mutex_enter(namecache_lock); 1147 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { 1148 nxtcp = TAILQ_NEXT(ncp, nc_lru); 1149 mutex_enter(&ncp->nc_lock); 1150 if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) { 1151 /* Free the resources we had. */ 1152 cache_invalidate(ncp); 1153 cache_disassociate(ncp); 1154 } 1155 mutex_exit(&ncp->nc_lock); 1156 } 1157 cache_reclaim(); 1158 mutex_exit(namecache_lock); 1159 } 1160 1161 /* 1162 * Scan global list invalidating entries until we meet a preset target. 1163 * Prefer to invalidate entries that have not scored a hit within 1164 * cache_hottime seconds. 
We sort the LRU list only for this routine's 1165 * benefit. 1166 */ 1167 static void 1168 cache_prune(int incache, int target) 1169 { 1170 struct namecache *ncp, *nxtcp, *sentinel; 1171 int items, recent, tryharder; 1172 1173 KASSERT(mutex_owned(namecache_lock)); 1174 1175 SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0); 1176 items = 0; 1177 tryharder = 0; 1178 recent = hardclock_ticks - hz * cache_hottime; 1179 sentinel = NULL; 1180 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { 1181 if (incache <= target) 1182 break; 1183 items++; 1184 nxtcp = TAILQ_NEXT(ncp, nc_lru); 1185 if (ncp == sentinel) { 1186 /* 1187 * If we looped back on ourself, then ignore 1188 * recent entries and purge whatever we find. 1189 */ 1190 tryharder = 1; 1191 } 1192 if (ncp->nc_dvp == NULL) 1193 continue; 1194 if (!tryharder && (ncp->nc_hittime - recent) > 0) { 1195 if (sentinel == NULL) 1196 sentinel = ncp; 1197 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 1198 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 1199 continue; 1200 } 1201 mutex_enter(&ncp->nc_lock); 1202 if (ncp->nc_dvp != NULL) { 1203 cache_invalidate(ncp); 1204 cache_disassociate(ncp); 1205 incache--; 1206 } 1207 mutex_exit(&ncp->nc_lock); 1208 } 1209 cache_ev_scan.ev_count += items; 1210 } 1211 1212 /* 1213 * Collect dead cache entries from all CPUs and garbage collect. 1214 */ 1215 static void 1216 cache_reclaim(void) 1217 { 1218 struct namecache *ncp, *next; 1219 int items; 1220 1221 KASSERT(mutex_owned(namecache_lock)); 1222 1223 /* 1224 * If the number of extant entries not awaiting garbage collection 1225 * exceeds the high water mark, then reclaim stale entries until we 1226 * reach our low water mark. 1227 */ 1228 items = numcache - cache_gcpend; 1229 if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) { 1230 cache_prune(items, (int)((uint64_t)desiredvnodes * 1231 cache_lowat / 100)); 1232 cache_ev_over.ev_count++; 1233 } else 1234 cache_ev_under.ev_count++; 1235 1236 /* 1237 * Stop forward lookup activity on all CPUs and garbage collect dead 1238 * entries. 
1239 */ 1240 cache_lock_cpus(); 1241 ncp = cache_gcqueue; 1242 cache_gcqueue = NULL; 1243 items = cache_gcpend; 1244 cache_gcpend = 0; 1245 while (ncp != NULL) { 1246 next = ncp->nc_gcqueue; 1247 cache_disassociate(ncp); 1248 KASSERT(ncp->nc_dvp == NULL); 1249 if (ncp->nc_hash.le_prev != NULL) { 1250 LIST_REMOVE(ncp, nc_hash); 1251 ncp->nc_hash.le_prev = NULL; 1252 } 1253 if (ncp->nc_nlen > NCHNAMLEN) { 1254 cache_dtor(NULL, ncp); 1255 kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen); 1256 } else 1257 pool_cache_put(namecache_cache, ncp); 1258 ncp = next; 1259 } 1260 cache_unlock_cpus(); 1261 numcache -= items; 1262 cache_ev_gc.ev_count += items; 1263 } 1264 1265 /* 1266 * Cache maintainence thread, awakening once per second to: 1267 * 1268 * => keep number of entries below the high water mark 1269 * => sort pseudo-LRU list 1270 * => garbage collect dead entries 1271 */ 1272 static void 1273 cache_thread(void *arg) 1274 { 1275 1276 mutex_enter(namecache_lock); 1277 for (;;) { 1278 cache_reclaim(); 1279 kpause("cachegc", false, hz, namecache_lock); 1280 } 1281 } 1282 1283 #ifdef DDB 1284 void 1285 namecache_print(struct vnode *vp, void (*pr)(const char *, ...)) 1286 { 1287 struct vnode *dvp = NULL; 1288 struct namecache *ncp; 1289 1290 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 1291 if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) { 1292 (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name); 1293 dvp = ncp->nc_dvp; 1294 } 1295 } 1296 if (dvp == NULL) { 1297 (*pr)("name not found\n"); 1298 return; 1299 } 1300 vp = dvp; 1301 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 1302 if (ncp->nc_vp == vp) { 1303 (*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name); 1304 } 1305 } 1306 } 1307 #endif 1308 1309 void 1310 namecache_count_pass2(void) 1311 { 1312 struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; 1313 1314 COUNT_UNL(cpup, ncs_pass2); 1315 } 1316 1317 void 1318 namecache_count_2passes(void) 1319 { 1320 struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; 1321 1322 COUNT_UNL(cpup, ncs_2passes); 1323 } 1324 1325 /* 1326 * Fetch the current values of the stats. We return the most 1327 * recent values harvested into nchstats by cache_reclaim(), which 1328 * will be less than a second old. 1329 */ 1330 static int 1331 cache_stat_sysctl(SYSCTLFN_ARGS) 1332 { 1333 struct nchstats stats; 1334 struct nchcpu *my_cpup; 1335 #ifdef CACHE_STATS_CURRENT 1336 CPU_INFO_ITERATOR cii; 1337 struct cpu_info *ci; 1338 #endif /* CACHE_STATS_CURRENT */ 1339 1340 if (oldp == NULL) { 1341 *oldlenp = sizeof(stats); 1342 return 0; 1343 } 1344 1345 if (*oldlenp < sizeof(stats)) { 1346 *oldlenp = 0; 1347 return 0; 1348 } 1349 1350 /* 1351 * Take this CPU's per-cpu lock to hold off cache_reclaim() 1352 * from doing a stats update while doing minimal damage to 1353 * concurrent operations. 
1354 */ 1355 sysctl_unlock(); 1356 my_cpup = curcpu()->ci_data.cpu_nch; 1357 mutex_enter(&my_cpup->cpu_lock); 1358 stats = nchstats; 1359 #ifdef CACHE_STATS_CURRENT 1360 for (CPU_INFO_FOREACH(cii, ci)) { 1361 struct nchcpu *cpup = ci->ci_data.cpu_nch; 1362 1363 ADD(stats, cpup, ncs_goodhits); 1364 ADD(stats, cpup, ncs_neghits); 1365 ADD(stats, cpup, ncs_badhits); 1366 ADD(stats, cpup, ncs_falsehits); 1367 ADD(stats, cpup, ncs_miss); 1368 ADD(stats, cpup, ncs_long); 1369 ADD(stats, cpup, ncs_pass2); 1370 ADD(stats, cpup, ncs_2passes); 1371 ADD(stats, cpup, ncs_revhits); 1372 ADD(stats, cpup, ncs_revmiss); 1373 } 1374 #endif /* CACHE_STATS_CURRENT */ 1375 mutex_exit(&my_cpup->cpu_lock); 1376 sysctl_relock(); 1377 1378 *oldlenp = sizeof(stats); 1379 return sysctl_copyout(l, &stats, oldp, sizeof(stats)); 1380 } 1381 1382 static void 1383 sysctl_cache_stat_setup(void) 1384 { 1385 1386 KASSERT(sysctllog == NULL); 1387 sysctl_createv(&sysctllog, 0, NULL, NULL, 1388 CTLFLAG_PERMANENT, 1389 CTLTYPE_STRUCT, "namecache_stats", 1390 SYSCTL_DESCR("namecache statistics"), 1391 cache_stat_sysctl, 0, NULL, 0, 1392 CTL_VFS, CTL_CREATE, CTL_EOL); 1393 } 1394