1 /* $NetBSD: vfs_cache.c,v 1.109 2015/12/05 05:23:35 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 
56 * 57 * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94 58 */ 59 60 #include <sys/cdefs.h> 61 __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.109 2015/12/05 05:23:35 dholland Exp $"); 62 63 #ifdef _KERNEL_OPT 64 #include "opt_ddb.h" 65 #include "opt_revcache.h" 66 #include "opt_dtrace.h" 67 #endif 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/sysctl.h> 72 #include <sys/time.h> 73 #include <sys/mount.h> 74 #include <sys/vnode.h> 75 #include <sys/namei.h> 76 #include <sys/errno.h> 77 #include <sys/pool.h> 78 #include <sys/mutex.h> 79 #include <sys/atomic.h> 80 #include <sys/kthread.h> 81 #include <sys/kernel.h> 82 #include <sys/cpu.h> 83 #include <sys/evcnt.h> 84 #include <sys/sdt.h> 85 86 #define NAMECACHE_ENTER_REVERSE 87 /* 88 * Name caching works as follows: 89 * 90 * Names found by directory scans are retained in a cache 91 * for future reference. It is managed LRU, so frequently 92 * used names will hang around. Cache is indexed by hash value 93 * obtained from (dvp, name) where dvp refers to the directory 94 * containing name. 95 * 96 * For simplicity (and economy of storage), names longer than 97 * a maximum length of NCHNAMLEN are not cached; they occur 98 * infrequently in any case, and are almost never of interest. 99 * 100 * Upon reaching the last segment of a path, if the reference 101 * is for DELETE, or NOCACHE is set (rewrite), and the 102 * name is located in the cache, it will be dropped. 103 * The entry is dropped also when it was not possible to lock 104 * the cached vnode, either because vget() failed or the generation 105 * number has changed while waiting for the lock. 106 */ 107 108 /* 109 * The locking in this subsystem works as follows: 110 * 111 * When an entry is added to the cache, via cache_enter(), 112 * namecache_lock is taken to exclude other writers. The new 113 * entry is added to the hash list in a way which permits 114 * concurrent lookups and invalidations in the cache done on 115 * other CPUs to continue in parallel. 116 * 117 * When a lookup is done in the cache, via cache_lookup() or 118 * cache_lookup_raw(), the per-cpu lock below is taken. This 119 * protects calls to cache_lookup_entry() and cache_invalidate() 120 * against cache_reclaim() but allows lookups to continue in 121 * parallel with cache_enter(). 122 * 123 * cache_revlookup() takes namecache_lock to exclude cache_enter() 124 * and cache_reclaim() since the list it operates on is not 125 * maintained to allow concurrent reads. 126 * 127 * When cache_reclaim() is called namecache_lock is held to hold 128 * off calls to cache_enter()/cache_revlookup() and each of the 129 * per-cpu locks is taken to hold off lookups. Holding all these 130 * locks essentially idles the subsystem, ensuring there are no 131 * concurrent references to the cache entries being freed. 132 * 133 * 32 bit per-cpu statistic counters (struct nchstats_percpu) are 134 * incremented when the operations they count are performed while 135 * running on the corresponding CPU. Frequently individual counters 136 * are incremented while holding a lock (either a per-cpu lock or 137 * namecache_lock) sufficient to preclude concurrent increments 138 * being done to the same counter, so non-atomic increments are 139 * done using the COUNT() macro. Counters which are incremented 140 * when one of these locks is not held use the COUNT_UNL() macro 141 * instead. 
COUNT_UNL() could be defined to do atomic increments 142 * but currently just does what COUNT() does, on the theory that 143 * it is unlikely the non-atomic increment will be interrupted 144 * by something on the same CPU that increments the same counter, 145 * but even if it does happen the consequences aren't serious. 146 * 147 * N.B.: Attempting to protect COUNT_UNL() increments by taking 148 * a per-cpu lock in the namecache_count_*() functions causes 149 * a deadlock. Don't do that, use atomic increments instead if 150 * the imperfections here bug you. 151 * 152 * The 64 bit system-wide statistic counts (struct nchstats) are 153 * maintained by sampling the per-cpu counters periodically, adding 154 * in the deltas since the last samples and recording the current 155 * samples to use to compute the next delta. The sampling is done 156 * as a side effect of cache_reclaim() which is run periodically, 157 * for its own purposes, often enough to avoid overflow of the 32 158 * bit counters. While sampling in this fashion requires no locking 159 * it is never-the-less done only after all locks have been taken by 160 * cache_reclaim() to allow cache_stat_sysctl() to hold off 161 * cache_reclaim() with minimal locking. 162 * 163 * cache_stat_sysctl() takes its CPU's per-cpu lock to hold off 164 * cache_reclaim() so that it can copy the subsystem total stats 165 * without them being concurrently modified. If CACHE_STATS_CURRENT 166 * is defined it also harvests the per-cpu increments into the total, 167 * which again requires cache_reclaim() to be held off. 168 * 169 * The per-cpu data (a lock and the per-cpu stats structures) 170 * are defined next. 171 */ 172 struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t); 173 174 struct nchcpu { 175 kmutex_t cpu_lock; 176 struct nchstats_percpu cpu_stats; 177 /* XXX maybe __cacheline_aligned would improve this? */ 178 struct nchstats_percpu cpu_stats_last; /* from last sample */ 179 }; 180 181 /* 182 * The type for the hash code. While the hash function generates a 183 * u32, the hash code has historically been passed around as a u_long, 184 * and the value is modified by xor'ing a uintptr_t, so it's not 185 * entirely clear what the best type is. For now I'll leave it 186 * unchanged as u_long. 187 */ 188 189 typedef u_long nchash_t; 190 191 /* 192 * Structures associated with name cacheing. 193 */ 194 195 static kmutex_t *namecache_lock __read_mostly; 196 static pool_cache_t namecache_cache __read_mostly; 197 static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned; 198 199 static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly; 200 static u_long nchash __read_mostly; 201 202 #define NCHASH2(hash, dvp) \ 203 (((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash) 204 205 static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly; 206 static u_long ncvhash __read_mostly; 207 208 #define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash) 209 210 /* Number of cache entries allocated. */ 211 static long numcache __cacheline_aligned; 212 213 /* Garbage collection queue and number of entries pending in it. */ 214 static void *cache_gcqueue; 215 static u_int cache_gcpend; 216 217 /* Cache effectiveness statistics. This holds total from per-cpu stats */ 218 struct nchstats nchstats __cacheline_aligned; 219 220 /* 221 * Macros to count an event, update the central stats with per-cpu 222 * values and add current per-cpu increments to the subsystem total 223 * last collected by cache_reclaim(). 
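 *
 * As a worked example of the sampling that UPDATE() below performs
 * (the numbers are illustrative only): if a CPU's
 * cpu_stats.ncs_goodhits has reached 1000 and the
 * cpu_stats_last.ncs_goodhits recorded at the previous sample was 900,
 * the delta of 100 is added to nchstats.ncs_goodhits and 1000 is stored
 * as the new "last" value, so the next sample adds only increments made
 * after this one.  ADD() computes the same delta into a caller-supplied
 * snapshot without updating the recorded sample.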
224 */ 225 #define CACHE_STATS_CURRENT /* nothing */ 226 227 #define COUNT(cpup, f) ((cpup)->cpu_stats.f++) 228 229 #define UPDATE(cpup, f) do { \ 230 struct nchcpu *Xcpup = (cpup); \ 231 uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \ 232 nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \ 233 Xcpup->cpu_stats_last.f = Xcnt; \ 234 } while (/* CONSTCOND */ 0) 235 236 #define ADD(stats, cpup, f) do { \ 237 struct nchcpu *Xcpup = (cpup); \ 238 stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \ 239 } while (/* CONSTCOND */ 0) 240 241 /* Do unlocked stats the same way. Use a different name to allow mind changes */ 242 #define COUNT_UNL(cpup, f) COUNT((cpup), f) 243 244 static const int cache_lowat = 95; 245 static const int cache_hiwat = 98; 246 static const int cache_hottime = 5; /* number of seconds */ 247 static int doingcache = 1; /* 1 => enable the cache */ 248 249 static struct evcnt cache_ev_scan; 250 static struct evcnt cache_ev_gc; 251 static struct evcnt cache_ev_over; 252 static struct evcnt cache_ev_under; 253 static struct evcnt cache_ev_forced; 254 255 static void cache_invalidate(struct namecache *); 256 static struct namecache *cache_lookup_entry( 257 const struct vnode *, const char *, size_t); 258 static void cache_thread(void *); 259 static void cache_invalidate(struct namecache *); 260 static void cache_disassociate(struct namecache *); 261 static void cache_reclaim(void); 262 static int cache_ctor(void *, void *, int); 263 static void cache_dtor(void *, void *); 264 265 static struct sysctllog *sysctllog; 266 static void sysctl_cache_stat_setup(void); 267 268 SDT_PROVIDER_DEFINE(vfs); 269 270 SDT_PROBE_DEFINE1(vfs, namecache, invalidate, done, "struct vnode *"); 271 SDT_PROBE_DEFINE1(vfs, namecache, purge, parents, "struct vnode *"); 272 SDT_PROBE_DEFINE1(vfs, namecache, purge, children, "struct vnode *"); 273 SDT_PROBE_DEFINE2(vfs, namecache, purge, name, "char *", "size_t"); 274 SDT_PROBE_DEFINE1(vfs, namecache, purge, vfs, "struct mount *"); 275 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", 276 "char *", "size_t"); 277 SDT_PROBE_DEFINE3(vfs, namecache, lookup, miss, "struct vnode *", 278 "char *", "size_t"); 279 SDT_PROBE_DEFINE3(vfs, namecache, lookup, toolong, "struct vnode *", 280 "char *", "size_t"); 281 SDT_PROBE_DEFINE2(vfs, namecache, revlookup, success, "struct vnode *", 282 "struct vnode *"); 283 SDT_PROBE_DEFINE2(vfs, namecache, revlookup, fail, "struct vnode *", 284 "int"); 285 SDT_PROBE_DEFINE2(vfs, namecache, prune, done, "int", "int"); 286 SDT_PROBE_DEFINE3(vfs, namecache, enter, toolong, "struct vnode *", 287 "char *", "size_t"); 288 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", 289 "char *", "size_t"); 290 291 /* 292 * Compute the hash for an entry. 293 * 294 * (This is for now a wrapper around namei_hash, whose interface is 295 * for the time being slightly inconvenient.) 296 */ 297 static nchash_t 298 cache_hash(const char *name, size_t namelen) 299 { 300 const char *endptr; 301 302 endptr = name + namelen; 303 return namei_hash(name, &endptr); 304 } 305 306 /* 307 * Invalidate a cache entry and enqueue it for garbage collection. 308 * The caller needs to hold namecache_lock or a per-cpu lock to hold 309 * off cache_reclaim(). 
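 *
 * Invalidated entries are pushed onto cache_gcqueue with an atomic
 * compare-and-swap, forming a simple lock-free LIFO list; they are
 * unhashed and freed later by cache_reclaim().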
310 */ 311 static void 312 cache_invalidate(struct namecache *ncp) 313 { 314 void *head; 315 316 KASSERT(mutex_owned(&ncp->nc_lock)); 317 318 if (ncp->nc_dvp != NULL) { 319 SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp, 320 0, 0, 0, 0); 321 322 ncp->nc_vp = NULL; 323 ncp->nc_dvp = NULL; 324 do { 325 head = cache_gcqueue; 326 ncp->nc_gcqueue = head; 327 } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head); 328 atomic_inc_uint(&cache_gcpend); 329 } 330 } 331 332 /* 333 * Disassociate a namecache entry from any vnodes it is attached to, 334 * and remove from the global LRU list. 335 */ 336 static void 337 cache_disassociate(struct namecache *ncp) 338 { 339 340 KASSERT(mutex_owned(namecache_lock)); 341 KASSERT(ncp->nc_dvp == NULL); 342 343 if (ncp->nc_lru.tqe_prev != NULL) { 344 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 345 ncp->nc_lru.tqe_prev = NULL; 346 } 347 if (ncp->nc_vhash.le_prev != NULL) { 348 LIST_REMOVE(ncp, nc_vhash); 349 ncp->nc_vhash.le_prev = NULL; 350 } 351 if (ncp->nc_vlist.le_prev != NULL) { 352 LIST_REMOVE(ncp, nc_vlist); 353 ncp->nc_vlist.le_prev = NULL; 354 } 355 if (ncp->nc_dvlist.le_prev != NULL) { 356 LIST_REMOVE(ncp, nc_dvlist); 357 ncp->nc_dvlist.le_prev = NULL; 358 } 359 } 360 361 /* 362 * Lock all CPUs to prevent any cache lookup activity. Conceptually, 363 * this locks out all "readers". 364 */ 365 static void 366 cache_lock_cpus(void) 367 { 368 CPU_INFO_ITERATOR cii; 369 struct cpu_info *ci; 370 struct nchcpu *cpup; 371 372 /* 373 * Lock out all CPUs first, then harvest per-cpu stats. This 374 * is probably not quite as cache-efficient as doing the lock 375 * and harvest at the same time, but allows cache_stat_sysctl() 376 * to make do with a per-cpu lock. 377 */ 378 for (CPU_INFO_FOREACH(cii, ci)) { 379 cpup = ci->ci_data.cpu_nch; 380 mutex_enter(&cpup->cpu_lock); 381 } 382 for (CPU_INFO_FOREACH(cii, ci)) { 383 cpup = ci->ci_data.cpu_nch; 384 UPDATE(cpup, ncs_goodhits); 385 UPDATE(cpup, ncs_neghits); 386 UPDATE(cpup, ncs_badhits); 387 UPDATE(cpup, ncs_falsehits); 388 UPDATE(cpup, ncs_miss); 389 UPDATE(cpup, ncs_long); 390 UPDATE(cpup, ncs_pass2); 391 UPDATE(cpup, ncs_2passes); 392 UPDATE(cpup, ncs_revhits); 393 UPDATE(cpup, ncs_revmiss); 394 } 395 } 396 397 /* 398 * Release all CPU locks. 399 */ 400 static void 401 cache_unlock_cpus(void) 402 { 403 CPU_INFO_ITERATOR cii; 404 struct cpu_info *ci; 405 struct nchcpu *cpup; 406 407 for (CPU_INFO_FOREACH(cii, ci)) { 408 cpup = ci->ci_data.cpu_nch; 409 mutex_exit(&cpup->cpu_lock); 410 } 411 } 412 413 /* 414 * Find a single cache entry and return it locked. 415 * The caller needs to hold namecache_lock or a per-cpu lock to hold 416 * off cache_reclaim(). 417 */ 418 static struct namecache * 419 cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen) 420 { 421 struct nchashhead *ncpp; 422 struct namecache *ncp; 423 nchash_t hash; 424 425 KASSERT(dvp != NULL); 426 hash = cache_hash(name, namelen); 427 ncpp = &nchashtbl[NCHASH2(hash, dvp)]; 428 429 LIST_FOREACH(ncp, ncpp, nc_hash) { 430 membar_datadep_consumer(); /* for Alpha... */ 431 if (ncp->nc_dvp != dvp || 432 ncp->nc_nlen != namelen || 433 memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen)) 434 continue; 435 mutex_enter(&ncp->nc_lock); 436 if (__predict_true(ncp->nc_dvp == dvp)) { 437 ncp->nc_hittime = hardclock_ticks; 438 SDT_PROBE(vfs, namecache, lookup, hit, dvp, 439 name, namelen, 0, 0); 440 return ncp; 441 } 442 /* Raced: entry has been nullified. 
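		 * The entry was invalidated by another thread between the
		 * unlocked check of nc_dvp above and our acquisition of
		 * nc_lock; skip it.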
		 */
		mutex_exit(&ncp->nc_lock);
	}

	SDT_PROBE(vfs, namecache, lookup, miss, dvp,
	    name, namelen, 0, 0);
	return NULL;
}

/*
 * Look for the name in the cache. We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * and CNP providing the name of the entry being sought: cn_nameptr
 * is the name, cn_namelen is its length, and cn_flags is the flags
 * word from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache. Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null. In this case an
 * assertion is made that the whiteout flag is not set. File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is locked, as per the vnode lookup
 * locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
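 *
 * A minimal usage sketch (an assumption about a typical VOP_LOOKUP()
 * fast path, not a quote of any particular file system; handling of
 * the returned vnode and full error handling are elided).  A positive
 * hit returns 0 to the caller with *vpp set, a negative hit returns
 * ENOENT, and a miss falls through to a real directory scan:
 *
 *	int iswht = 0;
 *
 *	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 *	    cnp->cn_nameiop, cnp->cn_flags, &iswht, vpp)) {
 *		if (*vpp != NULL)
 *			return 0;
 *		if (iswht)
 *			cnp->cn_flags |= ISWHITEOUT;
 *		return ENOENT;
 *	}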
500 */ 501 int 502 cache_lookup(struct vnode *dvp, const char *name, size_t namelen, 503 uint32_t nameiop, uint32_t cnflags, 504 int *iswht_ret, struct vnode **vn_ret) 505 { 506 struct namecache *ncp; 507 struct vnode *vp; 508 struct nchcpu *cpup; 509 int error, ret_value; 510 511 512 /* Establish default result values */ 513 if (iswht_ret != NULL) { 514 *iswht_ret = 0; 515 } 516 *vn_ret = NULL; 517 518 if (__predict_false(!doingcache)) { 519 return 0; 520 } 521 522 cpup = curcpu()->ci_data.cpu_nch; 523 mutex_enter(&cpup->cpu_lock); 524 if (__predict_false(namelen > NCHNAMLEN)) { 525 SDT_PROBE(vfs, namecache, lookup, toolong, dvp, 526 name, namelen, 0, 0); 527 COUNT(cpup, ncs_long); 528 mutex_exit(&cpup->cpu_lock); 529 /* found nothing */ 530 return 0; 531 } 532 533 ncp = cache_lookup_entry(dvp, name, namelen); 534 if (__predict_false(ncp == NULL)) { 535 COUNT(cpup, ncs_miss); 536 mutex_exit(&cpup->cpu_lock); 537 /* found nothing */ 538 return 0; 539 } 540 if ((cnflags & MAKEENTRY) == 0) { 541 COUNT(cpup, ncs_badhits); 542 /* 543 * Last component and we are renaming or deleting, 544 * the cache entry is invalid, or otherwise don't 545 * want cache entry to exist. 546 */ 547 cache_invalidate(ncp); 548 mutex_exit(&ncp->nc_lock); 549 mutex_exit(&cpup->cpu_lock); 550 /* found nothing */ 551 return 0; 552 } 553 if (ncp->nc_vp == NULL) { 554 if (iswht_ret != NULL) { 555 /* 556 * Restore the ISWHITEOUT flag saved earlier. 557 */ 558 KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); 559 *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; 560 } else { 561 KASSERT(ncp->nc_flags == 0); 562 } 563 564 if (__predict_true(nameiop != CREATE || 565 (cnflags & ISLASTCN) == 0)) { 566 COUNT(cpup, ncs_neghits); 567 /* found neg entry; vn is already null from above */ 568 ret_value = 1; 569 } else { 570 COUNT(cpup, ncs_badhits); 571 /* 572 * Last component and we are preparing to create 573 * the named object, so flush the negative cache 574 * entry. 575 */ 576 cache_invalidate(ncp); 577 /* found nothing */ 578 ret_value = 0; 579 } 580 mutex_exit(&ncp->nc_lock); 581 mutex_exit(&cpup->cpu_lock); 582 return ret_value; 583 } 584 585 vp = ncp->nc_vp; 586 mutex_enter(vp->v_interlock); 587 mutex_exit(&ncp->nc_lock); 588 mutex_exit(&cpup->cpu_lock); 589 590 /* 591 * Unlocked except for the vnode interlock. Call vget(). 592 */ 593 error = vget(vp, LK_NOWAIT, false /* !wait */); 594 if (error) { 595 KASSERT(error == EBUSY); 596 /* 597 * This vnode is being cleaned out. 598 * XXX badhits? 599 */ 600 COUNT_UNL(cpup, ncs_falsehits); 601 /* found nothing */ 602 return 0; 603 } 604 605 COUNT_UNL(cpup, ncs_goodhits); 606 /* found it */ 607 *vn_ret = vp; 608 return 1; 609 } 610 611 612 /* 613 * Cut-'n-pasted version of the above without the nameiop argument. 614 */ 615 int 616 cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen, 617 uint32_t cnflags, 618 int *iswht_ret, struct vnode **vn_ret) 619 { 620 struct namecache *ncp; 621 struct vnode *vp; 622 struct nchcpu *cpup; 623 int error; 624 625 /* Establish default results. 
*/ 626 if (iswht_ret != NULL) { 627 *iswht_ret = 0; 628 } 629 *vn_ret = NULL; 630 631 if (__predict_false(!doingcache)) { 632 /* found nothing */ 633 return 0; 634 } 635 636 cpup = curcpu()->ci_data.cpu_nch; 637 mutex_enter(&cpup->cpu_lock); 638 if (__predict_false(namelen > NCHNAMLEN)) { 639 COUNT(cpup, ncs_long); 640 mutex_exit(&cpup->cpu_lock); 641 /* found nothing */ 642 return 0; 643 } 644 ncp = cache_lookup_entry(dvp, name, namelen); 645 if (__predict_false(ncp == NULL)) { 646 COUNT(cpup, ncs_miss); 647 mutex_exit(&cpup->cpu_lock); 648 /* found nothing */ 649 return 0; 650 } 651 vp = ncp->nc_vp; 652 if (vp == NULL) { 653 /* 654 * Restore the ISWHITEOUT flag saved earlier. 655 */ 656 if (iswht_ret != NULL) { 657 KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); 658 /*cnp->cn_flags |= ncp->nc_flags;*/ 659 *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; 660 } 661 COUNT(cpup, ncs_neghits); 662 mutex_exit(&ncp->nc_lock); 663 mutex_exit(&cpup->cpu_lock); 664 /* found negative entry; vn is already null from above */ 665 return 1; 666 } 667 mutex_enter(vp->v_interlock); 668 mutex_exit(&ncp->nc_lock); 669 mutex_exit(&cpup->cpu_lock); 670 671 /* 672 * Unlocked except for the vnode interlock. Call vget(). 673 */ 674 error = vget(vp, LK_NOWAIT, false /* !wait */); 675 if (error) { 676 KASSERT(error == EBUSY); 677 /* 678 * This vnode is being cleaned out. 679 * XXX badhits? 680 */ 681 COUNT_UNL(cpup, ncs_falsehits); 682 /* found nothing */ 683 return 0; 684 } 685 686 COUNT_UNL(cpup, ncs_goodhits); /* XXX can be "badhits" */ 687 /* found it */ 688 *vn_ret = vp; 689 return 1; 690 } 691 692 /* 693 * Scan cache looking for name of directory entry pointing at vp. 694 * 695 * If the lookup succeeds the vnode is referenced and stored in dvpp. 696 * 697 * If bufp is non-NULL, also place the name in the buffer which starts 698 * at bufp, immediately before *bpp, and move bpp backwards to point 699 * at the start of it. (Yes, this is a little baroque, but it's done 700 * this way to cater to the whims of getcwd). 701 * 702 * Returns 0 on success, -1 on cache miss, positive errno on failure. 703 */ 704 int 705 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp) 706 { 707 struct namecache *ncp; 708 struct vnode *dvp; 709 struct ncvhashhead *nvcpp; 710 struct nchcpu *cpup; 711 char *bp; 712 int error, nlen; 713 714 if (!doingcache) 715 goto out; 716 717 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 718 719 /* 720 * We increment counters in the local CPU's per-cpu stats. 721 * We don't take the per-cpu lock, however, since this function 722 * is the only place these counters are incremented so no one 723 * will be racing with us to increment them. 724 */ 725 cpup = curcpu()->ci_data.cpu_nch; 726 mutex_enter(namecache_lock); 727 LIST_FOREACH(ncp, nvcpp, nc_vhash) { 728 mutex_enter(&ncp->nc_lock); 729 if (ncp->nc_vp == vp && 730 (dvp = ncp->nc_dvp) != NULL && 731 dvp != vp) { /* avoid pesky . entries.. */ 732 733 #ifdef DIAGNOSTIC 734 if (ncp->nc_nlen == 1 && 735 ncp->nc_name[0] == '.') 736 panic("cache_revlookup: found entry for ."); 737 738 if (ncp->nc_nlen == 2 && 739 ncp->nc_name[0] == '.' 
&& 740 ncp->nc_name[1] == '.') 741 panic("cache_revlookup: found entry for .."); 742 #endif 743 COUNT(cpup, ncs_revhits); 744 nlen = ncp->nc_nlen; 745 746 if (bufp) { 747 bp = *bpp; 748 bp -= nlen; 749 if (bp <= bufp) { 750 *dvpp = NULL; 751 mutex_exit(&ncp->nc_lock); 752 mutex_exit(namecache_lock); 753 SDT_PROBE(vfs, namecache, revlookup, 754 fail, vp, ERANGE, 0, 0, 0); 755 return (ERANGE); 756 } 757 memcpy(bp, ncp->nc_name, nlen); 758 *bpp = bp; 759 } 760 761 mutex_enter(dvp->v_interlock); 762 mutex_exit(&ncp->nc_lock); 763 mutex_exit(namecache_lock); 764 error = vget(dvp, LK_NOWAIT, false /* !wait */); 765 if (error) { 766 KASSERT(error == EBUSY); 767 if (bufp) 768 (*bpp) += nlen; 769 *dvpp = NULL; 770 SDT_PROBE(vfs, namecache, revlookup, fail, vp, 771 error, 0, 0, 0); 772 return -1; 773 } 774 *dvpp = dvp; 775 SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp, 776 0, 0, 0); 777 return (0); 778 } 779 mutex_exit(&ncp->nc_lock); 780 } 781 COUNT(cpup, ncs_revmiss); 782 mutex_exit(namecache_lock); 783 out: 784 *dvpp = NULL; 785 return (-1); 786 } 787 788 /* 789 * Add an entry to the cache 790 */ 791 void 792 cache_enter(struct vnode *dvp, struct vnode *vp, 793 const char *name, size_t namelen, uint32_t cnflags) 794 { 795 struct namecache *ncp; 796 struct namecache *oncp; 797 struct nchashhead *ncpp; 798 struct ncvhashhead *nvcpp; 799 nchash_t hash; 800 801 /* First, check whether we can/should add a cache entry. */ 802 if ((cnflags & MAKEENTRY) == 0 || 803 __predict_false(namelen > NCHNAMLEN || !doingcache)) { 804 SDT_PROBE(vfs, namecache, enter, toolong, vp, name, namelen, 805 0, 0); 806 return; 807 } 808 809 SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0); 810 if (numcache > desiredvnodes) { 811 mutex_enter(namecache_lock); 812 cache_ev_forced.ev_count++; 813 cache_reclaim(); 814 mutex_exit(namecache_lock); 815 } 816 817 ncp = pool_cache_get(namecache_cache, PR_WAITOK); 818 mutex_enter(namecache_lock); 819 numcache++; 820 821 /* 822 * Concurrent lookups in the same directory may race for a 823 * cache entry. if there's a duplicated entry, free it. 824 */ 825 oncp = cache_lookup_entry(dvp, name, namelen); 826 if (oncp) { 827 cache_invalidate(oncp); 828 mutex_exit(&oncp->nc_lock); 829 } 830 831 /* Grab the vnode we just found. */ 832 mutex_enter(&ncp->nc_lock); 833 ncp->nc_vp = vp; 834 ncp->nc_flags = 0; 835 ncp->nc_hittime = 0; 836 ncp->nc_gcqueue = NULL; 837 if (vp == NULL) { 838 /* 839 * For negative hits, save the ISWHITEOUT flag so we can 840 * restore it later when the cache entry is used again. 841 */ 842 ncp->nc_flags = cnflags & ISWHITEOUT; 843 } 844 845 /* Fill in cache info. */ 846 ncp->nc_dvp = dvp; 847 LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist); 848 if (vp) 849 LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist); 850 else { 851 ncp->nc_vlist.le_prev = NULL; 852 ncp->nc_vlist.le_next = NULL; 853 } 854 KASSERT(namelen <= NCHNAMLEN); 855 ncp->nc_nlen = namelen; 856 memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen); 857 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 858 hash = cache_hash(name, namelen); 859 ncpp = &nchashtbl[NCHASH2(hash, dvp)]; 860 861 /* 862 * Flush updates before making visible in table. No need for a 863 * memory barrier on the other side: to see modifications the 864 * list must be followed, meaning a dependent pointer load. 865 * The below is LIST_INSERT_HEAD() inlined, with the memory 866 * barrier included in the correct place. 
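	 * The consuming side of this ordering is the hash-chain walk in
	 * cache_lookup_entry(), whose membar_datadep_consumer() makes the
	 * dependent-load ordering explicit; as noted there, it matters in
	 * practice only on Alpha.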
867 */ 868 if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL) 869 ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next; 870 ncp->nc_hash.le_prev = &ncpp->lh_first; 871 membar_producer(); 872 ncpp->lh_first = ncp; 873 874 ncp->nc_vhash.le_prev = NULL; 875 ncp->nc_vhash.le_next = NULL; 876 877 /* 878 * Create reverse-cache entries (used in getcwd) for directories. 879 * (and in linux procfs exe node) 880 */ 881 if (vp != NULL && 882 vp != dvp && 883 #ifndef NAMECACHE_ENTER_REVERSE 884 vp->v_type == VDIR && 885 #endif 886 (ncp->nc_nlen > 2 || 887 (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') || 888 (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) { 889 nvcpp = &ncvhashtbl[NCVHASH(vp)]; 890 LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash); 891 } 892 mutex_exit(&ncp->nc_lock); 893 mutex_exit(namecache_lock); 894 } 895 896 /* 897 * Name cache initialization, from vfs_init() when we are booting 898 */ 899 void 900 nchinit(void) 901 { 902 int error; 903 904 TAILQ_INIT(&nclruhead); 905 namecache_cache = pool_cache_init(sizeof(struct namecache), 906 coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor, 907 cache_dtor, NULL); 908 KASSERT(namecache_cache != NULL); 909 910 namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 911 912 nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash); 913 ncvhashtbl = 914 #ifdef NAMECACHE_ENTER_REVERSE 915 hashinit(desiredvnodes, HASH_LIST, true, &ncvhash); 916 #else 917 hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash); 918 #endif 919 920 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread, 921 NULL, NULL, "cachegc"); 922 if (error != 0) 923 panic("nchinit %d", error); 924 925 evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL, 926 "namecache", "entries scanned"); 927 evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL, 928 "namecache", "entries collected"); 929 evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL, 930 "namecache", "over scan target"); 931 evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL, 932 "namecache", "under scan target"); 933 evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL, 934 "namecache", "forced reclaims"); 935 936 sysctl_cache_stat_setup(); 937 } 938 939 static int 940 cache_ctor(void *arg, void *obj, int flag) 941 { 942 struct namecache *ncp; 943 944 ncp = obj; 945 mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE); 946 947 return 0; 948 } 949 950 static void 951 cache_dtor(void *arg, void *obj) 952 { 953 struct namecache *ncp; 954 955 ncp = obj; 956 mutex_destroy(&ncp->nc_lock); 957 } 958 959 /* 960 * Called once for each CPU in the system as attached. 961 */ 962 void 963 cache_cpu_init(struct cpu_info *ci) 964 { 965 struct nchcpu *cpup; 966 size_t sz; 967 968 sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit; 969 cpup = kmem_zalloc(sz, KM_SLEEP); 970 cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit); 971 mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE); 972 ci->ci_data.cpu_nch = cpup; 973 } 974 975 /* 976 * Name cache reinitialization, for when the maximum number of vnodes increases. 
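 * Existing entries are only unhooked from the old hash chains here, not
 * rehashed into the new tables; they remain on the LRU and per-vnode
 * lists and are disposed of by the normal pruning and purge paths.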
977 */ 978 void 979 nchreinit(void) 980 { 981 struct namecache *ncp; 982 struct nchashhead *oldhash1, *hash1; 983 struct ncvhashhead *oldhash2, *hash2; 984 u_long i, oldmask1, oldmask2, mask1, mask2; 985 986 hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1); 987 hash2 = 988 #ifdef NAMECACHE_ENTER_REVERSE 989 hashinit(desiredvnodes, HASH_LIST, true, &mask2); 990 #else 991 hashinit(desiredvnodes/8, HASH_LIST, true, &mask2); 992 #endif 993 mutex_enter(namecache_lock); 994 cache_lock_cpus(); 995 oldhash1 = nchashtbl; 996 oldmask1 = nchash; 997 nchashtbl = hash1; 998 nchash = mask1; 999 oldhash2 = ncvhashtbl; 1000 oldmask2 = ncvhash; 1001 ncvhashtbl = hash2; 1002 ncvhash = mask2; 1003 for (i = 0; i <= oldmask1; i++) { 1004 while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) { 1005 LIST_REMOVE(ncp, nc_hash); 1006 ncp->nc_hash.le_prev = NULL; 1007 } 1008 } 1009 for (i = 0; i <= oldmask2; i++) { 1010 while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) { 1011 LIST_REMOVE(ncp, nc_vhash); 1012 ncp->nc_vhash.le_prev = NULL; 1013 } 1014 } 1015 cache_unlock_cpus(); 1016 mutex_exit(namecache_lock); 1017 hashdone(oldhash1, HASH_LIST, oldmask1); 1018 hashdone(oldhash2, HASH_LIST, oldmask2); 1019 } 1020 1021 /* 1022 * Cache flush, a particular vnode; called when a vnode is renamed to 1023 * hide entries that would now be invalid 1024 */ 1025 void 1026 cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags) 1027 { 1028 struct namecache *ncp, *ncnext; 1029 1030 mutex_enter(namecache_lock); 1031 if (flags & PURGE_PARENTS) { 1032 SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0); 1033 1034 for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL; 1035 ncp = ncnext) { 1036 ncnext = LIST_NEXT(ncp, nc_vlist); 1037 mutex_enter(&ncp->nc_lock); 1038 cache_invalidate(ncp); 1039 mutex_exit(&ncp->nc_lock); 1040 cache_disassociate(ncp); 1041 } 1042 } 1043 if (flags & PURGE_CHILDREN) { 1044 SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0); 1045 for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL; 1046 ncp = ncnext) { 1047 ncnext = LIST_NEXT(ncp, nc_dvlist); 1048 mutex_enter(&ncp->nc_lock); 1049 cache_invalidate(ncp); 1050 mutex_exit(&ncp->nc_lock); 1051 cache_disassociate(ncp); 1052 } 1053 } 1054 if (name != NULL) { 1055 SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0); 1056 ncp = cache_lookup_entry(vp, name, namelen); 1057 if (ncp) { 1058 cache_invalidate(ncp); 1059 mutex_exit(&ncp->nc_lock); 1060 cache_disassociate(ncp); 1061 } 1062 } 1063 mutex_exit(namecache_lock); 1064 } 1065 1066 /* 1067 * Cache flush, a whole filesystem; called when filesys is umounted to 1068 * remove entries that would now be invalid. 1069 */ 1070 void 1071 cache_purgevfs(struct mount *mp) 1072 { 1073 struct namecache *ncp, *nxtcp; 1074 1075 SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0); 1076 mutex_enter(namecache_lock); 1077 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { 1078 nxtcp = TAILQ_NEXT(ncp, nc_lru); 1079 mutex_enter(&ncp->nc_lock); 1080 if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) { 1081 /* Free the resources we had. */ 1082 cache_invalidate(ncp); 1083 cache_disassociate(ncp); 1084 } 1085 mutex_exit(&ncp->nc_lock); 1086 } 1087 cache_reclaim(); 1088 mutex_exit(namecache_lock); 1089 } 1090 1091 /* 1092 * Scan global list invalidating entries until we meet a preset target. 1093 * Prefer to invalidate entries that have not scored a hit within 1094 * cache_hottime seconds. We sort the LRU list only for this routine's 1095 * benefit. 
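 *
 * As a worked example of the "recent" test below (hz = 100 is an
 * illustrative assumption): with cache_hottime = 5, "recent" is
 * hardclock_ticks - 500, so an entry whose nc_hittime is newer than
 * that (i.e. hit within the last five seconds) is moved to the tail of
 * the LRU instead of being invalidated, unless the scan has already
 * looped back on itself and set tryharder.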
1096 */ 1097 static void 1098 cache_prune(int incache, int target) 1099 { 1100 struct namecache *ncp, *nxtcp, *sentinel; 1101 int items, recent, tryharder; 1102 1103 KASSERT(mutex_owned(namecache_lock)); 1104 1105 SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0); 1106 items = 0; 1107 tryharder = 0; 1108 recent = hardclock_ticks - hz * cache_hottime; 1109 sentinel = NULL; 1110 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { 1111 if (incache <= target) 1112 break; 1113 items++; 1114 nxtcp = TAILQ_NEXT(ncp, nc_lru); 1115 if (ncp == sentinel) { 1116 /* 1117 * If we looped back on ourself, then ignore 1118 * recent entries and purge whatever we find. 1119 */ 1120 tryharder = 1; 1121 } 1122 if (ncp->nc_dvp == NULL) 1123 continue; 1124 if (!tryharder && (ncp->nc_hittime - recent) > 0) { 1125 if (sentinel == NULL) 1126 sentinel = ncp; 1127 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 1128 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 1129 continue; 1130 } 1131 mutex_enter(&ncp->nc_lock); 1132 if (ncp->nc_dvp != NULL) { 1133 cache_invalidate(ncp); 1134 cache_disassociate(ncp); 1135 incache--; 1136 } 1137 mutex_exit(&ncp->nc_lock); 1138 } 1139 cache_ev_scan.ev_count += items; 1140 } 1141 1142 /* 1143 * Collect dead cache entries from all CPUs and garbage collect. 1144 */ 1145 static void 1146 cache_reclaim(void) 1147 { 1148 struct namecache *ncp, *next; 1149 int items; 1150 1151 KASSERT(mutex_owned(namecache_lock)); 1152 1153 /* 1154 * If the number of extant entries not awaiting garbage collection 1155 * exceeds the high water mark, then reclaim stale entries until we 1156 * reach our low water mark. 1157 */ 1158 items = numcache - cache_gcpend; 1159 if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) { 1160 cache_prune(items, (int)((uint64_t)desiredvnodes * 1161 cache_lowat / 100)); 1162 cache_ev_over.ev_count++; 1163 } else 1164 cache_ev_under.ev_count++; 1165 1166 /* 1167 * Stop forward lookup activity on all CPUs and garbage collect dead 1168 * entries. 
1169 */ 1170 cache_lock_cpus(); 1171 ncp = cache_gcqueue; 1172 cache_gcqueue = NULL; 1173 items = cache_gcpend; 1174 cache_gcpend = 0; 1175 while (ncp != NULL) { 1176 next = ncp->nc_gcqueue; 1177 cache_disassociate(ncp); 1178 KASSERT(ncp->nc_dvp == NULL); 1179 if (ncp->nc_hash.le_prev != NULL) { 1180 LIST_REMOVE(ncp, nc_hash); 1181 ncp->nc_hash.le_prev = NULL; 1182 } 1183 pool_cache_put(namecache_cache, ncp); 1184 ncp = next; 1185 } 1186 cache_unlock_cpus(); 1187 numcache -= items; 1188 cache_ev_gc.ev_count += items; 1189 } 1190 1191 /* 1192 * Cache maintainence thread, awakening once per second to: 1193 * 1194 * => keep number of entries below the high water mark 1195 * => sort pseudo-LRU list 1196 * => garbage collect dead entries 1197 */ 1198 static void 1199 cache_thread(void *arg) 1200 { 1201 1202 mutex_enter(namecache_lock); 1203 for (;;) { 1204 cache_reclaim(); 1205 kpause("cachegc", false, hz, namecache_lock); 1206 } 1207 } 1208 1209 #ifdef DDB 1210 void 1211 namecache_print(struct vnode *vp, void (*pr)(const char *, ...)) 1212 { 1213 struct vnode *dvp = NULL; 1214 struct namecache *ncp; 1215 1216 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 1217 if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) { 1218 (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name); 1219 dvp = ncp->nc_dvp; 1220 } 1221 } 1222 if (dvp == NULL) { 1223 (*pr)("name not found\n"); 1224 return; 1225 } 1226 vp = dvp; 1227 TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { 1228 if (ncp->nc_vp == vp) { 1229 (*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name); 1230 } 1231 } 1232 } 1233 #endif 1234 1235 void 1236 namecache_count_pass2(void) 1237 { 1238 struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; 1239 1240 COUNT_UNL(cpup, ncs_pass2); 1241 } 1242 1243 void 1244 namecache_count_2passes(void) 1245 { 1246 struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; 1247 1248 COUNT_UNL(cpup, ncs_2passes); 1249 } 1250 1251 /* 1252 * Fetch the current values of the stats. We return the most 1253 * recent values harvested into nchstats by cache_reclaim(), which 1254 * will be less than a second old. 1255 */ 1256 static int 1257 cache_stat_sysctl(SYSCTLFN_ARGS) 1258 { 1259 struct nchstats stats; 1260 struct nchcpu *my_cpup; 1261 #ifdef CACHE_STATS_CURRENT 1262 CPU_INFO_ITERATOR cii; 1263 struct cpu_info *ci; 1264 #endif /* CACHE_STATS_CURRENT */ 1265 1266 if (oldp == NULL) { 1267 *oldlenp = sizeof(stats); 1268 return 0; 1269 } 1270 1271 if (*oldlenp < sizeof(stats)) { 1272 *oldlenp = 0; 1273 return 0; 1274 } 1275 1276 /* 1277 * Take this CPU's per-cpu lock to hold off cache_reclaim() 1278 * from doing a stats update while doing minimal damage to 1279 * concurrent operations. 
1280 */ 1281 sysctl_unlock(); 1282 my_cpup = curcpu()->ci_data.cpu_nch; 1283 mutex_enter(&my_cpup->cpu_lock); 1284 stats = nchstats; 1285 #ifdef CACHE_STATS_CURRENT 1286 for (CPU_INFO_FOREACH(cii, ci)) { 1287 struct nchcpu *cpup = ci->ci_data.cpu_nch; 1288 1289 ADD(stats, cpup, ncs_goodhits); 1290 ADD(stats, cpup, ncs_neghits); 1291 ADD(stats, cpup, ncs_badhits); 1292 ADD(stats, cpup, ncs_falsehits); 1293 ADD(stats, cpup, ncs_miss); 1294 ADD(stats, cpup, ncs_long); 1295 ADD(stats, cpup, ncs_pass2); 1296 ADD(stats, cpup, ncs_2passes); 1297 ADD(stats, cpup, ncs_revhits); 1298 ADD(stats, cpup, ncs_revmiss); 1299 } 1300 #endif /* CACHE_STATS_CURRENT */ 1301 mutex_exit(&my_cpup->cpu_lock); 1302 sysctl_relock(); 1303 1304 *oldlenp = sizeof(stats); 1305 return sysctl_copyout(l, &stats, oldp, sizeof(stats)); 1306 } 1307 1308 static void 1309 sysctl_cache_stat_setup(void) 1310 { 1311 1312 KASSERT(sysctllog == NULL); 1313 sysctl_createv(&sysctllog, 0, NULL, NULL, 1314 CTLFLAG_PERMANENT, 1315 CTLTYPE_STRUCT, "namecache_stats", 1316 SYSCTL_DESCR("namecache statistics"), 1317 cache_stat_sysctl, 0, NULL, 0, 1318 CTL_VFS, CTL_CREATE, CTL_EOL); 1319 } 1320
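
/*
 * Example (an illustrative sketch, not part of this file's build): the
 * statistics exported above can be read from userland by name.  This
 * assumes sysctlbyname(3) resolves the dynamically numbered node as
 * "vfs.namecache_stats" and that struct nchstats is visible to userland
 * via <sys/namei.h>; adjust to taste.
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/namei.h>
 *	#include <err.h>
 *	#include <inttypes.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct nchstats stats;
 *		size_t len = sizeof(stats);
 *
 *		if (sysctlbyname("vfs.namecache_stats", &stats, &len,
 *		    NULL, 0) == -1)
 *			err(1, "sysctlbyname");
 *		printf("good hits: %" PRIu64 "\n", stats.ncs_goodhits);
 *		return 0;
 *	}
 */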