/*	$NetBSD: vfs_cache.c,v 1.92 2013/10/29 09:53:51 hannken Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.92 2013/10/29 09:53:51 hannken Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#include <sys/kmem.h>		/* kmem_zalloc() is used by cache_cpu_init() */

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (dvp, name), where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number has changed while waiting for the lock.
 */

/*
 * Per-cpu namecache data.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};

/*
 * The type for the hash code.  While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is.  For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

#define	NCHASH2(hash, dvp)	\
	(((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)	(((uintptr_t)(vp) >> 3) & ncvhash)
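
/*
 * Illustrative sketch only, not compiled: how the two hash tables are
 * indexed.  A forward lookup mixes the name hash with the directory
 * vnode pointer via NCHASH2(); the reverse table is indexed by the
 * child vnode pointer alone via NCVHASH().  cache_hash() is defined
 * further below; the variables here are placeholders.
 */
#if 0
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;

	/* forward table: (directory vnode, name) -> bucket */
	ncpp = &nchashtbl[NCHASH2(cache_hash(name, namelen), dvp)];

	/* reverse table: child vnode -> bucket, used by cache_revlookup() */
	nvcpp = &ncvhashtbl[NCVHASH(vp)];
#endif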

/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

/* Cache effectiveness statistics. */
struct nchstats	nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)

static const int cache_lowat = 95;
static const int cache_hiwat = 98;
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static struct namecache *cache_lookup_entry(
    const struct vnode *, const char *, size_t);
static void	cache_thread(void *);
static void	cache_invalidate(struct namecache *);
static void	cache_disassociate(struct namecache *);
static void	cache_reclaim(void);
static int	cache_ctor(void *, void *, int);
static void	cache_dtor(void *, void *);

/*
 * Compute the hash for an entry.
 *
 * (This is for now a wrapper around namei_hash, whose interface is
 * for the time being slightly inconvenient.)
 */
static nchash_t
cache_hash(const char *name, size_t namelen)
{
	const char *endptr;

	endptr = name + namelen;
	return namei_hash(name, &endptr);
}

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove it from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;
	long *s, *d, *m;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);

		/* Collate statistics. */
		d = (long *)&nchstats;
		s = (long *)&cpup->cpu_stats;
		m = s + sizeof(nchstats) / sizeof(long);
		for (; s < m; s++, d++) {
			*d += *s;
			*s = 0;
		}
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}
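
/*
 * Illustrative sketch only, not compiled: the reader/writer pattern
 * implied by the routines above.  Lookups ("readers") serialize only
 * against their own CPU's nchcpu lock; reclamation and rehashing
 * ("writers") take namecache_lock and then every per-CPU lock via
 * cache_lock_cpus() to exclude all readers at once.
 */
#if 0
	/* reader side, as in cache_lookup() below */
	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;

	mutex_enter(&cpup->cpu_lock);
	/* ... walk a hash chain, take ncp->nc_lock for the hit ... */
	mutex_exit(&cpup->cpu_lock);

	/* writer side, as in cache_reclaim() and nchreinit() below */
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	/* ... modify the hash tables / free dead entries ... */
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
#endif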

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;
	nchash_t hash;

	KASSERT(dvp != NULL);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != namelen ||
		    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * and CNP providing the name of the entry being sought: cn_nameptr
 * is the name, cn_namelen is its length, and cn_flags is the flags
 * word from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache.  Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null.  In this case an
 * assertion is made that the whiteout flag is not set.  File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is locked, as per the vnode lookup
 * locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
 */
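
/*
 * Illustrative sketch only, not compiled: how a file system's lookup
 * VOP might consume the three cache_lookup() result states described
 * above, and how it would later repopulate the cache with
 * cache_enter().  The names fs_lookup and fs_scan_directory are
 * hypothetical placeholders for the file system's own routines.
 */
#if 0
static int
fs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	int iswht, error;

	if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_nameiop, cnp->cn_flags, &iswht, vpp)) {
		if (*vpp != NULL)
			return 0;		/* positive hit, vnode locked */
		if (iswht)
			cnp->cn_flags |= ISWHITEOUT;
		return ENOENT;			/* negative hit */
	}

	/* Cache miss: scan the directory the hard way ... */
	error = fs_scan_directory(dvp, vpp, cnp);

	/* ... and remember the result (*vpp may be NULL for a negative entry). */
	cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	return error;
}
#endif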
int
cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
	     uint32_t nameiop, uint32_t cnflags,
	     int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default result values */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if ((cnflags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * Last component and we are renaming or deleting,
		 * the cache entry is invalid, or otherwise don't
		 * want cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if (ncp->nc_vp == NULL) {
		if (iswht_ret != NULL) {
			/*
			 * Restore the ISWHITEOUT flag saved earlier.
			 */
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		} else {
			KASSERT(ncp->nc_flags == 0);
		}

		if (__predict_true(nameiop != CREATE ||
		    (cnflags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found neg entry; vn is already null from above */
			return 1;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * Last component and we are renaming or
			 * deleting, the cache entry is invalid,
			 * or otherwise don't want cache entry to
			 * exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found nothing */
			return 0;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock, we can't use this pointer
	 * any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnflags & ISDOTDOT) {
		VOP_UNLOCK(dvp);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* We don't have the right lock, but this is only for stats. */
		COUNT(cpup->cpu_stats, ncs_badhits);

		vrele(vp);
		/* found nothing */
		return 0;
	}

	/* We don't have the right lock, but this is only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);

	/* found it */
	*vn_ret = vp;
	return 1;
}

/*
 * Cut-down version of the above: it does not consider nameiop or
 * MAKEENTRY and does not apply the namei locking protocol to the
 * result; a positive hit returns a referenced but unlocked vnode.
 */
int
cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
		 uint32_t cnflags,
		 int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default results. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		/* found nothing */
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		if (iswht_ret != NULL) {
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			/*cnp->cn_flags |= ncp->nc_flags;*/
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		}
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found negative entry; vn is already null from above */
		return 1;
	}
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);	/* XXX can be "badhits" */

	/* found it */
	*vn_ret = vp;
	return 1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds the vnode is referenced and stored in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(nchstats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			mutex_enter(dvp->v_interlock);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			error = vget(dvp, LK_NOWAIT);
			if (error) {
				KASSERT(error == EBUSY);
				if (bufp)
					(*bpp) += nlen;
				*dvpp = NULL;
				return -1;
			}
			*dvpp = dvp;
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(nchstats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}
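
/*
 * Illustrative sketch only, not compiled: the getcwd-style usage the
 * comment above alludes to.  Each cache_revlookup() call copies one
 * name component backwards into the caller's buffer while walking
 * from vp towards the root.  The variables pathbuf and bp are
 * placeholders, not part of this file.
 */
#if 0
	char pathbuf[MAXPATHLEN];
	char *bp = pathbuf + sizeof(pathbuf);	/* build the path backwards */
	struct vnode *dvp;

	while (vp != rootvnode) {
		if (cache_revlookup(vp, &dvp, &bp, pathbuf) != 0)
			break;		/* miss or error: fall back to a directory scan */
		*--bp = '/';
		vrele(vp);
		vp = dvp;		/* cache_revlookup() referenced dvp for us */
	}
#endif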

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp,
	    const char *name, size_t namelen, uint32_t cnflags)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;
	nchash_t hash;

	/* First, check whether we can/should add a cache entry. */
	if ((cnflags & MAKEENTRY) == 0 ||
	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, name, namelen);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Grab the vnode we just found. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnflags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(namelen <= NCHNAMLEN);
	ncp->nc_nlen = namelen;
	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd, and in the
	 * linux procfs exe node) for directories.
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	    "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	    "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	    "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

	/*
	 * Over-allocate and round the pointer up so the per-CPU
	 * structure starts on a cache line boundary.
	 */
	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (name != NULL) {
		ncp = cache_lookup_entry(vp, name, namelen);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when filesys is umounted to
 * remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}
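
/*
 * Illustrative sketch only, not compiled: how the purge interfaces
 * above are typically driven.  On rename or removal a file system
 * drops both the entries naming vp and the entries cached under it;
 * a single stale (dvp, name) pair can be dropped by name; on unmount
 * the whole mount's entries go at once.
 */
#if 0
	/* vp was renamed or removed: drop entries naming it and entries under it */
	cache_purge1(vp, NULL, 0, PURGE_PARENTS | PURGE_CHILDREN);

	/* only the single (dvp, name) entry is stale */
	cache_purge1(dvp, name, namelen, 0);

	/* the file system mounted at mp is being unmounted */
	cache_purgevfs(mp);
#endif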

/*
 * Scan global list invalidating entries until we meet a preset target.
 * Prefer to invalidate entries that have not scored a hit within
 * cache_hottime seconds.  We sort the LRU list only for this routine's
 * benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 *	=> keep number of entries below the high water mark
 *	=> sort pseudo-LRU list
 *	=> garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif