/*	$NetBSD: vfs_cache.c,v 1.94 2014/02/07 15:29:22 hannken Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.94 2014/02/07 15:29:22 hannken Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by hash value
 * obtained from (dvp, name), where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number changed while waiting for the lock.
 */

/*
 * Per-CPU namecache data.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};

/*
 * The type for the hash code.  While the hash function generates a
 * u32, the hash code has historically been passed around as a u_long,
 * and the value is modified by xor'ing a uintptr_t, so it's not
 * entirely clear what the best type is.  For now I'll leave it
 * unchanged as u_long.
 */

typedef u_long nchash_t;

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

#define	NCHASH2(hash, dvp)	\
	(((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)	(((uintptr_t)(vp) >> 3) & ncvhash)

/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

/* Cache effectiveness statistics. */
struct nchstats	nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)

static const int cache_lowat = 95;
static const int cache_hiwat = 98;
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static struct namecache *cache_lookup_entry(
    const struct vnode *, const char *, size_t);
static void	cache_thread(void *);
static void	cache_invalidate(struct namecache *);
static void	cache_disassociate(struct namecache *);
static void	cache_reclaim(void);
static int	cache_ctor(void *, void *, int);
static void	cache_dtor(void *, void *);

/*
 * Compute the hash for an entry.
 *
 * (This is for now a wrapper around namei_hash, whose interface is
 * for the time being slightly inconvenient.)
 */
static nchash_t
cache_hash(const char *name, size_t namelen)
{
	const char *endptr;

	endptr = name + namelen;
	return namei_hash(name, &endptr);
}

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove it from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;
	long *s, *d, *m;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);

		/* Collate statistics. */
		d = (long *)&nchstats;
		s = (long *)&cpup->cpu_stats;
		m = s + sizeof(nchstats) / sizeof(long);
		for (; s < m; s++, d++) {
			*d += *s;
			*s = 0;
		}
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;
	nchash_t hash;

	KASSERT(dvp != NULL);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != namelen ||
		    memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with DVP pointing to the directory to search,
 * and CNP providing the name of the entry being sought: cn_nameptr
 * is the name, cn_namelen is its length, and cn_flags is the flags
 * word from the namei operation.
 *
 * DVP must be locked.
 *
 * There are three possible non-error return states:
 *    1. Nothing was found in the cache.  Nothing is known about
 *       the requested name.
 *    2. A negative entry was found in the cache, meaning that the
 *       requested name definitely does not exist.
 *    3. A positive entry was found in the cache, meaning that the
 *       requested name does exist and that we are providing the
 *       vnode.
 * In these cases the results are:
 *    1. 0 returned; VN is set to NULL.
 *    2. 1 returned; VN is set to NULL.
 *    3. 1 returned; VN is set to the vnode found.
 *
 * The additional result argument ISWHT is set to zero, unless a
 * negative entry is found that was entered as a whiteout, in which
 * case ISWHT is set to one.
 *
 * The ISWHT_RET argument pointer may be null.  In this case an
 * assertion is made that the whiteout flag is not set.  File systems
 * that do not support whiteouts can/should do this.
 *
 * Filesystems that do support whiteouts should add ISWHITEOUT to
 * cnp->cn_flags if ISWHT comes back nonzero.
 *
 * When a vnode is returned, it is locked, as per the vnode lookup
 * locking protocol.
 *
 * There is no way for this function to fail, in the sense of
 * generating an error that requires aborting the namei operation.
 *
 * (Prior to October 2012, this function returned an integer status,
 * and a vnode, and mucked with the flags word in CNP for whiteouts.
 * The integer status was -1 for "nothing found", ENOENT for "a
 * negative entry found", 0 for "a positive entry found", and possibly
 * other errors, and the value of VN might or might not have been set
 * depending on what error occurred.)
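 *
 * As an illustrative sketch (not part of this interface), a file
 * system's lookup routine might consume the three result states as
 * below; "xxx_lookup" and its error handling are hypothetical
 * placeholders:
 *
 *	int
 *	xxx_lookup(struct vnode *dvp, struct vnode **vpp,
 *	    struct componentname *cnp)
 *	{
 *		int iswht;
 *
 *		if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 *		    cnp->cn_nameiop, cnp->cn_flags, &iswht, vpp)) {
 *			if (*vpp != NULL)
 *				return 0;		(positive hit)
 *			if (iswht)
 *				cnp->cn_flags |= ISWHITEOUT;
 *			return ENOENT;			(negative hit)
 *		}
 *		... nothing cached: fall back to scanning the directory,
 *		    then cache_enter() the result ...
 *	}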
 */
int
cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
	     uint32_t nameiop, uint32_t cnflags,
	     int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default result values. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if ((cnflags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * Last component and we are renaming or deleting,
		 * the cache entry is invalid, or otherwise don't
		 * want cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	if (ncp->nc_vp == NULL) {
		if (iswht_ret != NULL) {
			/*
			 * Restore the ISWHITEOUT flag saved earlier.
			 */
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		} else {
			KASSERT(ncp->nc_flags == 0);
		}

		if (__predict_true(nameiop != CREATE ||
		    (cnflags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found neg entry; vn is already null from above */
			return 1;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * Last component and we are renaming or
			 * deleting, the cache entry is invalid,
			 * or otherwise don't want cache entry to
			 * exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			/* found nothing */
			return 0;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock,
	 * we can't use this pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	/* We don't have the right lock, but this is only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);

	/* found it */
	*vn_ret = vp;
	return 1;
}

int
cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
		 uint32_t cnflags,
		 int *iswht_ret, struct vnode **vn_ret)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	/* Establish default results. */
	if (iswht_ret != NULL) {
		*iswht_ret = 0;
	}
	*vn_ret = NULL;

	if (__predict_false(!doingcache)) {
		/* found nothing */
		return 0;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	ncp = cache_lookup_entry(dvp, name, namelen);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		/* found nothing */
		return 0;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		if (iswht_ret != NULL) {
			KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
			/*cnp->cn_flags |= ncp->nc_flags;*/
			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
		}
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		/* found negative entry; vn is already null from above */
		return 1;
	}
	mutex_enter(vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(&cpup->cpu_lock);
	error = vget(vp, LK_NOWAIT);
	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 * XXX badhits?
		 */
		COUNT(cpup->cpu_stats, ncs_falsehits);
		/* found nothing */
		return 0;
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits); /* XXX can be "badhits" */

	/* found it */
	*vn_ret = vp;
	return 1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds the vnode is referenced and stored in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(nchstats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			mutex_enter(dvp->v_interlock);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			error = vget(dvp, LK_NOWAIT);
			if (error) {
				KASSERT(error == EBUSY);
				if (bufp)
					(*bpp) += nlen;
				*dvpp = NULL;
				return -1;
			}
			*dvpp = dvp;
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(nchstats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp,
	    const char *name, size_t namelen, uint32_t cnflags)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;
	nchash_t hash;

	/* First, check whether we can/should add a cache entry. */
	if ((cnflags & MAKEENTRY) == 0 ||
	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, name, namelen);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Grab the vnode we just found. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnflags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(namelen <= NCHNAMLEN);
	ncp->nc_nlen = namelen;
	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	hash = cache_hash(name, namelen);
	ncpp = &nchashtbl[NCHASH2(hash, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd) for directories.
	 * (and in Linux procfs exe node)
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	    "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	    "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	    "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	    "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

/*
 * Name cache reinitialization, for when the maximum number of
 * vnodes increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (name != NULL) {
		ncp = cache_lookup_entry(vp, name, namelen);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when filesys is umounted to
 * remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan global list invalidating entries until we meet a preset target.
 * Prefer to invalidate entries that have not scored a hit within
 * cache_hottime seconds.  We sort the LRU list only for this routine's
 * benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (ncp->nc_dvp == NULL)
			continue;
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 *	=> keep number of entries below the high water mark
 *	=> sort pseudo-LRU list
 *	=> garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif
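
/*
 * Illustrative sketch (not compiled, not part of the interface):
 * getcwd-style code builds a path by walking towards the root and
 * filling a caller-supplied buffer from the end, which is what the
 * bpp/bufp protocol of cache_revlookup() above is designed for.  The
 * loop below is a hypothetical caller; its error handling is elided:
 *
 *	char buf[MAXPATHLEN], *bp = buf + sizeof(buf);
 *	struct vnode *dvp;
 *
 *	while (vp != rootvnode) {
 *		if (cache_revlookup(vp, &dvp, &bp, buf) != 0)
 *			break;		(miss or error: fall back to a
 *					 directory scan, or give up)
 *		*--bp = '/';		(separator precedes the name)
 *		vrele(vp);
 *		vp = dvp;		(dvp was returned referenced)
 *	}
 */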