/*	$NetBSD: vfs_cache.c,v 1.75 2008/04/28 20:24:05 martin Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.75 2008/04/28 20:24:05 martin Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (dvp, name), where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number has changed while waiting for the lock.
 */
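/*
 * A typical consumer is namei()/lookup(): a miss from cache_lookup()
 * is followed by a directory scan and, on success, a cache_enter()
 * call to record the translation for next time.
 */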

/*
 * Structures associated with name caching.
 */
LIST_HEAD(nchashhead, namecache) *nchashtbl;
u_long	nchash;				/* size of hash table - 1 */
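/*
 * The hash mixes the per-name hash computed by the namei code with
 * the directory vnode pointer; the pointer is shifted right to drop
 * its low bits, which carry little information due to allocation
 * alignment.
 */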
#define	NCHASH(cnp, dvp)	\
	(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)

LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
u_long	ncvhash;			/* size of hash table - 1 */
#define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)

long	numcache;			/* number of cache entries allocated */
static u_int	cache_gcpend;		/* number of entries pending GC */
static void	*cache_gcqueue;		/* garbage collection queue */
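/*
 * Dead entries are pushed onto cache_gcqueue, a lock-free singly
 * linked stack threaded through nc_gcqueue, and are later reaped
 * by cache_reclaim().
 */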

TAILQ_HEAD(, namecache) nclruhead =		/* LRU chain */
	TAILQ_HEAD_INITIALIZER(nclruhead);
#define	COUNT(x)	nchstats.x++
struct	nchstats nchstats;		/* cache effectiveness statistics */

static pool_cache_t namecache_cache;

MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");

int cache_lowat = 95;
int cache_hiwat = 98;
int cache_hottime = 5;			/* number of seconds */
int doingcache = 1;			/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

/* A single lock to serialize modifications. */
static kmutex_t *namecache_lock;

static void cache_invalidate(struct namecache *);
static inline struct namecache *cache_lookup_entry(
    const struct vnode *, const struct componentname *);
static void cache_thread(void *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
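		/* Lock-free push of the entry onto the GC stack. */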
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove it from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(ci->ci_data.cpu_cachelock);
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_exit(ci->ci_data.cpu_cachelock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != cnp->cn_namelen ||
		    memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp == dvp) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp->cn_nameptr pointing to the name of the entry being sought,
 * cnp->cn_namelen giving the length of the name, and cnp->cn_hash
 * holding a hash of the name.  If the lookup succeeds, the vnode is
 * locked, stored in *vpp and a status of zero is returned.  If locking
 * the vnode fails for whatever reason, the lookup is treated as a
 * cache miss and a status of -1 is returned.  If the lookup determines
 * that the name does not exist (negative caching), a status of ENOENT
 * is returned.  If the lookup fails, a status of -1 is returned.
 */
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
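	/*
	 * Forward lookups take only the current CPU's lock; writers
	 * exclude them by taking every CPU's lock via cache_lock_cpus().
	 */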
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		COUNT(ncs_badhits);
		goto remove;
	} else if (ncp->nc_vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		if (cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0) {
			COUNT(ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(cpulock);
			return (ENOENT);
		} else {
			COUNT(ncs_badhits);
			goto remove;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock,
	 * we can't use this pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_badhits);
		*vpp = NULL;
		return (-1);
	}

	/* Unlocked, but only for stats. */
	COUNT(ncs_goodhits);
	*vpp = vp;
	return (0);

remove:
	/*
	 * Last component, and we are renaming or deleting,
	 * the cache entry is invalid, or we otherwise don't
	 * want the cache entry to exist.
	 */
	cache_invalidate(ncp);
	mutex_exit(&ncp->nc_lock);
fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return (-1);
}

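/*
 * Variant of cache_lookup() that does not lock the resulting vnode:
 * on success, the vnode is returned in *vpp referenced (via vget())
 * but unlocked.
 */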
int
cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		COUNT(ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(cpulock);
		return (ENOENT);
	}
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	*vpp = vp;

	return 0;

fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return -1;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * Fill in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) { 		/* avoid pesky "." entries */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(ncs_revhits);

			if (bufp) {
				bp = *bpp;
				bp -= ncp->nc_nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
				*bpp = bp;
			}

			/* XXX MP: how do we know dvp won't evaporate? */
			*dvpp = dvp;
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen > NCHNAMLEN)
		panic("cache_enter: name too long");
#endif
	if (!doingcache)
		return;

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there is a duplicate entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, cnp);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Lock and initialize the new entry. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnp->cn_flags & ISWHITEOUT;
	}
	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	ncp->nc_nlen = cnp->cn_namelen;
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	/*
	 * Flush updates before making the entry visible in the table.
	 * No need for a memory barrier on the other side: to see
	 * modifications the list must be followed, meaning a dependent
	 * pointer load.  The below is LIST_INSERT_HEAD() inlined, with
	 * the memory barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd, and by the
	 * Linux procfs exe node) for directories.
	 */
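	/*
	 * The test below rejects "." and ".." by inspecting only the
	 * first two characters of the name, avoiding a full comparison.
	 */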
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, called from vfs_init() when the system
 * is booting.
 */
void
nchinit(void)
{
	int error;

	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl =
	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	   "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	   "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	   "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as it is attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{

	ci->ci_data.cpu_cachelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
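	/*
	 * Existing entries are not rehashed into the new tables: they
	 * are unhooked from the old ones and left to be recycled via
	 * the LRU list.
	 */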
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, M_CACHE);
	hashdone(oldhash2, M_CACHE);
}

/*
 * Cache flush, for a particular vnode; called when a vnode is renamed
 * to hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (cnp != NULL) {
		ncp = cache_lookup_entry(vp, cnp);
		if (ncp) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			mutex_exit(&ncp->nc_lock);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, for a whole file system; called when a file system is
 * unmounted to remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan the global list invalidating entries until we meet a preset
 * target.  Prefer to invalidate entries that have not scored a hit
 * within cache_hottime seconds.  We sort the LRU list only for this
 * routine's benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
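		/*
		 * A recently hit entry is "hot": rotate it to the tail
		 * of the LRU so that colder entries are examined first.
		 */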
		if (!tryharder && ncp->nc_hittime > recent) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
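	/*
	 * With the namecache lock and every CPU lock held, nothing can
	 * push new entries onto the GC queue, so it can be consumed
	 * without atomic operations.
	 */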
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep the number of entries below the high water mark
 * => sort the pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif
948