/*	$NetBSD: vfs_cache.c,v 1.76 2008/05/05 17:11:17 ad Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.76 2008/05/05 17:11:17 ad Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache for future
 * reference.  The cache is managed LRU, so frequently used names will
 * hang around.  The cache is indexed by a hash value obtained from
 * (dvp, name), where dvp refers to the directory containing the name.
 *
 * For simplicity (and economy of storage), names longer than a
 * maximum length of NCHNAMLEN are not cached; they occur infrequently
 * in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference is for
 * DELETE, or NOCACHE is set (rewrite), and the name is located in the
 * cache, it will be dropped.  The entry is also dropped when it was
 * not possible to lock the cached vnode, either because vget() failed
 * or because the generation number changed while waiting for the lock.
 */
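
/*
 * An illustrative sketch of how a file system's lookup routine uses
 * this cache.  fs_dirscan() is a hypothetical stand-in for the real
 * directory scan; real callers also honour the MAKEENTRY flag and
 * their own error handling:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error != -1)
 *		return error;			hit (0) or negative (ENOENT)
 *	error = fs_dirscan(dvp, vpp, cnp);	miss: scan the directory
 *	cache_enter(dvp, *vpp, cnp);		remember the result
 *	return error;
 */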

/*
 * Structures associated with name caching.
 */
LIST_HEAD(nchashhead, namecache) *nchashtbl;
u_long	nchash;				/* size of hash table - 1 */
#define	NCHASH(cnp, dvp)	\
	(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)
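
/*
 * The name hash (cn_hash, computed when the pathname is parsed) is
 * mixed with the directory vnode's address, so that the same name
 * under different directories falls into different buckets.
 */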

LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
u_long	ncvhash;			/* size of hash table - 1 */
#define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)

long	numcache;			/* number of cache entries allocated */
static u_int	cache_gcpend;		/* number of entries pending GC */
static void	*cache_gcqueue;		/* garbage collection queue */

TAILQ_HEAD(, namecache) nclruhead =		/* LRU chain */
	TAILQ_HEAD_INITIALIZER(nclruhead);
#define	COUNT(x)	nchstats.x++
struct	nchstats nchstats;		/* cache effectiveness statistics */

static pool_cache_t namecache_cache;

MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");

int cache_lowat = 95;
int cache_hiwat = 98;
int cache_hottime = 5;			/* number of seconds */
int doingcache = 1;			/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

/* A single lock to serialize modifications. */
static kmutex_t *namecache_lock;

static void cache_invalidate(struct namecache *);
static inline struct namecache *cache_lookup_entry(
    const struct vnode *, const struct componentname *);
static void cache_thread(void *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
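		/*
		 * Lock-free push onto the GC queue: retry the CAS
		 * until we succeed in swinging the list head to this
		 * entry.
		 */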
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove it from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(ci->ci_data.cpu_cachelock);
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_exit(ci->ci_data.cpu_cachelock);
	}
}
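
/*
 * An illustrative sketch of this locking scheme: a reader takes only
 * its own CPU's lock, so uncontended lookups never touch a shared
 * lock, while a writer that must exclude all readers takes every
 * CPU's lock:
 *
 *	reader:	cpulock = curcpu()->ci_data.cpu_cachelock;
 *		mutex_enter(cpulock);
 *		... cache_lookup_entry() ...
 *		mutex_exit(cpulock);
 *
 *	writer:	cache_lock_cpus();
 *		... modify or garbage collect ...
 *		cache_unlock_cpus();
 */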

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != cnp->cn_namelen ||
		    memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp == dvp) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this if the segment
 * name is long, simply so the cache can avoid holding long names
 * (which would either waste space or add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp->cn_nameptr pointing to the name of the entry being sought,
 * cnp->cn_namelen giving the length of the name, and cnp->cn_hash
 * holding a hash of the name.  If the lookup succeeds, the vnode is
 * locked, stored in *vpp, and a status of zero is returned.  If the
 * cache records that the name does not exist (negative caching), a
 * status of ENOENT is returned.  If the lookup fails, whether because
 * of a cache miss or because a hit could not be locked, a status of
 * -1 is returned.
 */
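/*
 * A sketch of the return-value convention, as seen by a hypothetical
 * caller:
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case 0:		positive hit; vp is returned locked
 *	case ENOENT:	negative hit; the name is known not to exist
 *	case -1:	miss; the caller must scan the directory
 *	}
 */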
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		COUNT(ncs_badhits);
		goto remove;
	} else if (ncp->nc_vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		if (cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0) {
			COUNT(ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(cpulock);
			return (ENOENT);
		} else {
			COUNT(ncs_badhits);
			goto remove;
		}
	}

	vp = ncp->nc_vp;
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock, we can't use this pointer
	 * any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
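		/*
		 * Looking up ".." means locking the parent while the
		 * child is held; unlock the child first to preserve
		 * the parent-before-child lock order and avoid
		 * deadlocking against a concurrent downward lookup.
		 */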
		VOP_UNLOCK(dvp, 0);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_badhits);
		*vpp = NULL;
		return (-1);
	}

	/* Unlocked, but only for stats. */
	COUNT(ncs_goodhits);
	*vpp = vp;
	return (0);

remove:
	/*
	 * We are renaming or deleting the last component, the cache
	 * entry is invalid, or we otherwise don't want the entry to
	 * persist.  Drop it.
	 */
	cache_invalidate(ncp);
	mutex_exit(&ncp->nc_lock);
fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return (-1);
}

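/*
 * Variant of cache_lookup() that returns the vnode with a reference
 * (via vget()) but does not lock it, and does not apply the
 * MAKEENTRY / CREATE special cases above.
 */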
int
cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	kmutex_t *cpulock;
	int error;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return (-1);
	}

	if (cnp->cn_namelen > NCHNAMLEN) {
		/* Unlocked, but only for stats. */
		COUNT(ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		goto fail;
	}
	cpulock = curcpu()->ci_data.cpu_cachelock;
	mutex_enter(cpulock);
	ncp = cache_lookup_entry(dvp, cnp);
	if (ncp == NULL) {
		COUNT(ncs_miss);
		goto fail_wlock;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		cnp->cn_flags |= ncp->nc_flags;
		COUNT(ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(cpulock);
		return (ENOENT);
	}
	mutex_enter(&vp->v_interlock);
	mutex_exit(&ncp->nc_lock);
	mutex_exit(cpulock);
	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);

	if (error) {
		KASSERT(error == EBUSY);
		/*
		 * This vnode is being cleaned out.
		 */
		COUNT(ncs_falsehits); /* XXX badhits? */
		goto fail;
	}

	*vpp = vp;

	return 0;

fail_wlock:
	mutex_exit(cpulock);
fail:
	*vpp = NULL;
	return -1;
}

/*
 * Scan the cache looking for the name of a directory entry pointing
 * at vp.
 *
 * Fill in *dvpp with the directory vnode.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
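/*
 * An illustrative sketch of the buffer convention (as used by
 * getcwd): names are copied in right to left, each call prepending
 * one component before the previous one:
 *
 *	char buf[MAXPATHLEN], *bp = buf + sizeof(buf);
 *
 *	error = cache_revlookup(vp, &dvp, &bp, buf);
 *	on success bp points at the name of vp within dvp, and the
 *	caller can continue walking upwards with vp = dvp.
 */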
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) { 		/* avoid pesky . entries.. */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(ncs_revhits);

			if (bufp) {
				bp = *bpp;
				bp -= ncp->nc_nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return (ERANGE);
				}
				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
				*bpp = bp;
			}

			/* XXX MP: how do we know dvp won't evaporate? */
			*dvpp = dvp;
			mutex_exit(&ncp->nc_lock);
			mutex_exit(namecache_lock);
			return (0);
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen > NCHNAMLEN)
		panic("cache_enter: name too long");
#endif
	if (!doingcache)
		return;

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicate entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, cnp);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Set up the new entry. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnp->cn_flags & ISWHITEOUT;
	}
	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	ncp->nc_nlen = cnp->cn_namelen;
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd, and by the
	 * Linux procfs 'exe' node).  With NAMECACHE_ENTER_REVERSE
	 * defined these are created for all vnodes, not just
	 * directories.  Entries for "." and ".." are never created.
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, called from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	   "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	   "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	   "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{

	ci->ci_data.cpu_cachelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes
 * increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
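	/*
	 * The old entries are unhashed rather than rehashed: they
	 * will simply miss on future lookups and eventually be
	 * reclaimed.
	 */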
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (cnp != NULL) {
		ncp = cache_lookup_entry(vp, cnp);
		if (ncp) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			mutex_exit(&ncp->nc_lock);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, a whole filesystem; called when a file system is
 * unmounted to remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan the global list, invalidating entries until we meet a preset
 * target.  Prefer to invalidate entries that have not scored a hit
 * within cache_hottime seconds.  We sort the LRU list only for this
 * routine's benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourself, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
		if (!tryharder && ncp->nc_hittime > recent) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
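	/*
	 * A worked example, assuming the default tunables above: with
	 * desiredvnodes at 10000, cache_hiwat at 98 and cache_lowat at
	 * 95, pruning starts once more than 9800 uncollected entries
	 * exist, and continues down to a target of 9500.
	 */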
	items = numcache - cache_gcpend;
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep number of entries below the high water mark
 * => sort pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif