/*	$NetBSD: vfs_cache.c,v 1.89 2012/07/22 00:53:18 rmind Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.89 2012/07/22 00:53:18 rmind Exp $");

#include "opt_ddb.h"
#include "opt_revcache.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/kmem.h>	/* kmem_zalloc(), used by cache_cpu_init() */
#include <sys/pool.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>

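/*
 * Defining NAMECACHE_ENTER_REVERSE makes cache_enter() create
 * reverse-cache entries for all vnodes, not just directories; this
 * is what allows cache_revlookup() to serve things like the
 * Linux-compatible procfs "exe" node.  See cache_enter() below.
 */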
#define NAMECACHE_ENTER_REVERSE
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (dvp, name) where dvp refers to the directory
 * containing name.
 *
 * For simplicity (and economy of storage), names longer than
 * a maximum length of NCHNAMLEN are not cached; they occur
 * infrequently in any case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 * The entry is also dropped when it was not possible to lock
 * the cached vnode, either because vget() failed or the generation
 * number changed while waiting for the lock.
 */
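
/*
 * Illustrative sketch (hypothetical code, not part of this file) of
 * how a file system's lookup routine is expected to use this cache;
 * the real callers are the individual VOP_LOOKUP() implementations:
 *
 *	rv = cache_lookup(dvp, vpp, cnp);
 *	if (rv >= 0)
 *		return rv;		0: hit, ENOENT: negative entry
 *	...on -1, scan the directory for the name, then...
 *	cache_enter(dvp, foundvp, cnp);
 *
 * "rv" and "foundvp" are illustrative names only.
 */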

/*
 * Per-CPU namecache data.
 */
struct nchcpu {
	kmutex_t	cpu_lock;
	struct nchstats	cpu_stats;
};

/*
 * Structures associated with name caching.
 */

static kmutex_t *namecache_lock __read_mostly;
static pool_cache_t namecache_cache __read_mostly;
static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;

static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly;
static u_long	nchash __read_mostly;

#define	NCHASH(cnp, dvp)	\
	(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)

static LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl __read_mostly;
static u_long	ncvhash __read_mostly;

#define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)
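
/*
 * NCHASH() mixes the precomputed component-name hash (cn_hash) with
 * the directory vnode's address; NCVHASH() hashes on the vnode
 * address alone.  The ">> 3" discards low address bits that are
 * always zero for aligned allocations.
 */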

/* Number of cache entries allocated. */
static long	numcache __cacheline_aligned;

/* Garbage collection queue and number of entries pending in it. */
static void	*cache_gcqueue;
static u_int	cache_gcpend;

/* Cache effectiveness statistics. */
struct nchstats	nchstats __cacheline_aligned;
#define	COUNT(c,x)	(c.x++)
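
/*
 * Statistics are kept per-CPU in struct nchcpu and folded into the
 * global nchstats by cache_lock_cpus().  COUNT() itself does a plain
 * increment: most updates happen with the per-CPU lock held, while a
 * few are done unlocked where a lost update is considered tolerable
 * (marked "Unlocked, but only for stats" below).
 */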

static const int cache_lowat = 95;	/* percent of desiredvnodes */
static const int cache_hiwat = 98;	/* percent of desiredvnodes */
static const int cache_hottime = 5;	/* number of seconds */
static int doingcache = 1;		/* 1 => enable the cache */

static struct evcnt cache_ev_scan;
static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;

static void cache_invalidate(struct namecache *);
static struct namecache *cache_lookup_entry(
    const struct vnode *, const struct componentname *);
static void cache_thread(void *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);

/*
 * Invalidate a cache entry and enqueue it for garbage collection.
 */
static void
cache_invalidate(struct namecache *ncp)
{
	void *head;

	KASSERT(mutex_owned(&ncp->nc_lock));

	if (ncp->nc_dvp != NULL) {
		ncp->nc_vp = NULL;
		ncp->nc_dvp = NULL;
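		/*
		 * Lock-free push onto the garbage collection queue:
		 * make this entry the new head, retrying the CAS if
		 * another thread pushed concurrently.
		 */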
		do {
			head = cache_gcqueue;
			ncp->nc_gcqueue = head;
		} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
		atomic_inc_uint(&cache_gcpend);
	}
}

/*
 * Disassociate a namecache entry from any vnodes it is attached to,
 * and remove it from the global LRU list.
 */
static void
cache_disassociate(struct namecache *ncp)
{

	KASSERT(mutex_owned(namecache_lock));
	KASSERT(ncp->nc_dvp == NULL);

	if (ncp->nc_lru.tqe_prev != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		ncp->nc_lru.tqe_prev = NULL;
	}
	if (ncp->nc_vhash.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vhash);
		ncp->nc_vhash.le_prev = NULL;
	}
	if (ncp->nc_vlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_vlist);
		ncp->nc_vlist.le_prev = NULL;
	}
	if (ncp->nc_dvlist.le_prev != NULL) {
		LIST_REMOVE(ncp, nc_dvlist);
		ncp->nc_dvlist.le_prev = NULL;
	}
}

/*
 * Lock all CPUs to prevent any cache lookup activity.  Conceptually,
 * this locks out all "readers".
 */
static void
cache_lock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;
	long *s, *d, *m;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_enter(&cpup->cpu_lock);

		/* Collate statistics. */
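		/*
		 * Both blocks are walked as arrays of longs, which
		 * assumes struct nchstats consists of long fields
		 * only.
		 */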
		d = (long *)&nchstats;
		s = (long *)&cpup->cpu_stats;
		m = s + sizeof(nchstats) / sizeof(long);
		for (; s < m; s++, d++) {
			*d += *s;
			*s = 0;
		}
	}
}

/*
 * Release all CPU locks.
 */
static void
cache_unlock_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct nchcpu *cpup;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpup = ci->ci_data.cpu_nch;
		mutex_exit(&cpup->cpu_lock);
	}
}

/*
 * Find a single cache entry and return it locked.  'namecache_lock' or
 * at least one of the per-CPU locks must be held.
 */
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	KASSERT(dvp != NULL);
	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

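	/*
	 * The hash chain is walked without any lock on the entries;
	 * this is safe against concurrent insertion because
	 * cache_enter() publishes new entries with a producer memory
	 * barrier.  A candidate must still be re-checked under its own
	 * lock, as it may have been invalidated in the meantime.
	 */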
	LIST_FOREACH(ncp, ncpp, nc_hash) {
		if (ncp->nc_dvp != dvp ||
		    ncp->nc_nlen != cnp->cn_namelen ||
		    memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
			continue;
		mutex_enter(&ncp->nc_lock);
		if (__predict_true(ncp->nc_dvp == dvp)) {
			ncp->nc_hittime = hardclock_ticks;
			return ncp;
		}
		/* Raced: entry has been nullified. */
		mutex_exit(&ncp->nc_lock);
	}

	return NULL;
}

/*
 * Look for the name in the cache.  We don't do this
 * if the segment name is long, simply so the cache can avoid
 * holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp->cn_nameptr pointing to the name of the entry being sought,
 * cnp->cn_namelen telling the length of the name, and cnp->cn_hash
 * holding a hash of the name.  If the lookup succeeds, the vnode is
 * locked, stored in *vpp and a status of zero is returned.  If the
 * locking fails for whatever reason, the vnode is released and a
 * status of -1 is returned.  If the lookup determines that the name
 * does not exist (negative caching), a status of ENOENT is returned.
 * If the lookup fails, a status of -1 is returned.
 */
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	if (__predict_false(!doingcache)) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return -1;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(cnp->cn_namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		mutex_exit(&cpup->cpu_lock);
		*vpp = NULL;
		return -1;
	}
	ncp = cache_lookup_entry(dvp, cnp);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		*vpp = NULL;
		return -1;
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		COUNT(cpup->cpu_stats, ncs_badhits);
		/*
		 * We are renaming or deleting the last component, the
		 * cache entry is invalid, or we otherwise don't want
		 * the cache entry to exist.
		 */
		cache_invalidate(ncp);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		*vpp = NULL;
		return -1;
	} else if (ncp->nc_vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
		cnp->cn_flags |= ncp->nc_flags;
		if (__predict_true(cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0)) {
			COUNT(cpup->cpu_stats, ncs_neghits);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			return ENOENT;
		} else {
			COUNT(cpup->cpu_stats, ncs_badhits);
			/*
			 * We are renaming or deleting the last
			 * component, the cache entry is invalid,
			 * or we otherwise don't want the cache
			 * entry to exist.
			 */
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			mutex_exit(&cpup->cpu_lock);
			*vpp = NULL;
			return -1;
		}
	}

	vp = ncp->nc_vp;
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			*vpp = NULL;
			return -1;
		}
	}

#ifdef DEBUG
	/*
	 * Since we released ncp->nc_lock,
	 * we can't use this pointer any more.
	 */
	ncp = NULL;
#endif /* DEBUG */

	if (vp == dvp) {	/* lookup on "." */
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
		VOP_UNLOCK(dvp);
		error = vn_lock(vp, LK_EXCLUSIVE);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		error = vn_lock(vp, LK_EXCLUSIVE);
	}

	/*
	 * Check that the lock succeeded.
	 */
	if (error) {
		/* Unlocked, but only for stats. */
		COUNT(cpup->cpu_stats, ncs_badhits);
		vrele(vp);
		*vpp = NULL;
		return -1;
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits);
	*vpp = vp;
	return 0;
}

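/*
 * Variant of cache_lookup() for callers that do their own locking:
 * on a hit the vnode is referenced (via vtryget()/vget()) but is
 * returned unlocked.  Return values are as for cache_lookup().
 */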
int
cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct namecache *ncp;
	struct vnode *vp;
	struct nchcpu *cpup;
	int error;

	if (__predict_false(!doingcache)) {
		cnp->cn_flags &= ~MAKEENTRY;
		*vpp = NULL;
		return -1;
	}

	cpup = curcpu()->ci_data.cpu_nch;
	mutex_enter(&cpup->cpu_lock);
	if (__predict_false(cnp->cn_namelen > NCHNAMLEN)) {
		COUNT(cpup->cpu_stats, ncs_long);
		cnp->cn_flags &= ~MAKEENTRY;
		mutex_exit(&cpup->cpu_lock);
		*vpp = NULL;
		return -1;
	}
	ncp = cache_lookup_entry(dvp, cnp);
	if (__predict_false(ncp == NULL)) {
		COUNT(cpup->cpu_stats, ncs_miss);
		mutex_exit(&cpup->cpu_lock);
		*vpp = NULL;
		return -1;
	}
	vp = ncp->nc_vp;
	if (vp == NULL) {
		/*
		 * Restore the ISWHITEOUT flag saved earlier.
		 */
		KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0);
		cnp->cn_flags |= ncp->nc_flags;
		COUNT(cpup->cpu_stats, ncs_neghits);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		return ENOENT;
	}
	if (vtryget(vp)) {
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
	} else {
		mutex_enter(vp->v_interlock);
		mutex_exit(&ncp->nc_lock);
		mutex_exit(&cpup->cpu_lock);
		error = vget(vp, LK_NOWAIT);
		if (error) {
			KASSERT(error == EBUSY);
			/*
			 * This vnode is being cleaned out.
			 * XXX badhits?
			 */
			COUNT(cpup->cpu_stats, ncs_falsehits);
			*vpp = NULL;
			return -1;
		}
	}

	/* Unlocked, but only for stats. */
	COUNT(cpup->cpu_stats, ncs_goodhits); /* XXX can be "badhits" */
	*vpp = vp;
	return 0;
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * If the lookup succeeds, the parent directory's vnode is referenced
 * and stored in *dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 */
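/*
 * Hypothetical sketch of the bufp/bpp protocol, in the spirit of what
 * getcwd does (the names, loop structure and root test below are
 * illustrative only).  The buffer fills right to left, one component
 * per call, so the finished path reads left to right:
 *
 *	char buf[MAXPATHLEN], *bp = buf + MAXPATHLEN;
 *
 *	while (vp is not the root) {
 *		if (cache_revlookup(vp, &dvp, &bp, buf) != 0)
 *			break;		fall back to scanning ".."
 *		*(--bp) = '/';
 *		vp = dvp;
 *	}
 */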
int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp;
	struct ncvhashhead *nvcpp;
	char *bp;
	int error, nlen;

	if (!doingcache)
		goto out;

	nvcpp = &ncvhashtbl[NCVHASH(vp)];

	mutex_enter(namecache_lock);
	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_vp == vp &&
		    (dvp = ncp->nc_dvp) != NULL &&
		    dvp != vp) {		/* avoid pesky "." entries */

#ifdef DIAGNOSTIC
			if (ncp->nc_nlen == 1 &&
			    ncp->nc_name[0] == '.')
				panic("cache_revlookup: found entry for .");

			if (ncp->nc_nlen == 2 &&
			    ncp->nc_name[0] == '.' &&
			    ncp->nc_name[1] == '.')
				panic("cache_revlookup: found entry for ..");
#endif
			COUNT(nchstats, ncs_revhits);
			nlen = ncp->nc_nlen;

			if (bufp) {
				bp = *bpp;
				bp -= nlen;
				if (bp <= bufp) {
					*dvpp = NULL;
					mutex_exit(&ncp->nc_lock);
					mutex_exit(namecache_lock);
					return ERANGE;
				}
				memcpy(bp, ncp->nc_name, nlen);
				*bpp = bp;
			}

			if (vtryget(dvp)) {
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
			} else {
				mutex_enter(dvp->v_interlock);
				mutex_exit(&ncp->nc_lock);
				mutex_exit(namecache_lock);
				error = vget(dvp, LK_NOWAIT);
				if (error) {
					KASSERT(error == EBUSY);
					if (bufp)
						(*bpp) += nlen;
					*dvpp = NULL;
					return -1;
				}
			}
			*dvpp = dvp;
			return 0;
		}
		mutex_exit(&ncp->nc_lock);
	}
	COUNT(nchstats, ncs_revmiss);
	mutex_exit(namecache_lock);
 out:
	*dvpp = NULL;
	return -1;
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *oncp;
	struct nchashhead *ncpp;
	struct ncvhashhead *nvcpp;

	/* First, check whether we can/should add a cache entry. */
	if ((cnp->cn_flags & MAKEENTRY) == 0 ||
	    __predict_false(cnp->cn_namelen > NCHNAMLEN || !doingcache)) {
		return;
	}

	if (numcache > desiredvnodes) {
		mutex_enter(namecache_lock);
		cache_ev_forced.ev_count++;
		cache_reclaim();
		mutex_exit(namecache_lock);
	}

	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
	mutex_enter(namecache_lock);
	numcache++;

	/*
	 * Concurrent lookups in the same directory may race for a
	 * cache entry.  If there's a duplicated entry, free it.
	 */
	oncp = cache_lookup_entry(dvp, cnp);
	if (oncp) {
		cache_invalidate(oncp);
		mutex_exit(&oncp->nc_lock);
	}

	/* Initialize the new entry. */
	mutex_enter(&ncp->nc_lock);
	ncp->nc_vp = vp;
	ncp->nc_flags = 0;
	ncp->nc_hittime = 0;
	ncp->nc_gcqueue = NULL;
	if (vp == NULL) {
		/*
		 * For negative hits, save the ISWHITEOUT flag so we can
		 * restore it later when the cache entry is used again.
		 */
		ncp->nc_flags = cnp->cn_flags & ISWHITEOUT;
	}

	/* Fill in cache info. */
	ncp->nc_dvp = dvp;
	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
	if (vp)
		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
	else {
		ncp->nc_vlist.le_prev = NULL;
		ncp->nc_vlist.le_next = NULL;
	}
	KASSERT(cnp->cn_namelen <= NCHNAMLEN);
	ncp->nc_nlen = cnp->cn_namelen;
	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	ncpp = &nchashtbl[NCHASH(cnp, dvp)];

	/*
	 * Flush updates before making visible in table.  No need for a
	 * memory barrier on the other side: to see modifications the
	 * list must be followed, meaning a dependent pointer load.
	 * The below is LIST_INSERT_HEAD() inlined, with the memory
	 * barrier included in the correct place.
	 */
	if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL)
		ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next;
	ncp->nc_hash.le_prev = &ncpp->lh_first;
	membar_producer();
	ncpp->lh_first = ncp;

	ncp->nc_vhash.le_prev = NULL;
	ncp->nc_vhash.le_next = NULL;

	/*
	 * Create reverse-cache entries (used in getcwd, and by the
	 * Linux-compatible procfs "exe" node); only directories are
	 * entered unless NAMECACHE_ENTER_REVERSE is defined.
	 */
	if (vp != NULL &&
	    vp != dvp &&
#ifndef NAMECACHE_ENTER_REVERSE
	    vp->v_type == VDIR &&
#endif
	    (ncp->nc_nlen > 2 ||
	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
		nvcpp = &ncvhashtbl[NCVHASH(vp)];
		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
	}
	mutex_exit(&ncp->nc_lock);
	mutex_exit(namecache_lock);
}

/*
 * Name cache initialization, from vfs_init() when we are booting.
 */
void
nchinit(void)
{
	int error;

	TAILQ_INIT(&nclruhead);
	namecache_cache = pool_cache_init(sizeof(struct namecache),
	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
	    cache_dtor, NULL);
	KASSERT(namecache_cache != NULL);

	namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
	ncvhashtbl =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &ncvhash);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &ncvhash);
#endif

	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
	    NULL, NULL, "cachegc");
	if (error != 0)
		panic("nchinit %d", error);

	evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries scanned");
	evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
	   "namecache", "entries collected");
	evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
	   "namecache", "over scan target");
	evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
	   "namecache", "under scan target");
	evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
	   "namecache", "forced reclaims");
}

static int
cache_ctor(void *arg, void *obj, int flag)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);

	return 0;
}

static void
cache_dtor(void *arg, void *obj)
{
	struct namecache *ncp;

	ncp = obj;
	mutex_destroy(&ncp->nc_lock);
}

/*
 * Called once for each CPU in the system as it is attached.
 */
void
cache_cpu_init(struct cpu_info *ci)
{
	struct nchcpu *cpup;
	size_t sz;

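	/*
	 * Allocate with slack and round the pointer up, so that the
	 * per-CPU data sits in cache lines of its own and CPUs do not
	 * falsely share them.  The unaligned base pointer is not
	 * remembered; this memory is never freed.
	 */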
	sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit;
	cpup = kmem_zalloc(sz, KM_SLEEP);
	cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit);
	mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	ci->ci_data.cpu_nch = cpup;
}

/*
 * Name cache reinitialization, for when the maximum number of vnodes increases.
 */
void
nchreinit(void)
{
	struct namecache *ncp;
	struct nchashhead *oldhash1, *hash1;
	struct ncvhashhead *oldhash2, *hash2;
	u_long i, oldmask1, oldmask2, mask1, mask2;

	hash1 = hashinit(desiredvnodes, HASH_LIST, true, &mask1);
	hash2 =
#ifdef NAMECACHE_ENTER_REVERSE
	    hashinit(desiredvnodes, HASH_LIST, true, &mask2);
#else
	    hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
#endif
	mutex_enter(namecache_lock);
	cache_lock_cpus();
	oldhash1 = nchashtbl;
	oldmask1 = nchash;
	nchashtbl = hash1;
	nchash = mask1;
	oldhash2 = ncvhashtbl;
	oldmask2 = ncvhash;
	ncvhashtbl = hash2;
	ncvhash = mask2;
	for (i = 0; i <= oldmask1; i++) {
		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
	}
	for (i = 0; i <= oldmask2; i++) {
		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
			LIST_REMOVE(ncp, nc_vhash);
			ncp->nc_vhash.le_prev = NULL;
		}
	}
	cache_unlock_cpus();
	mutex_exit(namecache_lock);
	hashdone(oldhash1, HASH_LIST, oldmask1);
	hashdone(oldhash2, HASH_LIST, oldmask2);
}

/*
 * Cache flush, for a particular vnode; called when a vnode is renamed
 * to hide entries that would now be invalid.
 */
void
cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags)
{
	struct namecache *ncp, *ncnext;

	mutex_enter(namecache_lock);
	if (flags & PURGE_PARENTS) {
		for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_vlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (flags & PURGE_CHILDREN) {
		for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
		    ncp = ncnext) {
			ncnext = LIST_NEXT(ncp, nc_dvlist);
			mutex_enter(&ncp->nc_lock);
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	if (cnp != NULL) {
		ncp = cache_lookup_entry(vp, cnp);
		if (ncp) {
			cache_invalidate(ncp);
			mutex_exit(&ncp->nc_lock);
			cache_disassociate(ncp);
		}
	}
	mutex_exit(namecache_lock);
}

/*
 * Cache flush, for a whole file system; called when a file system is
 * unmounted to remove entries that would now be invalid.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	mutex_enter(namecache_lock);
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
			/* Free the resources we had. */
			cache_invalidate(ncp);
			cache_disassociate(ncp);
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_reclaim();
	mutex_exit(namecache_lock);
}

/*
 * Scan the global list, invalidating entries until we meet a preset
 * target.  Prefer to invalidate entries that have not scored a hit
 * within cache_hottime seconds.  We sort the LRU list only for this
 * routine's benefit.
 */
static void
cache_prune(int incache, int target)
{
	struct namecache *ncp, *nxtcp, *sentinel;
	int items, recent, tryharder;

	KASSERT(mutex_owned(namecache_lock));

	items = 0;
	tryharder = 0;
	recent = hardclock_ticks - hz * cache_hottime;
	sentinel = NULL;
	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
		if (incache <= target)
			break;
		items++;
		nxtcp = TAILQ_NEXT(ncp, nc_lru);
		if (ncp->nc_dvp == NULL)
			continue;
		if (ncp == sentinel) {
			/*
			 * If we looped back on ourselves, then ignore
			 * recent entries and purge whatever we find.
			 */
			tryharder = 1;
		}
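		/*
		 * The subtraction (rather than a direct comparison
		 * with "recent") keeps the hot-entry test correct
		 * even when hardclock_ticks wraps around.
		 */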
		if (!tryharder && (ncp->nc_hittime - recent) > 0) {
			if (sentinel == NULL)
				sentinel = ncp;
			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
			continue;
		}
		mutex_enter(&ncp->nc_lock);
		if (ncp->nc_dvp != NULL) {
			cache_invalidate(ncp);
			cache_disassociate(ncp);
			incache--;
		}
		mutex_exit(&ncp->nc_lock);
	}
	cache_ev_scan.ev_count += items;
}

/*
 * Collect dead cache entries from all CPUs and garbage collect.
 */
static void
cache_reclaim(void)
{
	struct namecache *ncp, *next;
	int items;

	KASSERT(mutex_owned(namecache_lock));

	/*
	 * If the number of extant entries not awaiting garbage collection
	 * exceeds the high water mark, then reclaim stale entries until we
	 * reach our low water mark.
	 */
	items = numcache - cache_gcpend;
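	/*
	 * The water marks are percentages of desiredvnodes; computing
	 * them in 64-bit arithmetic avoids integer overflow for very
	 * large desiredvnodes values.
	 */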
	if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
		cache_prune(items, (int)((uint64_t)desiredvnodes *
		    cache_lowat / 100));
		cache_ev_over.ev_count++;
	} else
		cache_ev_under.ev_count++;

	/*
	 * Stop forward lookup activity on all CPUs and garbage collect dead
	 * entries.
	 */
	cache_lock_cpus();
	ncp = cache_gcqueue;
	cache_gcqueue = NULL;
	items = cache_gcpend;
	cache_gcpend = 0;
	while (ncp != NULL) {
		next = ncp->nc_gcqueue;
		cache_disassociate(ncp);
		KASSERT(ncp->nc_dvp == NULL);
		if (ncp->nc_hash.le_prev != NULL) {
			LIST_REMOVE(ncp, nc_hash);
			ncp->nc_hash.le_prev = NULL;
		}
		pool_cache_put(namecache_cache, ncp);
		ncp = next;
	}
	cache_unlock_cpus();
	numcache -= items;
	cache_ev_gc.ev_count += items;
}

/*
 * Cache maintenance thread, awakening once per second to:
 *
 * => keep number of entries below the high water mark
 * => sort pseudo-LRU list
 * => garbage collect dead entries
 */
static void
cache_thread(void *arg)
{

	mutex_enter(namecache_lock);
	for (;;) {
		cache_reclaim();
		kpause("cachegc", false, hz, namecache_lock);
	}
}

#ifdef DDB
void
namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
{
	struct vnode *dvp = NULL;
	struct namecache *ncp;

	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
			dvp = ncp->nc_dvp;
		}
	}
	if (dvp == NULL) {
		(*pr)("name not found\n");
		return;
	}
	vp = dvp;
	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
		if (ncp->nc_vp == vp) {
			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
		}
	}
}
#endif