xref: /netbsd-src/sys/kern/vfs_cache.c (revision da5f4674a3fc214be3572d358b66af40ab9401e7)
1 /*	$NetBSD: vfs_cache.c,v 1.53 2003/08/08 20:19:56 yamt Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.53 2003/08/08 20:19:56 yamt Exp $");
36 
37 #include "opt_ddb.h"
38 #include "opt_revcache.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/time.h>
43 #include <sys/mount.h>
44 #include <sys/vnode.h>
45 #include <sys/namei.h>
46 #include <sys/errno.h>
47 #include <sys/malloc.h>
48 #include <sys/pool.h>
49 #include <sys/lock.h>
50 
51 /*
52  * Name caching works as follows:
53  *
54  * Names found by directory scans are retained in a cache
55  * for future reference.  It is managed LRU, so frequently
56  * used names will hang around.  Cache is indexed by hash value
57  * obtained from (dvp, name) where dvp refers to the directory
58  * containing name.
59  *
60  * For simplicity (and economy of storage), names longer than
61  * a maximum length of NCHNAMLEN are not cached; they occur
62  * infrequently in any case, and are almost never of interest.
63  *
64  * Upon reaching the last segment of a path, if the reference
65  * is for DELETE, or NOCACHE is set (rewrite), and the
66  * name is located in the cache, it will be dropped.
67  * The entry is dropped also when it was not possible to lock
68  * the cached vnode, either because vget() failed or the generation
69  * number has changed while waiting for the lock.
70  */
71 
72 /*
73  * Structures associated with name cacheing.
74  */
75 LIST_HEAD(nchashhead, namecache) *nchashtbl;
76 u_long	nchash;				/* size of hash table - 1 */
77 long	numcache;			/* number of cache entries allocated */
78 #define	NCHASH(cnp, dvp)	\
79 	(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)
80 
81 LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
82 u_long	ncvhash;			/* size of hash table - 1 */
83 #define	NCVHASH(vp)		(((uintptr_t)(vp) >> 3) & ncvhash)
84 
85 TAILQ_HEAD(, namecache) nclruhead;		/* LRU chain */
86 struct	nchstats nchstats;		/* cache effectiveness statistics */
87 
88 struct pool namecache_pool;
89 
90 MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");
91 
92 int doingcache = 1;			/* 1 => enable the cache */
93 
94 /* A single lock to protect cache insertion, removal and lookup */
95 static struct simplelock namecache_slock = SIMPLELOCK_INITIALIZER;
96 
97 static void cache_remove(struct namecache *);
98 static void cache_free(struct namecache *);
99 
100 static void
101 cache_remove(struct namecache *ncp)
102 {
103 
104 	LOCK_ASSERT(simple_lock_held(&namecache_slock));
105 
106 	ncp->nc_dvp = NULL;
107 	ncp->nc_vp = NULL;
108 
109 	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
110 	if (ncp->nc_hash.le_prev != NULL) {
111 		LIST_REMOVE(ncp, nc_hash);
112 		ncp->nc_hash.le_prev = NULL;
113 	}
114 	if (ncp->nc_vhash.le_prev != NULL) {
115 		LIST_REMOVE(ncp, nc_vhash);
116 		ncp->nc_vhash.le_prev = NULL;
117 	}
118 	if (ncp->nc_vlist.le_prev != NULL) {
119 		LIST_REMOVE(ncp, nc_vlist);
120 		ncp->nc_vlist.le_prev = NULL;
121 	}
122 	if (ncp->nc_dvlist.le_prev != NULL) {
123 		LIST_REMOVE(ncp, nc_dvlist);
124 		ncp->nc_dvlist.le_prev = NULL;
125 	}
126 }
127 
128 static void
129 cache_free(struct namecache *ncp)
130 {
131 
132 	pool_put(&namecache_pool, ncp);
133 	numcache--; /* XXX MP */
134 }
135 
136 /*
137  * Look for a the name in the cache. We don't do this
138  * if the segment name is long, simply so the cache can avoid
139  * holding long names (which would either waste space, or
140  * add greatly to the complexity).
141  *
142  * Lookup is called with ni_dvp pointing to the directory to search,
143  * ni_ptr pointing to the name of the entry being sought, ni_namelen
144  * tells the length of the name, and ni_hash contains a hash of
145  * the name. If the lookup succeeds, the vnode is locked, stored in ni_vp
146  * and a status of zero is returned. If the locking fails for whatever
147  * reason, the vnode is unlocked and the error is returned to caller.
148  * If the lookup determines that the name does not exist (negative cacheing),
149  * a status of ENOENT is returned. If the lookup fails, a status of -1
150  * is returned.
151  */
152 int
153 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
154 {
155 	struct namecache *ncp;
156 	struct nchashhead *ncpp;
157 	struct vnode *vp;
158 	int error;
159 
160 	if (!doingcache) {
161 		cnp->cn_flags &= ~MAKEENTRY;
162 		*vpp = NULL;
163 		return (-1);
164 	}
165 
166 	if (cnp->cn_namelen > NCHNAMLEN) {
167 		/* XXXSMP - updating stats without lock; do we care? */
168 		nchstats.ncs_long++;
169 		cnp->cn_flags &= ~MAKEENTRY;
170 		goto fail;
171 	}
172 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
173 	simple_lock(&namecache_slock);
174 	LIST_FOREACH(ncp, ncpp, nc_hash) {
175 		if (ncp->nc_dvp == dvp &&
176 		    ncp->nc_nlen == cnp->cn_namelen &&
177 		    !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
178 			break;
179 	}
180 	if (ncp == 0) {
181 		nchstats.ncs_miss++;
182 		goto fail_wlock;
183 	}
184 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
185 		nchstats.ncs_badhits++;
186 		goto remove;
187 	} else if (ncp->nc_vp == NULL) {
188 		/*
189 		 * Restore the ISWHITEOUT flag saved earlier.
190 		 */
191 		cnp->cn_flags |= ncp->nc_flags;
192 		if (cnp->cn_nameiop != CREATE ||
193 		    (cnp->cn_flags & ISLASTCN) == 0) {
194 			nchstats.ncs_neghits++;
195 			/*
196 			 * Move this slot to end of LRU chain,
197 			 * if not already there.
198 			 */
199 			if (TAILQ_NEXT(ncp, nc_lru) != 0) {
200 				TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
201 				TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
202 			}
203 			simple_unlock(&namecache_slock);
204 			return (ENOENT);
205 		} else {
206 			nchstats.ncs_badhits++;
207 			goto remove;
208 		}
209 	}
210 
211 	vp = ncp->nc_vp;
212 
213 	/*
214 	 * Move this slot to end of LRU chain, if not already there.
215 	 */
216 	if (TAILQ_NEXT(ncp, nc_lru) != 0) {
217 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
218 		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
219 	}
220 
221 	if (vp != dvp)
222 		simple_lock(&vp->v_interlock);
223 
224 	/* Release the name cache mutex while we acquire vnode locks */
225 	simple_unlock(&namecache_slock);
226 
227 #ifdef DEBUG
228 	/*
229 	 * since we released namecache_slock,
230 	 * we can't use this pointer any more.
231 	 */
232 	ncp = NULL;
233 #endif /* DEBUG */
234 
235 	if (vp != dvp && __predict_false(vp->v_flag & VXLOCK)) {
236 		/*
237 		 * this vnode is being cleaned out.
238 		 */
239 		simple_unlock(&vp->v_interlock);
240 		nchstats.ncs_falsehits++; /* XXX badhits? */
241 		goto fail;
242 	}
243 
244 	if (vp == dvp) {	/* lookup on "." */
245 		VREF(dvp);
246 		error = 0;
247 	} else if (cnp->cn_flags & ISDOTDOT) {
248 		VOP_UNLOCK(dvp, 0);
249 		cnp->cn_flags |= PDIRUNLOCK;
250 		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
251 		/*
252 		 * If the above vget() succeeded and both LOCKPARENT and
253 		 * ISLASTCN is set, lock the directory vnode as well.
254 		 */
255 		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
256 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
257 				vput(vp);
258 				return (error);
259 			}
260 			cnp->cn_flags &= ~PDIRUNLOCK;
261 		}
262 	} else {
263 		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
264 		/*
265 		 * If the above vget() failed or either of LOCKPARENT or
266 		 * ISLASTCN is set, unlock the directory vnode.
267 		 */
268 		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
269 			VOP_UNLOCK(dvp, 0);
270 			cnp->cn_flags |= PDIRUNLOCK;
271 		}
272 	}
273 
274 	/*
275 	 * Check that the lock succeeded, and that the capability number did
276 	 * not change while we were waiting for the lock.
277 	 */
278 	if (error) {
279 		/* XXXSMP - updating stats without lock; do we care? */
280 		nchstats.ncs_badhits++;
281 
282 		/*
283 		 * The parent needs to be locked when we return to VOP_LOOKUP().
284 		 * The `.' case here should be extremely rare (if it can happen
285 		 * at all), so we don't bother optimizing out the unlock/relock.
286 		 */
287 		if (vp == dvp ||
288 		    error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
289 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
290 				return (error);
291 			cnp->cn_flags &= ~PDIRUNLOCK;
292 		}
293 		*vpp = NULL;
294 		return (-1);
295 	}
296 
297 	/* XXXSMP - updating stats without lock; do we care? */
298 	nchstats.ncs_goodhits++;
299 	*vpp = vp;
300 	return (0);
301 
302 remove:
303 	/*
304 	 * Last component and we are renaming or deleting,
305 	 * the cache entry is invalid, or otherwise don't
306 	 * want cache entry to exist.
307 	 */
308 	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
309 	LIST_REMOVE(ncp, nc_hash);
310 	ncp->nc_hash.le_prev = NULL;
311 	if (ncp->nc_vhash.le_prev != NULL) {
312 		LIST_REMOVE(ncp, nc_vhash);
313 		ncp->nc_vhash.le_prev = NULL;
314 	}
315 	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
316 
317 fail_wlock:
318 	simple_unlock(&namecache_slock);
319 fail:
320 	*vpp = NULL;
321 	return (-1);
322 }
323 
324 /*
325  * Scan cache looking for name of directory entry pointing at vp.
326  *
327  * Fill in dvpp.
328  *
329  * If bufp is non-NULL, also place the name in the buffer which starts
330  * at bufp, immediately before *bpp, and move bpp backwards to point
331  * at the start of it.  (Yes, this is a little baroque, but it's done
332  * this way to cater to the whims of getcwd).
333  *
334  * Returns 0 on success, -1 on cache miss, positive errno on failure.
335  */
336 int
337 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
338 {
339 	struct namecache *ncp;
340 	struct vnode *dvp;
341 	struct ncvhashhead *nvcpp;
342 	char *bp;
343 
344 	if (!doingcache)
345 		goto out;
346 
347 	nvcpp = &ncvhashtbl[NCVHASH(vp)];
348 
349 	simple_lock(&namecache_slock);
350 	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
351 		if (ncp->nc_vp == vp &&
352 		    (dvp = ncp->nc_dvp) != NULL &&
353 		    dvp != vp) { 		/* avoid pesky . entries.. */
354 
355 #ifdef DIAGNOSTIC
356 			if (ncp->nc_nlen == 1 &&
357 			    ncp->nc_name[0] == '.')
358 				panic("cache_revlookup: found entry for .");
359 
360 			if (ncp->nc_nlen == 2 &&
361 			    ncp->nc_name[0] == '.' &&
362 			    ncp->nc_name[1] == '.')
363 				panic("cache_revlookup: found entry for ..");
364 #endif
365 			nchstats.ncs_revhits++;
366 
367 			if (bufp) {
368 				bp = *bpp;
369 				bp -= ncp->nc_nlen;
370 				if (bp <= bufp) {
371 					*dvpp = NULL;
372 					simple_unlock(&namecache_slock);
373 					return (ERANGE);
374 				}
375 				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
376 				*bpp = bp;
377 			}
378 
379 			/* XXX MP: how do we know dvp won't evaporate? */
380 			*dvpp = dvp;
381 			simple_unlock(&namecache_slock);
382 			return (0);
383 		}
384 	}
385 	nchstats.ncs_revmiss++;
386 	simple_unlock(&namecache_slock);
387  out:
388 	*dvpp = NULL;
389 	return (-1);
390 }
391 
392 /*
393  * Add an entry to the cache
394  */
395 void
396 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
397 {
398 	struct namecache *ncp;
399 	struct nchashhead *ncpp;
400 	struct ncvhashhead *nvcpp;
401 
402 #ifdef DIAGNOSTIC
403 	if (cnp->cn_namelen > NCHNAMLEN)
404 		panic("cache_enter: name too long");
405 #endif
406 	if (!doingcache)
407 		return;
408 	/*
409 	 * Free the cache slot at head of lru chain.
410 	 */
411 	simple_lock(&namecache_slock);
412 	if (numcache < numvnodes) {
413 		numcache++;
414 		simple_unlock(&namecache_slock);
415 		ncp = pool_get(&namecache_pool, PR_WAITOK);
416 		memset(ncp, 0, sizeof(*ncp));
417 		simple_lock(&namecache_slock);
418 	} else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
419 		cache_remove(ncp);
420 	} else {
421 		simple_unlock(&namecache_slock);
422 		return;
423 	}
424 	/* Grab the vnode we just found. */
425 	ncp->nc_vp = vp;
426 	if (vp == NULL) {
427 		/*
428 		 * For negative hits, save the ISWHITEOUT flag so we can
429 		 * restore it later when the cache entry is used again.
430 		 */
431 		ncp->nc_flags = cnp->cn_flags & ISWHITEOUT;
432 	}
433 	/* Fill in cache info. */
434 	ncp->nc_dvp = dvp;
435 	LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
436 	if (vp)
437 		LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
438 	ncp->nc_nlen = cnp->cn_namelen;
439 	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
440 	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
441 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
442 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
443 
444 	ncp->nc_vhash.le_prev = NULL;
445 	ncp->nc_vhash.le_next = NULL;
446 
447 	/*
448 	 * Create reverse-cache entries (used in getcwd) for directories.
449 	 */
450 	if (vp != NULL &&
451 	    vp != dvp &&
452 #ifndef NAMECACHE_ENTER_REVERSE
453 	    vp->v_type == VDIR &&
454 #endif
455 	    (ncp->nc_nlen > 2 ||
456 	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
457 	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
458 		nvcpp = &ncvhashtbl[NCVHASH(vp)];
459 		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
460 	}
461 	simple_unlock(&namecache_slock);
462 }
463 
464 /*
465  * Name cache initialization, from vfs_init() when we are booting
466  */
467 void
468 nchinit(void)
469 {
470 
471 	TAILQ_INIT(&nclruhead);
472 	nchashtbl =
473 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash);
474 	ncvhashtbl =
475 #ifdef NAMECACHE_ENTER_REVERSE
476 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
477 #else
478 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
479 #endif
480 	pool_init(&namecache_pool, sizeof(struct namecache), 0, 0, 0,
481 	    "ncachepl", &pool_allocator_nointr);
482 }
483 
484 /*
485  * Name cache reinitialization, for when the maximum number of vnodes increases.
486  */
487 void
488 nchreinit(void)
489 {
490 	struct namecache *ncp;
491 	struct nchashhead *oldhash1, *hash1;
492 	struct ncvhashhead *oldhash2, *hash2;
493 	u_long i, oldmask1, oldmask2, mask1, mask2;
494 
495 	hash1 = hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask1);
496 	hash2 =
497 #ifdef NAMECACHE_ENTER_REVERSE
498 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
499 #else
500 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
501 #endif
502 	simple_lock(&namecache_slock);
503 	oldhash1 = nchashtbl;
504 	oldmask1 = nchash;
505 	nchashtbl = hash1;
506 	nchash = mask1;
507 	oldhash2 = ncvhashtbl;
508 	oldmask2 = ncvhash;
509 	ncvhashtbl = hash2;
510 	ncvhash = mask2;
511 	for (i = 0; i <= oldmask1; i++) {
512 		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
513 			LIST_REMOVE(ncp, nc_hash);
514 			ncp->nc_hash.le_prev = NULL;
515 		}
516 	}
517 	for (i = 0; i <= oldmask2; i++) {
518 		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
519 			LIST_REMOVE(ncp, nc_vhash);
520 			ncp->nc_vhash.le_prev = NULL;
521 		}
522 	}
523 	simple_unlock(&namecache_slock);
524 	hashdone(oldhash1, M_CACHE);
525 	hashdone(oldhash2, M_CACHE);
526 }
527 
528 /*
529  * Cache flush, a particular vnode; called when a vnode is renamed to
530  * hide entries that would now be invalid
531  */
532 void
533 cache_purge(struct vnode *vp)
534 {
535 	struct namecache *ncp, *ncnext;
536 
537 	simple_lock(&namecache_slock);
538 	for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL; ncp = ncnext) {
539 		ncnext = LIST_NEXT(ncp, nc_vlist);
540 		cache_remove(ncp);
541 		cache_free(ncp);
542 	}
543 	for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL; ncp = ncnext) {
544 		ncnext = LIST_NEXT(ncp, nc_dvlist);
545 		cache_remove(ncp);
546 		cache_free(ncp);
547 	}
548 	simple_unlock(&namecache_slock);
549 }
550 
551 /*
552  * Cache flush, a whole filesystem; called when filesys is umounted to
553  * remove entries that would now be invalid.
554  */
555 void
556 cache_purgevfs(struct mount *mp)
557 {
558 	struct namecache *ncp, *nxtcp;
559 
560 	simple_lock(&namecache_slock);
561 	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
562 		nxtcp = TAILQ_NEXT(ncp, nc_lru);
563 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
564 			continue;
565 		}
566 		/* Free the resources we had. */
567 		cache_remove(ncp);
568 		cache_free(ncp);
569 	}
570 	simple_unlock(&namecache_slock);
571 }
572 
573 #ifdef DDB
574 void
575 namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
576 {
577 	struct vnode *dvp = NULL;
578 	struct namecache *ncp;
579 
580 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
581 		if (ncp->nc_vp == vp) {
582 			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
583 			dvp = ncp->nc_dvp;
584 		}
585 	}
586 	if (dvp == NULL) {
587 		(*pr)("name not found\n");
588 		return;
589 	}
590 	vp = dvp;
591 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
592 		if (ncp->nc_vp == vp) {
593 			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
594 		}
595 	}
596 }
597 #endif
598