xref: /netbsd-src/sys/kern/vfs_cache.c (revision 001c68bd94f75ce9270b69227c4199fbf34ee396)
1 /*	$NetBSD: vfs_cache.c,v 1.45 2003/06/29 22:31:32 fvdl Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.45 2003/06/29 22:31:32 fvdl Exp $");
40 
41 #include "opt_ddb.h"
42 #include "opt_revcache.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/time.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/namei.h>
50 #include <sys/errno.h>
51 #include <sys/malloc.h>
52 #include <sys/pool.h>
53 #include <sys/lock.h>
54 
55 /*
56  * Name caching works as follows:
57  *
58  * Names found by directory scans are retained in a cache
59  * for future reference.  It is managed LRU, so frequently
60  * used names will hang around.  Cache is indexed by hash value
61  * obtained from (dvp, name) where dvp refers to the directory
62  * containing name.
63  *
64  * For simplicity (and economy of storage), names longer than
65  * a maximum length of NCHNAMLEN are not cached; they occur
66  * infrequently in any case, and are almost never of interest.
67  *
68  * Upon reaching the last segment of a path, if the reference
69  * is for DELETE, or NOCACHE is set (rewrite), and the
70  * name is located in the cache, it will be dropped.
71  * The entry is also dropped when it was not possible to lock
72  * the cached vnode, either because vget() failed or because the
73  * generation number changed while we were waiting for the lock.
74  */
75 
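/*
 * An illustrative sketch (not part of this interface) of how a
 * filesystem's lookup routine typically uses the cache.  Here
 * example_scan_directory() is only a stand-in for the filesystem's real
 * directory scan, and real lookup code does considerably more flag and
 * locking work:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error >= 0)
 *		return (error);		0: hit, ENOENT: negative hit,
 *					anything else: a locking error
 *
 *	A return of -1 means a miss, so scan the directory itself:
 *
 *	error = example_scan_directory(dvp, vpp, cnp);
 *	if ((cnp->cn_flags & MAKEENTRY) == 0)
 *		return (error);
 *	if (error == 0)
 *		cache_enter(dvp, *vpp, cnp);
 *	else if (error == ENOENT)
 *		cache_enter(dvp, NULL, cnp);	a negative entry
 *	return (error);
 */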
76 /*
77  * Structures associated with name caching.
78  */
79 LIST_HEAD(nchashhead, namecache) *nchashtbl;
80 u_long	nchash;				/* size of hash table - 1 */
81 long	numcache;			/* number of cache entries allocated */
82 #define	NCHASH(cnp, dvp)	(((cnp)->cn_hash ^ (dvp)->v_id) & nchash)
83 
84 LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
85 u_long	ncvhash;			/* size of hash table - 1 */
86 #define	NCVHASH(vp)		((vp)->v_id & ncvhash)
87 
88 TAILQ_HEAD(, namecache) nclruhead;		/* LRU chain */
89 struct	nchstats nchstats;		/* cache effectiveness statistics */
90 
91 struct pool namecache_pool;
92 
93 MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");
94 
95 int doingcache = 1;			/* 1 => enable the cache */
96 
97 /* A single lock to protect cache insertion, removal and lookup */
98 static struct simplelock namecache_slock = SIMPLELOCK_INITIALIZER;
99 
100 /*
101  * Look for the name in the cache. We don't do this
102  * if the segment name is long, simply so the cache can avoid
103  * holding long names (which would either waste space or
104  * add greatly to the complexity).
105  *
106  * Lookup is called with dvp pointing to the directory to search,
107  * cnp->cn_nameptr pointing to the name of the entry being sought,
108  * cnp->cn_namelen giving the length of the name, and cnp->cn_hash
109  * holding a hash of the name. If the lookup succeeds, the vnode is
110  * locked and stored in *vpp, and a status of zero is returned. If the
111  * locking fails for whatever reason, the vnode is unlocked and the
112  * error is returned to the caller. If the lookup determines that the
113  * name does not exist (negative caching), a status of ENOENT is
114  * returned. If the lookup fails, a status of -1 is returned.
115  */
116 int
117 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
118 {
119 	struct namecache *ncp;
120 	struct nchashhead *ncpp;
121 	struct vnode *vp;
122 	u_long vpid;
123 	int error;
124 
125 	if (!doingcache) {
126 		cnp->cn_flags &= ~MAKEENTRY;
127 		*vpp = NULL;
128 		return (-1);
129 	}
130 
131 	simple_lock(&namecache_slock);
132 	if (cnp->cn_namelen > NCHNAMLEN) {
133 		nchstats.ncs_long++;
134 		cnp->cn_flags &= ~MAKEENTRY;
135 		goto fail_wlock;
136 	}
137 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
138 	LIST_FOREACH(ncp, ncpp, nc_hash) {
139 		if (ncp->nc_dvp == dvp &&
140 		    ncp->nc_dvpid == dvp->v_id &&
141 		    ncp->nc_nlen == cnp->cn_namelen &&
142 		    !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
143 			break;
144 	}
145 	if (ncp == NULL) {
146 		nchstats.ncs_miss++;
147 		goto fail_wlock;
148 	}
149 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
150 		nchstats.ncs_badhits++;
151 		goto remove;
152 	} else if (ncp->nc_vp == NULL) {
153 		/*
154 		 * Restore the ISWHITEOUT flag saved earlier.
155 		 */
156 		cnp->cn_flags |= ncp->nc_vpid;
157 		if (cnp->cn_nameiop != CREATE ||
158 		    (cnp->cn_flags & ISLASTCN) == 0) {
159 			nchstats.ncs_neghits++;
160 			/*
161 			 * Move this slot to end of LRU chain,
162 			 * if not already there.
163 			 */
164 			if (TAILQ_NEXT(ncp, nc_lru) != 0) {
165 				TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
166 				TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
167 			}
168 			simple_unlock(&namecache_slock);
169 			return (ENOENT);
170 		} else {
171 			nchstats.ncs_badhits++;
172 			goto remove;
173 		}
174 	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
175 		nchstats.ncs_falsehits++;
176 		goto remove;
177 	}
178 
179 	vp = ncp->nc_vp;
180 	vpid = vp->v_id;
181 	/* Release the name cache mutex while we acquire vnode locks */
182 	simple_unlock(&namecache_slock);
183 
184 	if (vp == dvp) {	/* lookup on "." */
185 		VREF(dvp);
186 		error = 0;
187 	} else if (cnp->cn_flags & ISDOTDOT) {
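		/*
		 * ".." names the parent of dvp.  Drop dvp's lock before
		 * locking the target so locks are always taken in
		 * parent-before-child order and we cannot deadlock against
		 * a forward lookup that already holds the parent.
		 */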
188 		VOP_UNLOCK(dvp, 0);
189 		cnp->cn_flags |= PDIRUNLOCK;
190 		error = vget(vp, LK_EXCLUSIVE);
191 		/*
192 		 * If the above vget() succeeded and both LOCKPARENT and
193 		 * ISLASTCN are set, lock the directory vnode as well.
194 		 */
195 		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
196 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
197 				vput(vp);
198 				return (error);
199 			}
200 			cnp->cn_flags &= ~PDIRUNLOCK;
201 		}
202 	} else {
203 		error = vget(vp, LK_EXCLUSIVE);
204 		/*
205 		 * If the above vget() failed, or if LOCKPARENT and ISLASTCN
206 		 * are not both set, unlock the directory vnode.
207 		 */
208 		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
209 			VOP_UNLOCK(dvp, 0);
210 			cnp->cn_flags |= PDIRUNLOCK;
211 		}
212 	}
213 
214 	/*
215 	 * Check that the lock succeeded, and that the capability number did
216 	 * not change while we were waiting for the lock.
217 	 */
218 	if (error || vpid != vp->v_id) {
219 		/* XXXSMP - updating stats without lock; do we care? */
220 		if (!error) {
221 			vput(vp);
222 			nchstats.ncs_falsehits++;
223 		} else
224 			nchstats.ncs_badhits++;
225 
226 		/*
227 		 * The parent needs to be locked when we return to VOP_LOOKUP().
228 		 * The `.' case here should be extremely rare (if it can happen
229 		 * at all), so we don't bother optimizing out the unlock/relock.
230 		 */
231 		if (vp == dvp ||
232 		    error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
233 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
234 				return (error);
235 			cnp->cn_flags &= ~PDIRUNLOCK;
236 		}
237 		*vpp = NULL;
238 		return (-1);
239 	}
240 
241 	simple_lock(&namecache_slock);
242 	nchstats.ncs_goodhits++;
243 	/*
244 	 * Move this slot to end of LRU chain, if not already there.
245 	 */
246 	if (TAILQ_NEXT(ncp, nc_lru) != 0) {
247 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
248 		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
249 	}
250 
251 	simple_unlock(&namecache_slock);
252 	*vpp = vp;
253 	return (0);
254 
255 remove:
256 	/*
257 	 * This is the last component and we are renaming or deleting,
258 	 * the cache entry is invalid, or we otherwise don't want the
259 	 * cache entry to exist.
260 	 */
261 	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
262 	LIST_REMOVE(ncp, nc_hash);
263 	ncp->nc_hash.le_prev = NULL;
264 	if (ncp->nc_vhash.le_prev != NULL) {
265 		LIST_REMOVE(ncp, nc_vhash);
266 		ncp->nc_vhash.le_prev = NULL;
267 	}
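	/*
	 * Put the invalidated entry at the head of the LRU so that it is
	 * the first one recycled by cache_enter().
	 */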
268 	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
269 
270 fail_wlock:
271 	simple_unlock(&namecache_slock);
272 	*vpp = NULL;
273 	return (-1);
274 }
275 
276 /*
277  * Scan cache looking for name of directory entry pointing at vp.
278  *
279  * Fill in dvpp.
280  *
281  * If bufp is non-NULL, also place the name in the buffer which starts
282  * at bufp, immediately before *bpp, and move bpp backwards to point
283  * at the start of it.  (Yes, this is a little baroque, but it's done
284  * this way to cater to the whims of getcwd).
285  *
286  * Returns 0 on success, -1 on cache miss, positive errno on failure.
287  */
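/*
 * Illustrative sketch of the buffer convention described above (not part
 * of the interface):
 *
 *	bufp                      *bpp (out)          *bpp (in)
 *	 |                           |                    |
 *	 v                           v                    v
 *	 [ ....... free ....... ][ nc_name copied here ][ text built so far ]
 *
 * The call fails with ERANGE if the copied name would reach down to the
 * start of the buffer (bufp).
 */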
288 int
289 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
290 {
291 	struct namecache *ncp;
292 	struct vnode *dvp;
293 	struct ncvhashhead *nvcpp;
294 	char *bp;
295 
296 	if (!doingcache)
297 		goto out;
298 
299 	nvcpp = &ncvhashtbl[NCVHASH(vp)];
300 
301 	simple_lock(&namecache_slock);
302 	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
303 		if (ncp->nc_vp == vp &&
304 		    ncp->nc_vpid == vp->v_id &&
305 		    (dvp = ncp->nc_dvp) != NULL &&
306 		    dvp != vp && 		/* avoid pesky . entries.. */
307 		    dvp->v_id == ncp->nc_dvpid) {
308 
309 #ifdef DIAGNOSTIC
310 			if (ncp->nc_nlen == 1 &&
311 			    ncp->nc_name[0] == '.')
312 				panic("cache_revlookup: found entry for .");
313 
314 			if (ncp->nc_nlen == 2 &&
315 			    ncp->nc_name[0] == '.' &&
316 			    ncp->nc_name[1] == '.')
317 				panic("cache_revlookup: found entry for ..");
318 #endif
319 			nchstats.ncs_revhits++;
320 
321 			if (bufp) {
322 				bp = *bpp;
323 				bp -= ncp->nc_nlen;
324 				if (bp <= bufp) {
325 					*dvpp = NULL;
326 					simple_unlock(&namecache_slock);
327 					return (ERANGE);
328 				}
329 				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
330 				*bpp = bp;
331 			}
332 
333 			/* XXX MP: how do we know dvp won't evaporate? */
334 			*dvpp = dvp;
335 			simple_unlock(&namecache_slock);
336 			return (0);
337 		}
338 	}
339 	nchstats.ncs_revmiss++;
340 	simple_unlock(&namecache_slock);
341  out:
342 	*dvpp = NULL;
343 	return (-1);
344 }
345 
346 /*
347  * Add an entry to the cache
348  */
349 void
350 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
351 {
352 	struct namecache *ncp;
353 	struct nchashhead *ncpp;
354 	struct ncvhashhead *nvcpp;
355 
356 #ifdef DIAGNOSTIC
357 	if (cnp->cn_namelen > NCHNAMLEN)
358 		panic("cache_enter: name too long");
359 #endif
360 	if (!doingcache)
361 		return;
362 	/*
363 	 * Allocate a new entry, or recycle the slot at the head of the LRU chain.
364 	 */
365 	simple_lock(&namecache_slock);
366 	if (numcache < numvnodes) {
367 		numcache++;
368 		simple_unlock(&namecache_slock);
369 		ncp = pool_get(&namecache_pool, PR_WAITOK);
370 		memset(ncp, 0, sizeof(*ncp));
371 		simple_lock(&namecache_slock);
372 	} else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
373 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
374 		if (ncp->nc_hash.le_prev != NULL) {
375 			LIST_REMOVE(ncp, nc_hash);
376 			ncp->nc_hash.le_prev = NULL;
377 		}
378 		if (ncp->nc_vhash.le_prev != NULL) {
379 			LIST_REMOVE(ncp, nc_vhash);
380 			ncp->nc_vhash.le_prev = NULL;
381 		}
382 	} else {
383 		simple_unlock(&namecache_slock);
384 		return;
385 	}
386 	/* Grab the vnode we just found. */
387 	ncp->nc_vp = vp;
388 	if (vp)
389 		ncp->nc_vpid = vp->v_id;
390 	else {
391 		/*
392 		 * For negative hits, save the ISWHITEOUT flag so we can
393 		 * restore it later when the cache entry is used again.
394 		 */
395 		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
396 	}
397 	/* Fill in cache info. */
398 	ncp->nc_dvp = dvp;
399 	ncp->nc_dvpid = dvp->v_id;
400 	ncp->nc_nlen = cnp->cn_namelen;
401 	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
402 	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
403 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
404 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
405 
406 	ncp->nc_vhash.le_prev = NULL;
407 	ncp->nc_vhash.le_next = NULL;
408 
409 	/*
410 	 * Create reverse-cache entries (used in getcwd) for directories.
411 	 */
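	/*
	 * "." and ".." names are excluded below: a "." entry would map a
	 * vnode back to itself, and a ".." entry would make a reverse
	 * lookup walk back down into a child instead of up toward the root.
	 */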
412 	if (vp != NULL &&
413 	    vp != dvp &&
414 #ifndef NAMECACHE_ENTER_REVERSE
415 	    vp->v_type == VDIR &&
416 #endif
417 	    (ncp->nc_nlen > 2 ||
418 	    (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
419 	    (/* ncp->nc_nlen > 0 && */ ncp->nc_name[0] != '.'))) {
420 		nvcpp = &ncvhashtbl[NCVHASH(vp)];
421 		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
422 	}
423 	simple_unlock(&namecache_slock);
424 }
425 
426 /*
427  * Name cache initialization, from vfs_init() when we are booting
428  */
429 void
430 nchinit(void)
431 {
432 
433 	TAILQ_INIT(&nclruhead);
434 	nchashtbl =
435 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash);
436 	ncvhashtbl =
437 #ifdef NAMECACHE_ENTER_REVERSE
438 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
439 #else
440 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
441 #endif
442 	pool_init(&namecache_pool, sizeof(struct namecache), 0, 0, 0,
443 	    "ncachepl", &pool_allocator_nointr);
444 }
445 
446 /*
447  * Name cache reinitialization, for when the maximum number of vnodes increases.
448  */
449 void
450 nchreinit(void)
451 {
452 	struct namecache *ncp;
453 	struct nchashhead *oldhash1, *hash1;
454 	struct ncvhashhead *oldhash2, *hash2;
455 	u_long i, oldmask1, oldmask2, mask1, mask2;
456 
457 	hash1 = hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask1);
458 	hash2 =
459 #ifdef NAMECACHE_ENTER_REVERSE
460 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
461 #else
462 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
463 #endif
464 	simple_lock(&namecache_slock);
465 	oldhash1 = nchashtbl;
466 	oldmask1 = nchash;
467 	nchashtbl = hash1;
468 	nchash = mask1;
469 	oldhash2 = ncvhashtbl;
470 	oldmask2 = ncvhash;
471 	ncvhashtbl = hash2;
472 	ncvhash = mask2;
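	/*
	 * Entries are not moved into the new tables; they are merely
	 * unhooked from the old ones.  They remain on the LRU list, so
	 * cache_enter() can still recycle them, and subsequent lookups
	 * simply miss until the names are entered again.
	 */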
473 	for (i = 0; i <= oldmask1; i++) {
474 		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
475 			LIST_REMOVE(ncp, nc_hash);
476 			ncp->nc_hash.le_prev = NULL;
477 		}
478 	}
479 	for (i = 0; i <= oldmask2; i++) {
480 		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
481 			LIST_REMOVE(ncp, nc_vhash);
482 			ncp->nc_vhash.le_prev = NULL;
483 		}
484 	}
485 	simple_unlock(&namecache_slock);
486 	hashdone(oldhash1, M_CACHE);
487 	hashdone(oldhash2, M_CACHE);
488 }
489 
490 /*
491  * Cache flush, a particular vnode; called when a vnode is renamed to
492  * hide entries that would now be invalid
493  */
494 void
495 cache_purge(struct vnode *vp)
496 {
497 	struct namecache *ncp;
498 	struct nchashhead *ncpp;
499 	static u_long nextvnodeid;
500 
501 	simple_lock(&namecache_slock);
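	/*
	 * Changing v_id invalidates every cache entry recorded for this
	 * vnode, since lookups compare the stored id against the current
	 * one.  If nextvnodeid wraps around to zero, previously issued ids
	 * may be handed out again, so clear the ids stored in all entries
	 * (forcing misses) and then take a fresh, non-zero id.
	 */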
502 	vp->v_id = ++nextvnodeid;
503 	if (nextvnodeid != 0)
504 		goto out;
505 	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
506 		LIST_FOREACH(ncp, ncpp, nc_hash) {
507 			ncp->nc_vpid = 0;
508 			ncp->nc_dvpid = 0;
509 		}
510 	}
511 	vp->v_id = ++nextvnodeid;
512 out:
513 	simple_unlock(&namecache_slock);
514 }
515 
516 /*
517  * Cache flush, a whole filesystem; called when filesys is umounted to
518  * remove entries that would now be invalid.
519  */
520 void
521 cache_purgevfs(struct mount *mp)
522 {
523 	struct namecache *ncp, *nxtcp;
524 
525 	simple_lock(&namecache_slock);
526 	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
527 		nxtcp = TAILQ_NEXT(ncp, nc_lru);
528 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
529 			continue;
530 		}
531 		/* Free the resources we had. */
532 		ncp->nc_vp = NULL;
533 		ncp->nc_dvp = NULL;
534 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
535 		if (ncp->nc_hash.le_prev != NULL) {
536 			LIST_REMOVE(ncp, nc_hash);
537 			ncp->nc_hash.le_prev = NULL;
538 		}
539 		if (ncp->nc_vhash.le_prev != NULL) {
540 			LIST_REMOVE(ncp, nc_vhash);
541 			ncp->nc_vhash.le_prev = NULL;
542 		}
543 		TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
544 	}
545 	simple_unlock(&namecache_slock);
546 }
547 
548 #ifdef DDB
549 void
550 namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
551 {
552 	struct vnode *dvp = NULL;
553 	struct namecache *ncp;
554 
555 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
556 		if (ncp->nc_vp == vp && ncp->nc_vpid == vp->v_id) {
557 			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
558 			dvp = ncp->nc_dvp;
559 		}
560 	}
561 	if (dvp == NULL) {
562 		(*pr)("name not found\n");
563 		return;
564 	}
565 	vp = dvp;
566 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
567 		if (ncp->nc_vp == vp && ncp->nc_vpid == vp->v_id) {
568 			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
569 		}
570 	}
571 }
572 #endif
573