xref: /netbsd-src/sys/kern/vfs_cache.c (revision 8a8f936f250a330d54f8a24ed0e92aadf9743a7b)
1 /*	$NetBSD: vfs_cache.c,v 1.30 2001/09/24 06:01:13 chs Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
36  */
37 
38 #include "opt_ddb.h"
39 #include "opt_revcache.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/time.h>
44 #include <sys/mount.h>
45 #include <sys/vnode.h>
46 #include <sys/namei.h>
47 #include <sys/errno.h>
48 #include <sys/malloc.h>
49 #include <sys/pool.h>
50 
51 /*
52  * Name caching works as follows:
53  *
54  * Names found by directory scans are retained in a cache
55  * for future reference.  It is managed LRU, so frequently
56  * used names will hang around.  Cache is indexed by hash value
57  * obtained from (dvp, name) where dvp refers to the directory
58  * containing name.
59  *
60  * For simplicity (and economy of storage), names longer than
61  * a maximum length of NCHNAMLEN are not cached; they occur
62  * infrequently in any case, and are almost never of interest.
63  *
64  * Upon reaching the last segment of a path, if the reference
65  * is for DELETE, or NOCACHE is set (rewrite), and the
66  * name is located in the cache, it will be dropped.
67  * The entry is dropped also when it was not possible to lock
68  * the cached vnode, either because vget() failed or the generation
69  * number has changed while waiting for the lock.
70  */
71 
72 /*
73  * Structures associated with name cacheing.
74  */
75 LIST_HEAD(nchashhead, namecache) *nchashtbl;
76 u_long	nchash;				/* size of hash table - 1 */
77 long	numcache;			/* number of cache entries allocated */
78 #define NCHASH(cnp, dvp)	(((cnp)->cn_hash ^ (dvp)->v_id) & nchash)
79 
80 LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
81 u_long	ncvhash;			/* size of hash table - 1 */
82 #define NCVHASH(vp)		((vp)->v_id & ncvhash)
83 
84 TAILQ_HEAD(, namecache) nclruhead;		/* LRU chain */
85 struct	nchstats nchstats;		/* cache effectiveness statistics */
86 
87 struct pool namecache_pool;
88 
89 int doingcache = 1;			/* 1 => enable the cache */
90 
91 /*
92  * Look for a the name in the cache. We don't do this
93  * if the segment name is long, simply so the cache can avoid
94  * holding long names (which would either waste space, or
95  * add greatly to the complexity).
96  *
97  * Lookup is called with ni_dvp pointing to the directory to search,
98  * ni_ptr pointing to the name of the entry being sought, ni_namelen
99  * tells the length of the name, and ni_hash contains a hash of
100  * the name. If the lookup succeeds, the vnode is locked, stored in ni_vp
101  * and a status of zero is returned. If the locking fails for whatever
102  * reason, the vnode is unlocked and the error is returned to caller.
103  * If the lookup determines that the name does not exist (negative cacheing),
104  * a status of ENOENT is returned. If the lookup fails, a status of -1
105  * is returned.
106  */
107 int
108 cache_lookup(dvp, vpp, cnp)
109 	struct vnode *dvp;
110 	struct vnode **vpp;
111 	struct componentname *cnp;
112 {
113 	struct namecache *ncp;
114 	struct nchashhead *ncpp;
115 	struct vnode *vp;
116 	int vpid, error;
117 
118 	if (!doingcache) {
119 		cnp->cn_flags &= ~MAKEENTRY;
120 		*vpp = 0;
121 		return (-1);
122 	}
123 	if (cnp->cn_namelen > NCHNAMLEN) {
124 		nchstats.ncs_long++;
125 		cnp->cn_flags &= ~MAKEENTRY;
126 		*vpp = 0;
127 		return (-1);
128 	}
129 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
130 	LIST_FOREACH(ncp, ncpp, nc_hash) {
131 		if (ncp->nc_dvp == dvp &&
132 		    ncp->nc_dvpid == dvp->v_id &&
133 		    ncp->nc_nlen == cnp->cn_namelen &&
134 		    !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
135 			break;
136 	}
137 	if (ncp == 0) {
138 		nchstats.ncs_miss++;
139 		*vpp = 0;
140 		return (-1);
141 	}
142 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
143 		nchstats.ncs_badhits++;
144 		goto remove;
145 	} else if (ncp->nc_vp == NULL) {
146 		/*
147 		 * Restore the ISWHITEOUT flag saved earlier.
148 		 */
149 		cnp->cn_flags |= ncp->nc_vpid;
150 		if (cnp->cn_nameiop != CREATE ||
151 		    (cnp->cn_flags & ISLASTCN) == 0) {
152 			nchstats.ncs_neghits++;
153 			/*
154 			 * Move this slot to end of LRU chain,
155 			 * if not already there.
156 			 */
157 			if (ncp->nc_lru.tqe_next != 0) {
158 				TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
159 				TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
160 			}
161 			return (ENOENT);
162 		} else {
163 			nchstats.ncs_badhits++;
164 			goto remove;
165 		}
166 	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
167 		nchstats.ncs_falsehits++;
168 		goto remove;
169 	}
170 
171 	vp = ncp->nc_vp;
172 	vpid = vp->v_id;
173 	if (vp == dvp) {	/* lookup on "." */
174 		VREF(dvp);
175 		error = 0;
176 	} else if (cnp->cn_flags & ISDOTDOT) {
177 		VOP_UNLOCK(dvp, 0);
178 		cnp->cn_flags |= PDIRUNLOCK;
179 		error = vget(vp, LK_EXCLUSIVE);
180 		/* if the above vget() succeeded and both LOCKPARENT and
181 		 * ISLASTCN is set, lock the directory vnode as well */
182 		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
183 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
184 				vput(vp);
185 				return (error);
186 			}
187 			cnp->cn_flags &= ~PDIRUNLOCK;
188 		}
189 	} else {
190 		error = vget(vp, LK_EXCLUSIVE);
191 		/* if the above vget() failed or either of LOCKPARENT or
192 		 * ISLASTCN is set, unlock the directory vnode */
193 		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
194 			VOP_UNLOCK(dvp, 0);
195 			cnp->cn_flags |= PDIRUNLOCK;
196 		}
197 	}
198 
199 	/*
200 	 * Check that the lock succeeded, and that the capability number did
201 	 * not change while we were waiting for the lock.
202 	 */
203 	if (error || vpid != vp->v_id) {
204 		if (!error) {
205 			vput(vp);
206 			nchstats.ncs_falsehits++;
207 		} else
208 			nchstats.ncs_badhits++;
209 		/*
210 		 * The parent needs to be locked when we return to VOP_LOOKUP().
211 		 * The `.' case here should be extremely rare (if it can happen
212 		 * at all), so we don't bother optimizing out the unlock/relock.
213 		 */
214 		if (vp == dvp ||
215 		    error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
216 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
217 				return (error);
218 			cnp->cn_flags &= ~PDIRUNLOCK;
219 		}
220 		goto remove;
221 	}
222 
223 	nchstats.ncs_goodhits++;
224 	/*
225 	 * move this slot to end of LRU chain, if not already there
226 	 */
227 	if (ncp->nc_lru.tqe_next != 0) {
228 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
229 		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
230 	}
231 	*vpp = vp;
232 	return (0);
233 
234 remove:
235 	/*
236 	 * Last component and we are renaming or deleting,
237 	 * the cache entry is invalid, or otherwise don't
238 	 * want cache entry to exist.
239 	 */
240 	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
241 	LIST_REMOVE(ncp, nc_hash);
242 	ncp->nc_hash.le_prev = 0;
243 	if (ncp->nc_vhash.le_prev != NULL) {
244 		LIST_REMOVE(ncp, nc_vhash);
245 		ncp->nc_vhash.le_prev = 0;
246 	}
247 	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
248 	*vpp = 0;
249 	return (-1);
250 }
251 
252 /*
253  * Scan cache looking for name of directory entry pointing at vp.
254  *
255  * Fill in dvpp.
256  *
257  * If bufp is non-NULL, also place the name in the buffer which starts
258  * at bufp, immediately before *bpp, and move bpp backwards to point
259  * at the start of it.  (Yes, this is a little baroque, but it's done
260  * this way to cater to the whims of getcwd).
261  *
262  * Returns 0 on success, -1 on cache miss, positive errno on failure.
263  */
264 int
265 cache_revlookup (vp, dvpp, bpp, bufp)
266 	struct vnode *vp, **dvpp;
267 	char **bpp;
268 	char *bufp;
269 {
270 	struct namecache *ncp;
271 	struct vnode *dvp;
272 	struct ncvhashhead *nvcpp;
273 
274 	if (!doingcache)
275 		goto out;
276 
277 	nvcpp = &ncvhashtbl[NCVHASH(vp)];
278 
279 	LIST_FOREACH(ncp, nvcpp, nc_vhash) {
280 		if ((ncp->nc_vp == vp) &&
281 		    (ncp->nc_vpid == vp->v_id) &&
282 		    ((dvp = ncp->nc_dvp) != 0) &&
283 		    (dvp != vp) && 		/* avoid pesky . entries.. */
284 		    (dvp->v_id == ncp->nc_dvpid))
285 		{
286 			char *bp;
287 
288 #ifdef DIAGNOSTIC
289 			if ((ncp->nc_nlen == 1) &&
290 			    (ncp->nc_name[0] == '.'))
291 				panic("cache_revlookup: found entry for .");
292 
293 			if ((ncp->nc_nlen == 2) &&
294 			    (ncp->nc_name[0] == '.') &&
295 			    (ncp->nc_name[1] == '.'))
296 				panic("cache_revlookup: found entry for ..");
297 #endif
298 			nchstats.ncs_revhits++;
299 
300 			if (bufp) {
301 				bp = *bpp;
302 				bp -= ncp->nc_nlen;
303 				if (bp <= bufp) {
304 					*dvpp = 0;
305 					return ERANGE;
306 				}
307 				memcpy(bp, ncp->nc_name, ncp->nc_nlen);
308 				*bpp = bp;
309 			}
310 
311 			/* XXX MP: how do we know dvp won't evaporate? */
312 			*dvpp = dvp;
313 			return 0;
314 		}
315 	}
316 	nchstats.ncs_revmiss++;
317  out:
318 	*dvpp = 0;
319 	return -1;
320 }
321 
322 /*
323  * Add an entry to the cache
324  */
325 void
326 cache_enter(dvp, vp, cnp)
327 	struct vnode *dvp;
328 	struct vnode *vp;
329 	struct componentname *cnp;
330 {
331 	struct namecache *ncp;
332 	struct nchashhead *ncpp;
333 	struct ncvhashhead *nvcpp;
334 
335 #ifdef DIAGNOSTIC
336 	if (cnp->cn_namelen > NCHNAMLEN)
337 		panic("cache_enter: name too long");
338 #endif
339 	if (!doingcache)
340 		return;
341 	/*
342 	 * Free the cache slot at head of lru chain.
343 	 */
344 	if (numcache < numvnodes) {
345 		ncp = pool_get(&namecache_pool, PR_WAITOK);
346 		memset(ncp, 0, sizeof(*ncp));
347 		numcache++;
348 	} else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
349 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
350 		if (ncp->nc_hash.le_prev != 0) {
351 			LIST_REMOVE(ncp, nc_hash);
352 			ncp->nc_hash.le_prev = 0;
353 		}
354 		if (ncp->nc_vhash.le_prev != 0) {
355 			LIST_REMOVE(ncp, nc_vhash);
356 			ncp->nc_vhash.le_prev = 0;
357 		}
358 	} else
359 		return;
360 	/* grab the vnode we just found */
361 	ncp->nc_vp = vp;
362 	if (vp)
363 		ncp->nc_vpid = vp->v_id;
364 	else {
365 		/*
366 		 * For negative hits, save the ISWHITEOUT flag so we can
367 		 * restore it later when the cache entry is used again.
368 		 */
369 		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
370 	}
371 	/* fill in cache info */
372 	ncp->nc_dvp = dvp;
373 	ncp->nc_dvpid = dvp->v_id;
374 	ncp->nc_nlen = cnp->cn_namelen;
375 	memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
376 	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
377 	ncpp = &nchashtbl[NCHASH(cnp, dvp)];
378 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
379 
380 	ncp->nc_vhash.le_prev = 0;
381 	ncp->nc_vhash.le_next = 0;
382 
383 	/*
384 	 * Create reverse-cache entries (used in getcwd) for directories.
385 	 */
386 	if (vp &&
387 	    (vp != dvp) &&
388 #ifndef NAMECACHE_ENTER_REVERSE
389 	    (vp->v_type == VDIR) &&
390 #endif
391 	    ((ncp->nc_nlen > 2) ||
392 	     ((ncp->nc_nlen == 2) && (ncp->nc_name[0] != '.') && (ncp->nc_name[1] != '.')) ||
393 	     ((ncp->nc_nlen == 1) && (ncp->nc_name[0] != '.'))))
394 	{
395 		nvcpp = &ncvhashtbl[NCVHASH(vp)];
396 		LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
397 	}
398 
399 }
400 
401 /*
402  * Name cache initialization, from vfs_init() when we are booting
403  */
404 void
405 nchinit()
406 {
407 
408 	TAILQ_INIT(&nclruhead);
409 	nchashtbl =
410 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash);
411 	ncvhashtbl =
412 #ifdef NAMECACHE_ENTER_REVERSE
413 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
414 #else
415 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
416 #endif
417 	pool_init(&namecache_pool, sizeof(struct namecache), 0, 0, 0,
418 	    "ncachepl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
419 	    M_CACHE);
420 }
421 
422 /*
423  * Name cache reinitialization, for when the maximum number of vnodes increases.
424  */
425 void
426 nchreinit()
427 {
428 	struct namecache *ncp;
429 	struct nchashhead *oldhash1, *hash1;
430 	struct ncvhashhead *oldhash2, *hash2;
431 	u_long oldmask1, oldmask2, mask1, mask2;
432 	int i;
433 
434 	hash1 = hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask1);
435 	hash2 =
436 #ifdef NAMECACHE_ENTER_REVERSE
437 	    hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
438 #else
439 	    hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
440 #endif
441 	oldhash1 = nchashtbl;
442 	oldmask1 = nchash;
443 	nchashtbl = hash1;
444 	nchash = mask1;
445 	oldhash2 = ncvhashtbl;
446 	oldmask2 = ncvhash;
447 	ncvhashtbl = hash2;
448 	ncvhash = mask2;
449 	for (i = 0; i <= oldmask1; i++) {
450 		while ((ncp = LIST_FIRST(&oldhash1[i])) != NULL) {
451 			LIST_REMOVE(ncp, nc_hash);
452 			ncp->nc_hash.le_prev = NULL;
453 		}
454 	}
455 	for (i = 0; i <= oldmask2; i++) {
456 		while ((ncp = LIST_FIRST(&oldhash2[i])) != NULL) {
457 			LIST_REMOVE(ncp, nc_vhash);
458 			ncp->nc_vhash.le_prev = NULL;
459 		}
460 	}
461 	hashdone(oldhash1, M_CACHE);
462 	hashdone(oldhash2, M_CACHE);
463 }
464 
465 /*
466  * Cache flush, a particular vnode; called when a vnode is renamed to
467  * hide entries that would now be invalid
468  */
469 void
470 cache_purge(vp)
471 	struct vnode *vp;
472 {
473 	struct namecache *ncp;
474 	struct nchashhead *ncpp;
475 	static u_long nextvnodeid;
476 
477 	vp->v_id = ++nextvnodeid;
478 	if (nextvnodeid != 0)
479 		return;
480 	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
481 		LIST_FOREACH(ncp, ncpp, nc_hash) {
482 			ncp->nc_vpid = 0;
483 			ncp->nc_dvpid = 0;
484 		}
485 	}
486 	vp->v_id = ++nextvnodeid;
487 }
488 
489 /*
490  * Cache flush, a whole filesystem; called when filesys is umounted to
491  * remove entries that would now be invalid.
492  */
493 void
494 cache_purgevfs(mp)
495 	struct mount *mp;
496 {
497 	struct namecache *ncp, *nxtcp;
498 
499 	for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
500 		nxtcp = TAILQ_NEXT(ncp, nc_lru);
501 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
502 			continue;
503 		}
504 		/* free the resources we had */
505 		ncp->nc_vp = NULL;
506 		ncp->nc_dvp = NULL;
507 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
508 		if (ncp->nc_hash.le_prev != 0) {
509 			LIST_REMOVE(ncp, nc_hash);
510 			ncp->nc_hash.le_prev = 0;
511 		}
512 		if (ncp->nc_vhash.le_prev != 0) {
513 			LIST_REMOVE(ncp, nc_vhash);
514 			ncp->nc_vhash.le_prev = 0;
515 		}
516 		TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
517 	}
518 }
519 
520 #ifdef DDB
521 void
522 namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
523 {
524 	struct vnode *dvp = NULL;
525 	struct namecache *ncp;
526 
527 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
528 		if (ncp->nc_vp == vp && ncp->nc_vpid == vp->v_id) {
529 			(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
530 			dvp = ncp->nc_dvp;
531 		}
532 	}
533 	if (dvp == NULL) {
534 		(*pr)("name not found\n");
535 		return;
536 	}
537 	vp = dvp;
538 	TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
539 		if (ncp->nc_vp == vp && ncp->nc_vpid == vp->v_id) {
540 			(*pr)("parent %.*s\n", ncp->nc_nlen, ncp->nc_name);
541 		}
542 	}
543 }
544 #endif
545