xref: /openbsd-src/sys/kern/vfs_cache.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: vfs_cache.c,v 1.57 2018/06/04 19:42:54 kn Exp $	*/
2 /*	$NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/time.h>
38 #include <sys/mount.h>
39 #include <sys/vnode.h>
40 #include <sys/lock.h>
41 #include <sys/namei.h>
42 #include <sys/errno.h>
43 #include <sys/pool.h>
44 
45 /*
46  * TODO: namecache access should really be locked.
47  */
48 
49 /*
50  * For simplicity (and economy of storage), names longer than
51  * a maximum length of NAMECACHE_MAXLEN are not cached; they occur
52  * infrequently in any case, and are almost never of interest.
53  *
54  * Upon reaching the last segment of a path, if the reference
55  * is for DELETE, or NOCACHE is set (rewrite), and the
56  * name is located in the cache, it will be dropped.
57  */
58 
59 /*
60  * Structures associated with name caching.
61  */
62 long	numcache;	/* total number of cache entries allocated */
63 long	numneg;		/* number of negative cache entries */
64 
65 TAILQ_HEAD(, namecache) nclruhead;	/* Regular Entry LRU chain */
66 TAILQ_HEAD(, namecache) nclruneghead;	/* Negative Entry LRU chain */
67 struct	nchstats nchstats;		/* cache effectiveness statistics */
68 
69 int doingcache = 1;			/* 1 => enable the cache */
70 
71 struct pool nch_pool;
72 
73 void cache_zap(struct namecache *);
74 u_long nextvnodeid;
75 
76 static inline int
77 namecache_compare(const struct namecache *n1, const struct namecache *n2)
78 {
79 	if (n1->nc_nlen == n2->nc_nlen)
80 		return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
81 	else
82 		return (n1->nc_nlen - n2->nc_nlen);
83 }
84 
85 RBT_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
86 RBT_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
87 
88 void
89 cache_tree_init(struct namecache_rb_cache *tree)
90 {
91 	RBT_INIT(namecache_rb_cache, tree);
92 }
93 
94 /*
95  * blow away a namecache entry
96  */
97 void
98 cache_zap(struct namecache *ncp)
99 {
100 	struct vnode *dvp = NULL;
101 
102 	if (ncp->nc_vp != NULL) {
103 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
104 		numcache--;
105 	} else {
106 		TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
107 		numneg--;
108 	}
109 	if (ncp->nc_dvp) {
110 		RBT_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
111 		if (RBT_EMPTY(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree))
112 			dvp = ncp->nc_dvp;
113 	}
114 	if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) {
115 		if (ncp->nc_vp != ncp->nc_dvp &&
116 		    ncp->nc_vp->v_type == VDIR &&
117 		    (ncp->nc_nlen > 2 ||
118 			(ncp->nc_nlen > 1 &&
119 			    ncp->nc_name[1] != '.') ||
120 			(ncp->nc_nlen > 0 &&
121 			    ncp->nc_name[0] != '.'))) {
122 			TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
123 		}
124 	}
125 	pool_put(&nch_pool, ncp);
126 	if (dvp)
127 		vdrop(dvp);
128 }
129 
130 /*
131  * Look for a name in the cache.
132  * dvp points to the directory to search. The componentname cnp holds
133  * the information on the entry being sought, such as its length
134  * and its name. If the lookup succeeds, vpp is set to point to the vnode
135  * and an error of 0 is returned. If the lookup determines the name does
136  * not exist (negative caching) an error of ENOENT is returned. If the
137  * lookup fails, an error of -1 is returned.
138  */
139 int
140 cache_lookup(struct vnode *dvp, struct vnode **vpp,
141     struct componentname *cnp)
142 {
143 	struct namecache *ncp;
144 	struct namecache n;
145 	struct vnode *vp;
146 	u_long vpid;
147 	int error;
148 
149 	*vpp = NULL;
150 
151 	if (!doingcache) {
152 		cnp->cn_flags &= ~MAKEENTRY;
153 		return (-1);
154 	}
155 	if (cnp->cn_namelen > NAMECACHE_MAXLEN) {
156 		nchstats.ncs_long++;
157 		cnp->cn_flags &= ~MAKEENTRY;
158 		return (-1);
159 	}
160 
161 	/* lookup in directory vnode's redblack tree */
162 	n.nc_nlen = cnp->cn_namelen;
163 	memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
164 	ncp = RBT_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
165 
166 	if (ncp == NULL) {
167 		nchstats.ncs_miss++;
168 		return (-1);
169 	}
170 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
171 		nchstats.ncs_badhits++;
172 		goto remove;
173 	} else if (ncp->nc_vp == NULL) {
174 		if (cnp->cn_nameiop != CREATE ||
175 		    (cnp->cn_flags & ISLASTCN) == 0) {
176 			nchstats.ncs_neghits++;
177 			/*
178 			 * Move this slot to end of the negative LRU chain,
179 			 */
180 			if (TAILQ_NEXT(ncp, nc_neg) != NULL) {
181 				TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
182 				TAILQ_INSERT_TAIL(&nclruneghead, ncp,
183 				    nc_neg);
184 			}
185 			return (ENOENT);
186 		} else {
187 			nchstats.ncs_badhits++;
188 			goto remove;
189 		}
190 	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
191 		nchstats.ncs_falsehits++;
192 		goto remove;
193 	}
194 
195 	/*
196 	 * Move this slot to end of the regular LRU chain.
197 	 */
198 	if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
199 		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
200 		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
201 	}
202 
203 	vp = ncp->nc_vp;
204 	vpid = vp->v_id;
205 	if (vp == dvp) {	/* lookup on "." */
206 		vref(dvp);
207 		error = 0;
208 	} else if (cnp->cn_flags & ISDOTDOT) {
209 		VOP_UNLOCK(dvp);
210 		cnp->cn_flags |= PDIRUNLOCK;
211 		error = vget(vp, LK_EXCLUSIVE);
212 		/*
213 		 * If the above vget() succeeded and both LOCKPARENT and
214 		 * ISLASTCN is set, lock the directory vnode as well.
215 		 */
216 		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
217 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
218 				vput(vp);
219 				return (error);
220 			}
221 			cnp->cn_flags &= ~PDIRUNLOCK;
222 		}
223 	} else {
224 		error = vget(vp, LK_EXCLUSIVE);
225 		/*
226 		 * If the above vget() failed or either of LOCKPARENT or
227 		 * ISLASTCN is set, unlock the directory vnode.
228 		 */
229 		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
230 			VOP_UNLOCK(dvp);
231 			cnp->cn_flags |= PDIRUNLOCK;
232 		}
233 	}
234 
235 	/*
236 	 * Check that the lock succeeded, and that the capability number did
237 	 * not change while we were waiting for the lock.
238 	 */
239 	if (error || vpid != vp->v_id) {
240 		if (!error) {
241 			vput(vp);
242 			nchstats.ncs_falsehits++;
243 		} else
244 			nchstats.ncs_badhits++;
245 		/*
246 		 * The parent needs to be locked when we return to VOP_LOOKUP().
247 		 * The `.' case here should be extremely rare (if it can happen
248 		 * at all), so we don't bother optimizing out the unlock/relock.
249 		 */
250 		if (vp == dvp || error ||
251 		    (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
252 			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
253 				return (error);
254 			cnp->cn_flags &= ~PDIRUNLOCK;
255 		}
256 		return (-1);
257 	}
258 
259 	nchstats.ncs_goodhits++;
260 	*vpp = vp;
261 	return (0);
262 
263 remove:
264 	/*
265 	 * Last component and we are renaming or deleting,
266 	 * the cache entry is invalid, or otherwise don't
267 	 * want cache entry to exist.
268 	 */
269 	cache_zap(ncp);
270 	return (-1);
271 }
272 
273 /*
274  * Scan cache looking for name of directory entry pointing at vp.
275  *
276  * Fill in dvpp.
277  *
278  * If bufp is non-NULL, also place the name in the buffer which starts
279  * at bufp, immediately before *bpp, and move bpp backwards to point
280  * at the start of it.  (Yes, this is a little baroque, but it's done
281  * this way to cater to the whims of getcwd).
282  *
283  * Returns 0 on success, -1 on cache miss, positive errno on failure.
284  *
285  * TODO: should we return *dvpp locked?
286  */
287 
288 int
289 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
290 {
291 	struct namecache *ncp;
292 	struct vnode *dvp = NULL;
293 	char *bp;
294 
295 	if (!doingcache)
296 		goto out;
297 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
298 		dvp = ncp->nc_dvp;
299 		if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id)
300 			goto found;
301 	}
302 	goto miss;
303 found:
304 #ifdef DIAGNOSTIC
305 	if (ncp->nc_nlen == 1 &&
306 	    ncp->nc_name[0] == '.')
307 		panic("cache_revlookup: found entry for .");
308 	if (ncp->nc_nlen == 2 &&
309 	    ncp->nc_name[0] == '.' &&
310 	    ncp->nc_name[1] == '.')
311 		panic("cache_revlookup: found entry for ..");
312 #endif
313 	nchstats.ncs_revhits++;
314 
315 	if (bufp != NULL) {
316 		bp = *bpp;
317 		bp -= ncp->nc_nlen;
318 		if (bp <= bufp) {
319 			*dvpp = NULL;
320 			return (ERANGE);
321 		}
322 		memcpy(bp, ncp->nc_name, ncp->nc_nlen);
323 		*bpp = bp;
324 	}
325 
326 	*dvpp = dvp;
327 
328 	/*
329 	 * XXX: Should we vget() here to have more
330 	 * consistent semantics with cache_lookup()?
331 	 */
332 	return (0);
333 
334 miss:
335 	nchstats.ncs_revmiss++;
336 out:
337 	*dvpp = NULL;
338 	return (-1);
339 }
340 
341 /*
342  * Add an entry to the cache
343  */
344 void
345 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
346 {
347 	struct namecache *ncp, *lncp;
348 
349 	if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN)
350 		return;
351 
352 	/*
353 	 * allocate, or recycle (free and allocate) an ncp.
354 	 */
355 	if (numcache >= initialvnodes) {
356 		if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
357 			cache_zap(ncp);
358 		else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
359 			cache_zap(ncp);
360 		else
361 			panic("wtf? leak?");
362 	}
363 	ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
364 
365 	/* grab the vnode we just found */
366 	ncp->nc_vp = vp;
367 	if (vp)
368 		ncp->nc_vpid = vp->v_id;
369 
370 	/* fill in cache info */
371 	ncp->nc_dvp = dvp;
372 	ncp->nc_dvpid = dvp->v_id;
373 	ncp->nc_nlen = cnp->cn_namelen;
374 	memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
375 	if (RBT_EMPTY(namecache_rb_cache, &dvp->v_nc_tree)) {
376 		vhold(dvp);
377 	}
378 	if ((lncp = RBT_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp))
379 	    != NULL) {
380 		/* someone has raced us and added a different entry
381 		 * for the same vnode (different ncp) - we don't need
382 		 * this entry, so free it and we are done.
383 		 */
384 		pool_put(&nch_pool, ncp);
385 		/* we know now dvp->v_nc_tree is not empty, no need
386 		 * to vdrop here
387 		 */
388 		goto done;
389 	}
390 	if (vp) {
391 		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
392 		numcache++;
393 		/* don't put . or .. in the reverse map */
394 		if (vp != dvp && vp->v_type == VDIR &&
395 		    (ncp->nc_nlen > 2 ||
396 			(ncp->nc_nlen > 1 &&
397 			    ncp->nc_name[1] != '.') ||
398 			(ncp->nc_nlen > 0 &&
399 			    ncp->nc_name[0] != '.')))
400 			TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp,
401 			    nc_me);
402 	} else {
403 		TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
404 		numneg++;
405 	}
406 	if (numneg  > initialvnodes) {
407 		if ((ncp = TAILQ_FIRST(&nclruneghead))
408 		    != NULL)
409 			cache_zap(ncp);
410 	}
411 done:
412 	return;
413 }
414 
415 
416 /*
417  * Name cache initialization, from vfs_init() when we are booting
418  */
419 void
420 nchinit(void)
421 {
422 	TAILQ_INIT(&nclruhead);
423 	TAILQ_INIT(&nclruneghead);
424 	pool_init(&nch_pool, sizeof(struct namecache), 0, IPL_NONE, PR_WAITOK,
425 	    "nchpl", NULL);
426 }
427 
428 /*
429  * Cache flush, a particular vnode; called when a vnode is renamed to
430  * hide entries that would now be invalid
431  */
432 void
433 cache_purge(struct vnode *vp)
434 {
435 	struct namecache *ncp;
436 
437 	/* We should never have destinations cached for a non-VDIR vnode. */
438 	KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst));
439 
440 	while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
441 		cache_zap(ncp);
442 	while ((ncp = RBT_ROOT(namecache_rb_cache, &vp->v_nc_tree)))
443 		cache_zap(ncp);
444 
445 	/* XXX this blows goats */
446 	vp->v_id = ++nextvnodeid;
447 	if (vp->v_id == 0)
448 		vp->v_id = ++nextvnodeid;
449 }
450 
451 /*
452  * Cache flush, a whole filesystem; called when filesys is umounted to
453  * remove entries that would now be invalid
454  */
455 void
456 cache_purgevfs(struct mount *mp)
457 {
458 	struct namecache *ncp, *nxtcp;
459 
460 	/* whack the regular entries */
461 	TAILQ_FOREACH_SAFE(ncp, &nclruhead, nc_lru, nxtcp) {
462 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
463 			continue;
464 		/* free the resources we had */
465 		cache_zap(ncp);
466 	}
467 	/* whack the negative entries */
468 	TAILQ_FOREACH_SAFE(ncp, &nclruneghead, nc_neg, nxtcp) {
469 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
470 			continue;
471 		/* free the resources we had */
472 		cache_zap(ncp);
473 	}
474 }
475