/*	$OpenBSD: vfs_cache.c,v 1.53 2017/02/09 19:02:34 bluhm Exp $	*/
/*	$NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/pool.h>

/*
 * TODO: namecache access should really be locked.
 */

/*
 * For simplicity (and economy of storage), names longer than
 * NAMECACHE_MAXLEN are not cached; they occur infrequently in any
 * case, and are almost never of interest.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */
long	numcache;	/* total number of cache entries allocated */
long	numneg;		/* number of negative cache entries */

TAILQ_HEAD(, namecache) nclruhead;	/* Regular Entry LRU chain */
TAILQ_HEAD(, namecache) nclruneghead;	/* Negative Entry LRU chain */
struct	nchstats nchstats;		/* cache effectiveness statistics */

int doingcache = 1;			/* 1 => enable the cache */

struct pool nch_pool;

void cache_zap(struct namecache *);
u_long nextvnodeid;

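/*
 * Ordering for the per-directory red-black tree: entries are compared
 * by name length first and only then by the bytes of the name, which
 * keeps comparisons cheap while still giving a total order.
 */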
static inline int
namecache_compare(const struct namecache *n1, const struct namecache *n2)
{
	if (n1->nc_nlen == n2->nc_nlen)
		return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
	else
		return (n1->nc_nlen - n2->nc_nlen);
}

RBT_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
RBT_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);

void
cache_tree_init(struct namecache_rb_cache *tree)
{
	RBT_INIT(namecache_rb_cache, tree);
}

/*
 * blow away a namecache entry
 */
void
cache_zap(struct namecache *ncp)
{
	struct vnode *dvp = NULL;

	if (ncp->nc_vp != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		numcache--;
	} else {
		TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
		numneg--;
	}
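	/*
	 * If this entry was the last one hashed under its directory,
	 * remember the directory so the vhold() taken in cache_enter()
	 * can be released once the entry has been freed.
	 */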
	if (ncp->nc_dvp) {
		RBT_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
		if (RBT_EMPTY(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree))
			dvp = ncp->nc_dvp;
	}
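	/*
	 * Entries for "." and ".." are never placed on the reverse
	 * (v_cache_dst) list by cache_enter(), so the test below mirrors
	 * the one used there before removing the entry from that list.
	 */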
	if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) {
		if (ncp->nc_vp != ncp->nc_dvp &&
		    ncp->nc_vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.'))) {
			TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
		}
	}
	pool_put(&nch_pool, ncp);
	if (dvp)
		vdrop(dvp);
}

/*
 * Look for a name in the cache. We don't do this if the segment name is
 * long, simply so the cache can avoid holding long names (which would
 * either waste space, or add greatly to the complexity).
 * dvp points to the directory to search. The componentname cnp holds
 * the information on the entry being sought, such as its length
 * and its name. If the lookup succeeds, vpp is set to point to the vnode
 * and an error of 0 is returned. If the lookup determines the name does
 * not exist (negative caching) an error of ENOENT is returned. If the
 * lookup fails, an error of -1 is returned.
 */
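
/*
 * Sketch of the caller's side of this contract (illustrative only; the
 * real consumers are the VOP_LOOKUP() implementations, e.g. ufs_lookup()):
 *
 *	if ((error = cache_lookup(dvp, vpp, cnp)) >= 0)
 *		return (error);		(0 = hit, ENOENT = negative hit)
 *	(error == -1: cache miss, fall back to scanning the directory)
 */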
int
cache_lookup(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache n;
	struct vnode *vp;
	struct proc *p = curproc;
	u_long vpid;
	int error;

	*vpp = NULL;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (-1);
	}
	if (cnp->cn_namelen > NAMECACHE_MAXLEN) {
		nchstats.ncs_long++;
		cnp->cn_flags &= ~MAKEENTRY;
		return (-1);
	}

	/* lookup in the directory vnode's red-black tree */
	n.nc_nlen = cnp->cn_namelen;
	memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
	ncp = RBT_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);

	if (ncp == NULL) {
		nchstats.ncs_miss++;
		return (-1);
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		nchstats.ncs_badhits++;
		goto remove;
	} else if (ncp->nc_vp == NULL) {
		if (cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0) {
			nchstats.ncs_neghits++;
			/*
			 * Move this slot to the end of the negative LRU chain.
			 */
			if (TAILQ_NEXT(ncp, nc_neg) != NULL) {
				TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
				TAILQ_INSERT_TAIL(&nclruneghead, ncp,
				    nc_neg);
			}
			return (ENOENT);
		} else {
			nchstats.ncs_badhits++;
			goto remove;
		}
	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
		nchstats.ncs_falsehits++;
		goto remove;
	}

	/*
	 * Move this slot to the end of the regular LRU chain.
	 */
	if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	}

	vp = ncp->nc_vp;
	vpid = vp->v_id;
	if (vp == dvp) {	/* lookup on "." */
		vref(dvp);
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, p);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE, p);
		/*
		 * If the above vget() succeeded and both LOCKPARENT and
		 * ISLASTCN are set, lock the directory vnode as well.
		 */
		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0) {
				vput(vp);
				return (error);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		error = vget(vp, LK_EXCLUSIVE, p);
		/*
		 * If the above vget() failed, or LOCKPARENT and ISLASTCN
		 * are not both set, unlock the directory vnode.
		 */
		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
			VOP_UNLOCK(dvp, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}

	/*
	 * Check that the lock succeeded, and that the capability number did
	 * not change while we were waiting for the lock.
	 */
	if (error || vpid != vp->v_id) {
		if (!error) {
			vput(vp);
			nchstats.ncs_falsehits++;
		} else
			nchstats.ncs_badhits++;
		/*
		 * The parent needs to be locked when we return to VOP_LOOKUP().
		 * The `.' case here should be extremely rare (if it can happen
		 * at all), so we don't bother optimizing out the unlock/relock.
		 */
		if (vp == dvp || error ||
		    (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0)
				return (error);
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
		return (-1);
	}

	nchstats.ncs_goodhits++;
	*vpp = vp;
	return (0);

remove:
	/*
	 * Last component and we are renaming or deleting, the cache entry
	 * is invalid, or we otherwise don't want this cache entry to exist.
	 */
	cache_zap(ncp);
	return (-1);
}

/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * Fill in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 *
 * TODO: should we return *dvpp locked?
 */
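
/*
 * Illustrative buffer convention (a sketch of how a getcwd-style caller
 * is assumed to use this; the names below are hypothetical):
 *
 *	char buf[MAXPATHLEN], *bp = &buf[sizeof(buf)];
 *
 *	*--bp = '\0';
 *	if (cache_revlookup(vp, &dvp, &bp, buf) == 0)
 *		bp now points at the NUL-terminated name of vp within dvp
 */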

int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp = NULL;
	char *bp;

	if (!doingcache)
		goto out;
	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
		dvp = ncp->nc_dvp;
		if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id)
			goto found;
	}
	goto miss;
found:
#ifdef DIAGNOSTIC
	if (ncp->nc_nlen == 1 &&
	    ncp->nc_name[0] == '.')
		panic("cache_revlookup: found entry for .");
	if (ncp->nc_nlen == 2 &&
	    ncp->nc_name[0] == '.' &&
	    ncp->nc_name[1] == '.')
		panic("cache_revlookup: found entry for ..");
#endif
	nchstats.ncs_revhits++;

	if (bufp != NULL) {
		bp = *bpp;
		bp -= ncp->nc_nlen;
		if (bp <= bufp) {
			*dvpp = NULL;
			return (ERANGE);
		}
		memcpy(bp, ncp->nc_name, ncp->nc_nlen);
		*bpp = bp;
	}

	*dvpp = dvp;

	/*
	 * XXX: Should we vget() here to have more
	 * consistent semantics with cache_lookup()?
	 */
	return (0);

miss:
	nchstats.ncs_revmiss++;
out:
	*dvpp = NULL;
	return (-1);
}

/*
 * Add an entry to the cache
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp, *lncp;

	if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN)
		return;

	/*
	 * allocate, or recycle (free and allocate) an ncp.
	 */
	if (numcache >= initialvnodes) {
		if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
			cache_zap(ncp);
		else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
			cache_zap(ncp);
		else
			panic("wtf? leak?");
	}
	ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);

	/* grab the vnode we just found */
	ncp->nc_vp = vp;
	if (vp)
		ncp->nc_vpid = vp->v_id;

	/* fill in cache info */
	ncp->nc_dvp = dvp;
	ncp->nc_dvpid = dvp->v_id;
	ncp->nc_nlen = cnp->cn_namelen;
	memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
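	/*
	 * The first entry hashed under a directory takes a vhold() on it
	 * so the directory vnode is not recycled while it still has name
	 * cache entries; cache_zap() drops the hold when the tree empties.
	 */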
	if (RBT_EMPTY(namecache_rb_cache, &dvp->v_nc_tree)) {
		vhold(dvp);
	}
	if ((lncp = RBT_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp))
	    != NULL) {
		/* someone has raced us and added a different entry
		 * for the same vnode (different ncp) - we don't need
		 * this entry, so free it and we are done.
		 */
		pool_put(&nch_pool, ncp);
		/* we know now dvp->v_nc_tree is not empty, no need
		 * to vdrop here
		 */
		goto done;
	}
	if (vp) {
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
		numcache++;
		/* don't put . or .. in the reverse map */
		if (vp != dvp && vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.')))
			TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp,
			    nc_me);
	} else {
		TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
		numneg++;
	}
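	/*
	 * Cap the number of negative entries; recycle the oldest one
	 * (head of the negative LRU chain) once the limit is exceeded.
	 */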
	if (numneg > initialvnodes) {
		if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
			cache_zap(ncp);
	}
done:
	return;
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	TAILQ_INIT(&nclruhead);
	TAILQ_INIT(&nclruneghead);
	pool_init(&nch_pool, sizeof(struct namecache), 0, IPL_NONE, PR_WAITOK,
	    "nchpl", NULL);
}

/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid
 */
void
cache_purge(struct vnode *vp)
{
	struct namecache *ncp;

	/* We should never have destinations cached for a non-VDIR vnode. */
	KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst));

	while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
		cache_zap(ncp);
	while ((ncp = RBT_ROOT(namecache_rb_cache, &vp->v_nc_tree)))
		cache_zap(ncp);

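	/*
	 * Bump the capability number so that entries elsewhere in the
	 * cache that still reference this vnode (matched via nc_vpid or
	 * nc_dvpid) are detected as stale, and never hand out v_id == 0.
	 */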
	/* XXX this blows goats */
	vp->v_id = ++nextvnodeid;
	if (vp->v_id == 0)
		vp->v_id = ++nextvnodeid;
}

/*
 * Cache flush, a whole filesystem; called when a filesystem is unmounted
 * to remove entries that would now be invalid
 */
void
cache_purgevfs(struct mount *mp)
{
	struct namecache *ncp, *nxtcp;

	/* whack the regular entries */
	TAILQ_FOREACH_SAFE(ncp, &nclruhead, nc_lru, nxtcp) {
		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
			continue;
		/* free the resources we had */
		cache_zap(ncp);
	}
	/* whack the negative entries */
	TAILQ_FOREACH_SAFE(ncp, &nclruneghead, nc_neg, nxtcp) {
		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
			continue;
		/* free the resources we had */
		cache_zap(ncp);
	}
}
478