1 /* $OpenBSD: vfs_cache.c,v 1.53 2017/02/09 19:02:34 bluhm Exp $ */ 2 /* $NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/time.h> 38 #include <sys/mount.h> 39 #include <sys/vnode.h> 40 #include <sys/lock.h> 41 #include <sys/namei.h> 42 #include <sys/errno.h> 43 #include <sys/pool.h> 44 45 /* 46 * TODO: namecache access should really be locked. 47 */ 48 49 /* 50 * For simplicity (and economy of storage), names longer than 51 * a maximum length of NAMECACHE_MAXLEN are not cached; they occur 52 * infrequently in any case, and are almost never of interest. 53 * 54 * Upon reaching the last segment of a path, if the reference 55 * is for DELETE, or NOCACHE is set (rewrite), and the 56 * name is located in the cache, it will be dropped. 57 */ 58 59 /* 60 * Structures associated with name caching. 61 */ 62 long numcache; /* total number of cache entries allocated */ 63 long numneg; /* number of negative cache entries */ 64 65 TAILQ_HEAD(, namecache) nclruhead; /* Regular Entry LRU chain */ 66 TAILQ_HEAD(, namecache) nclruneghead; /* Negative Entry LRU chain */ 67 struct nchstats nchstats; /* cache effectiveness statistics */ 68 69 int doingcache = 1; /* 1 => enable the cache */ 70 71 struct pool nch_pool; 72 73 void cache_zap(struct namecache *); 74 u_long nextvnodeid; 75 76 static inline int 77 namecache_compare(const struct namecache *n1, const struct namecache *n2) 78 { 79 if (n1->nc_nlen == n2->nc_nlen) 80 return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen)); 81 else 82 return (n1->nc_nlen - n2->nc_nlen); 83 } 84 85 RBT_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare); 86 RBT_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare); 87 88 void 89 cache_tree_init(struct namecache_rb_cache *tree) 90 { 91 RBT_INIT(namecache_rb_cache, tree); 92 } 93 94 /* 95 * blow away a namecache entry 96 */ 97 void 98 cache_zap(struct namecache *ncp) 99 { 100 struct vnode *dvp = NULL; 101 102 if (ncp->nc_vp != NULL) { 103 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 104 numcache--; 105 } else { 106 TAILQ_REMOVE(&nclruneghead, ncp, nc_neg); 107 numneg--; 108 } 109 if (ncp->nc_dvp) { 110 RBT_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp); 111 if (RBT_EMPTY(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree)) 112 dvp = ncp->nc_dvp; 113 } 114 if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) { 115 if (ncp->nc_vp != ncp->nc_dvp && 116 ncp->nc_vp->v_type == VDIR && 117 (ncp->nc_nlen > 2 || 118 (ncp->nc_nlen > 1 && 119 ncp->nc_name[1] != '.') || 120 (ncp->nc_nlen > 0 && 121 ncp->nc_name[0] != '.'))) { 122 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me); 123 } 124 } 125 pool_put(&nch_pool, ncp); 126 if (dvp) 127 vdrop(dvp); 128 } 129 130 /* 131 * Look for a name in the cache. We don't do this if the segment name is 132 * long, simply so the cache can avoid holding long names (which would 133 * either waste space, or add greatly to the complexity). 134 * dvp points to the directory to search. The componentname cnp holds 135 * the information on the entry being sought, such as its length 136 * and its name. If the lookup succeeds, vpp is set to point to the vnode 137 * and an error of 0 is returned. If the lookup determines the name does 138 * not exist (negative caching) an error of ENOENT is returned. If the 139 * lookup fails, an error of -1 is returned. 140 */ 141 int 142 cache_lookup(struct vnode *dvp, struct vnode **vpp, 143 struct componentname *cnp) 144 { 145 struct namecache *ncp; 146 struct namecache n; 147 struct vnode *vp; 148 struct proc *p = curproc; 149 u_long vpid; 150 int error; 151 152 *vpp = NULL; 153 154 if (!doingcache) { 155 cnp->cn_flags &= ~MAKEENTRY; 156 return (-1); 157 } 158 if (cnp->cn_namelen > NAMECACHE_MAXLEN) { 159 nchstats.ncs_long++; 160 cnp->cn_flags &= ~MAKEENTRY; 161 return (-1); 162 } 163 164 /* lookup in directory vnode's redblack tree */ 165 n.nc_nlen = cnp->cn_namelen; 166 memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen); 167 ncp = RBT_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n); 168 169 if (ncp == NULL) { 170 nchstats.ncs_miss++; 171 return (-1); 172 } 173 if ((cnp->cn_flags & MAKEENTRY) == 0) { 174 nchstats.ncs_badhits++; 175 goto remove; 176 } else if (ncp->nc_vp == NULL) { 177 if (cnp->cn_nameiop != CREATE || 178 (cnp->cn_flags & ISLASTCN) == 0) { 179 nchstats.ncs_neghits++; 180 /* 181 * Move this slot to end of the negative LRU chain, 182 */ 183 if (TAILQ_NEXT(ncp, nc_neg) != NULL) { 184 TAILQ_REMOVE(&nclruneghead, ncp, nc_neg); 185 TAILQ_INSERT_TAIL(&nclruneghead, ncp, 186 nc_neg); 187 } 188 return (ENOENT); 189 } else { 190 nchstats.ncs_badhits++; 191 goto remove; 192 } 193 } else if (ncp->nc_vpid != ncp->nc_vp->v_id) { 194 nchstats.ncs_falsehits++; 195 goto remove; 196 } 197 198 /* 199 * Move this slot to end of the regular LRU chain. 200 */ 201 if (TAILQ_NEXT(ncp, nc_lru) != NULL) { 202 TAILQ_REMOVE(&nclruhead, ncp, nc_lru); 203 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 204 } 205 206 vp = ncp->nc_vp; 207 vpid = vp->v_id; 208 if (vp == dvp) { /* lookup on "." */ 209 vref(dvp); 210 error = 0; 211 } else if (cnp->cn_flags & ISDOTDOT) { 212 VOP_UNLOCK(dvp, p); 213 cnp->cn_flags |= PDIRUNLOCK; 214 error = vget(vp, LK_EXCLUSIVE, p); 215 /* 216 * If the above vget() succeeded and both LOCKPARENT and 217 * ISLASTCN is set, lock the directory vnode as well. 218 */ 219 if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) { 220 if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0) { 221 vput(vp); 222 return (error); 223 } 224 cnp->cn_flags &= ~PDIRUNLOCK; 225 } 226 } else { 227 error = vget(vp, LK_EXCLUSIVE, p); 228 /* 229 * If the above vget() failed or either of LOCKPARENT or 230 * ISLASTCN is set, unlock the directory vnode. 231 */ 232 if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) { 233 VOP_UNLOCK(dvp, p); 234 cnp->cn_flags |= PDIRUNLOCK; 235 } 236 } 237 238 /* 239 * Check that the lock succeeded, and that the capability number did 240 * not change while we were waiting for the lock. 241 */ 242 if (error || vpid != vp->v_id) { 243 if (!error) { 244 vput(vp); 245 nchstats.ncs_falsehits++; 246 } else 247 nchstats.ncs_badhits++; 248 /* 249 * The parent needs to be locked when we return to VOP_LOOKUP(). 250 * The `.' case here should be extremely rare (if it can happen 251 * at all), so we don't bother optimizing out the unlock/relock. 252 */ 253 if (vp == dvp || error || 254 (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) { 255 if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0) 256 return (error); 257 cnp->cn_flags &= ~PDIRUNLOCK; 258 } 259 return (-1); 260 } 261 262 nchstats.ncs_goodhits++; 263 *vpp = vp; 264 return (0); 265 266 remove: 267 /* 268 * Last component and we are renaming or deleting, 269 * the cache entry is invalid, or otherwise don't 270 * want cache entry to exist. 271 */ 272 cache_zap(ncp); 273 return (-1); 274 } 275 276 /* 277 * Scan cache looking for name of directory entry pointing at vp. 278 * 279 * Fill in dvpp. 280 * 281 * If bufp is non-NULL, also place the name in the buffer which starts 282 * at bufp, immediately before *bpp, and move bpp backwards to point 283 * at the start of it. (Yes, this is a little baroque, but it's done 284 * this way to cater to the whims of getcwd). 285 * 286 * Returns 0 on success, -1 on cache miss, positive errno on failure. 287 * 288 * TODO: should we return *dvpp locked? 289 */ 290 291 int 292 cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp) 293 { 294 struct namecache *ncp; 295 struct vnode *dvp = NULL; 296 char *bp; 297 298 if (!doingcache) 299 goto out; 300 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) { 301 dvp = ncp->nc_dvp; 302 if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id) 303 goto found; 304 } 305 goto miss; 306 found: 307 #ifdef DIAGNOSTIC 308 if (ncp->nc_nlen == 1 && 309 ncp->nc_name[0] == '.') 310 panic("cache_revlookup: found entry for ."); 311 if (ncp->nc_nlen == 2 && 312 ncp->nc_name[0] == '.' && 313 ncp->nc_name[1] == '.') 314 panic("cache_revlookup: found entry for .."); 315 #endif 316 nchstats.ncs_revhits++; 317 318 if (bufp != NULL) { 319 bp = *bpp; 320 bp -= ncp->nc_nlen; 321 if (bp <= bufp) { 322 *dvpp = NULL; 323 return (ERANGE); 324 } 325 memcpy(bp, ncp->nc_name, ncp->nc_nlen); 326 *bpp = bp; 327 } 328 329 *dvpp = dvp; 330 331 /* 332 * XXX: Should we vget() here to have more 333 * consistent semantics with cache_lookup()? 334 */ 335 return (0); 336 337 miss: 338 nchstats.ncs_revmiss++; 339 out: 340 *dvpp = NULL; 341 return (-1); 342 } 343 344 /* 345 * Add an entry to the cache 346 */ 347 void 348 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 349 { 350 struct namecache *ncp, *lncp; 351 352 if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN) 353 return; 354 355 /* 356 * allocate, or recycle (free and allocate) an ncp. 357 */ 358 if (numcache >= initialvnodes) { 359 if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) 360 cache_zap(ncp); 361 else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL) 362 cache_zap(ncp); 363 else 364 panic("wtf? leak?"); 365 } 366 ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO); 367 368 /* grab the vnode we just found */ 369 ncp->nc_vp = vp; 370 if (vp) 371 ncp->nc_vpid = vp->v_id; 372 373 /* fill in cache info */ 374 ncp->nc_dvp = dvp; 375 ncp->nc_dvpid = dvp->v_id; 376 ncp->nc_nlen = cnp->cn_namelen; 377 memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen); 378 if (RBT_EMPTY(namecache_rb_cache, &dvp->v_nc_tree)) { 379 vhold(dvp); 380 } 381 if ((lncp = RBT_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp)) 382 != NULL) { 383 /* someone has raced us and added a different entry 384 * for the same vnode (different ncp) - we don't need 385 * this entry, so free it and we are done. 386 */ 387 pool_put(&nch_pool, ncp); 388 /* we know now dvp->v_nc_tree is not empty, no need 389 * to vdrop here 390 */ 391 goto done; 392 } 393 if (vp) { 394 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); 395 numcache++; 396 /* don't put . or .. in the reverse map */ 397 if (vp != dvp && vp->v_type == VDIR && 398 (ncp->nc_nlen > 2 || 399 (ncp->nc_nlen > 1 && 400 ncp->nc_name[1] != '.') || 401 (ncp->nc_nlen > 0 && 402 ncp->nc_name[0] != '.'))) 403 TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp, 404 nc_me); 405 } else { 406 TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg); 407 numneg++; 408 } 409 if (numneg > initialvnodes) { 410 if ((ncp = TAILQ_FIRST(&nclruneghead)) 411 != NULL) 412 cache_zap(ncp); 413 } 414 done: 415 return; 416 } 417 418 419 /* 420 * Name cache initialization, from vfs_init() when we are booting 421 */ 422 void 423 nchinit(void) 424 { 425 TAILQ_INIT(&nclruhead); 426 TAILQ_INIT(&nclruneghead); 427 pool_init(&nch_pool, sizeof(struct namecache), 0, IPL_NONE, PR_WAITOK, 428 "nchpl", NULL); 429 } 430 431 /* 432 * Cache flush, a particular vnode; called when a vnode is renamed to 433 * hide entries that would now be invalid 434 */ 435 void 436 cache_purge(struct vnode *vp) 437 { 438 struct namecache *ncp; 439 440 /* We should never have destinations cached for a non-VDIR vnode. */ 441 KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst)); 442 443 while ((ncp = TAILQ_FIRST(&vp->v_cache_dst))) 444 cache_zap(ncp); 445 while ((ncp = RBT_ROOT(namecache_rb_cache, &vp->v_nc_tree))) 446 cache_zap(ncp); 447 448 /* XXX this blows goats */ 449 vp->v_id = ++nextvnodeid; 450 if (vp->v_id == 0) 451 vp->v_id = ++nextvnodeid; 452 } 453 454 /* 455 * Cache flush, a whole filesystem; called when filesys is umounted to 456 * remove entries that would now be invalid 457 */ 458 void 459 cache_purgevfs(struct mount *mp) 460 { 461 struct namecache *ncp, *nxtcp; 462 463 /* whack the regular entries */ 464 TAILQ_FOREACH_SAFE(ncp, &nclruhead, nc_lru, nxtcp) { 465 if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) 466 continue; 467 /* free the resources we had */ 468 cache_zap(ncp); 469 } 470 /* whack the negative entries */ 471 TAILQ_FOREACH_SAFE(ncp, &nclruneghead, nc_neg, nxtcp) { 472 if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) 473 continue; 474 /* free the resources we had */ 475 cache_zap(ncp); 476 } 477 } 478