/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>,
 *	All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.16 2004/04/08 22:00:41 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/globaldata.h>

/*
 * Random lookups in the cache are accomplished with a hash table using
 * a hash key built from the parent namecache pointer and the component
 * name.
 *
 * Negative entries may exist and correspond to structures where nc_vp
 * is NULL.  In a negative entry, NCF_WHITEOUT will be set if the entry
 * corresponds to a whited-out directory entry (versus simply not finding
 * the entry at all).
 *
 * Upon reaching the last segment of a path, if the reference is for DELETE,
 * or NOCACHE is set (rewrite), and the name is located in the cache, it
 * will be dropped.
 */
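/*
 * Editorial sketch (not part of the original code): the hash key described
 * above is built exactly the way cache_lookup() and cache_enter() build it
 * below, by folding the component name and then the parent namecache
 * pointer through the FNV-1 hash:
 *
 *	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&par, sizeof(par), hash);
 *	nchpp = NCHHASH(hash);		(chain head within nchashtbl)
 */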
/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash)	(&nchashtbl[(hash) & nchash])
#define MINNEG		1024

MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static struct namecache_list	ncneglist;		/* instead of vnode */
static struct namecache		rootnamecache;		/* Dummy node */

static int	nczapcheck;		/* panic on bad release */
SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, "");

static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");

static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");

static u_long	numneg;			/* number of negative cache entries */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");

static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");

static u_long	numunres;		/* number of unresolved entries */
SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");

SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

struct nchstats nchstats[SMP_MAXCPU];
/*
 * Export VFS cache effectiveness statistics to user-land.
 *
 * Aggregation is left to user-land so that neat things can be done
 * with the data, such as observing the per-CPU cache distribution.
 */
static int
nchstats_agg(SYSCTL_HANDLER_ARGS)
{
	struct globaldata *gd;
	int i, error;

	error = 0;
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
			sizeof(struct nchstats))))
			break;
	}

	return (error);
}
SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, nchstats_agg, "S,nchstats", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

/*
 * cache_hold() and cache_drop() prevent the premature deletion of a
 * namecache entry but do not prevent operations (such as zapping) on
 * that namecache entry.
 */
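/*
 * Editorial usage sketch (hypothetical caller): a hold only pins the
 * entry's memory across a blocking operation; the entry may still be
 * reverted to UNRESOLVED by cache_zap() while held.
 *
 *	ncp = cache_hold(ncp);
 *	... potentially blocking operation ...
 *	cache_drop(ncp);
 */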
static __inline
struct namecache *
_cache_hold(struct namecache *ncp)
{
	++ncp->nc_refs;
	return(ncp);
}

static __inline
void
_cache_drop(struct namecache *ncp)
{
	KKASSERT(ncp->nc_refs > 0);
	if (ncp->nc_refs == 1 &&
	    (ncp->nc_flag & NCF_UNRESOLVED) &&
	    TAILQ_EMPTY(&ncp->nc_list)
	) {
		cache_zap(ncp);
	} else {
		--ncp->nc_refs;
	}
}

struct namecache *
cache_hold(struct namecache *ncp)
{
	return(_cache_hold(ncp));
}

void
cache_drop(struct namecache *ncp)
{
	_cache_drop(ncp);
}

/*
 * Link an entry to its parent, bumping the parent vnode's hold count
 * when the parent goes from having no children to having one.
 */
static void
cache_link_parent(struct namecache *ncp, struct namecache *par)
{
	KKASSERT(ncp->nc_parent == NULL);
	ncp->nc_parent = par;
	if (TAILQ_EMPTY(&par->nc_list)) {
		if (par->nc_vp)
			vhold(par->nc_vp);
	}
	TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
}

/*
 * Unlink an entry from its parent, releasing the parent vnode's hold
 * when the last child goes away.
 */
static void
cache_unlink_parent(struct namecache *ncp)
{
	struct namecache *par;

	if ((par = ncp->nc_parent) != NULL) {
		ncp->nc_parent = NULL;
		par = cache_hold(par);
		TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
		if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
			vdrop(par->nc_vp);
		cache_drop(par);
	}
}

/*
 * Allocate a new namecache entry.  A non-NULL vp produces a positive
 * entry linked to the vnode; a NULL vp produces a negative entry queued
 * on ncneglist.
 */
static struct namecache *
cache_alloc(struct vnode *vp)
{
	struct namecache *ncp;

	ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
	TAILQ_INIT(&ncp->nc_list);
	ncp->nc_vp = vp;
	if (vp != NULL) {
		TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
		++numcache;
	} else {
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		++numneg;
	}
	return(ncp);
}
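/*
 * Editorial sketch (hypothetical sequence): the interplay between the
 * reference count and the UNRESOLVED flag in the routines above.  A zero
 * count does not free a resolved entry; only an UNRESOLVED, childless
 * entry is reclaimed when its last reference goes away.
 *
 *	ncp = cache_alloc(vp);		resolved, nc_refs == 0
 *	cache_hold(ncp);		nc_refs == 1
 *	cache_drop(ncp);		resolved, so only --nc_refs
 *	cache_zap(cache_hold(ncp));	revert to UNRESOLVED and
 *					reclaim if possible
 */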
/*
 * Try to destroy a namecache entry.  The entry is disassociated from its
 * vnode or ncneglist and reverted to an UNRESOLVED state.
 *
 * Then, if there are no additional references to the ncp and we can
 * successfully delete the children, the entry is also removed from the
 * namecache hashlist / topology.
 *
 * References or undeletable children will prevent the entry from being
 * removed from the topology.  The entry may be revalidated (typically
 * by cache_enter()) at a later time.  Children remain because:
 *
 *	+ we have tried to delete a node rather than a leaf in the topology.
 *	+ the presence of negative entries (we try to scrap these).
 *	+ an entry or child has a non-zero ref count and cannot be scrapped.
 *
 * This function must be called with the ncp held and will drop the ref
 * count during zapping.
 */
static void
cache_zap(struct namecache *ncp)
{
	struct namecache *par;
	struct vnode *vp;

	/*
	 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
	 */
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		ncp->nc_flag |= NCF_UNRESOLVED;
		++numunres;
		if ((vp = ncp->nc_vp) != NULL) {
			--numcache;
			ncp->nc_vp = NULL;	/* safety */
			TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
			if (!TAILQ_EMPTY(&ncp->nc_list))
				vdrop(vp);
		} else {
			TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
			--numneg;
		}
	}

	/*
	 * Try to scrap the entry and possibly tail-recurse on its parent.
	 * We only scrap unref'd (other than our ref) unresolved entries,
	 * we do not scrap 'live' entries.
	 */
	while (ncp->nc_flag & NCF_UNRESOLVED) {
		/*
		 * Someone other than us has a ref, stop.
		 */
		if (ncp->nc_refs > 1)
			goto done;

		/*
		 * We have children, stop.
		 */
		if (!TAILQ_EMPTY(&ncp->nc_list))
			goto done;

		/*
		 * Ok, we can completely destroy and free this entry.  Sanity
		 * check it against our static rootnamecache structure,
		 * then remove it from the hash.
		 */
		KKASSERT(ncp != &rootnamecache);

		if (ncp->nc_flag & NCF_HASHED) {
			ncp->nc_flag &= ~NCF_HASHED;
			LIST_REMOVE(ncp, nc_hash);
		}

		/*
		 * Unlink from its parent and free, then loop on the
		 * parent.  XXX temp hack, in stage-3 parent is never NULL
		 */
		if ((par = ncp->nc_parent) != NULL) {
			par = cache_hold(par);
			TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
			if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
				vdrop(par->nc_vp);
		}
		--numunres;
		ncp->nc_refs = -1;	/* safety */
		ncp->nc_parent = NULL;	/* safety */
		if (ncp->nc_name)
			free(ncp->nc_name, M_VFSCACHE);
		free(ncp, M_VFSCACHE);
		ncp = par;
		if (par == NULL)	/* temp hack */
			return;		/* temp hack */
	}
done:
	--ncp->nc_refs;
}
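/*
 * Editorial note on the calling idiom used throughout this file:
 * cache_zap() consumes one reference, so an entry that is not already
 * held is zapped with
 *
 *	cache_zap(cache_hold(ncp));
 *
 * which pins the entry first and then lets cache_zap() dispose of the
 * reference as it unwinds.
 */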
/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.
 *
 * If the lookup succeeds, the vnode is returned in *vpp, and a
 * status of -1 is returned.
 *
 * If the lookup determines that the name does not exist (negative caching),
 * a status of ENOENT is returned.
 *
 * If the lookup fails, a status of zero is returned.
 *
 * Note that UNRESOLVED entries are ignored.  They are not negative cache
 * entries.
 */
int
cache_lookup(struct vnode *dvp, struct namecache *par, struct vnode **vpp,
		struct namecache **ncpp, struct componentname *cnp)
{
	struct namecache *ncp;
	u_int32_t hash;
	globaldata_t gd = mycpu;

	numcalls++;

	/*
	 * Obtain the namecache entry associated with dvp, creating one if
	 * necessary.  If we have to create one we have insufficient
	 * information to hash it or even supply the name, but we still
	 * need one so we can link it in.
	 *
	 * NOTE: in this stage of development, the passed 'par' is
	 * almost always NULL.
	 */
	if (par == NULL) {
		if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
			par = cache_alloc(dvp);
	}

	/*
	 * Deal with "." and "..".  In this stage of code development we leave
	 * the returned ncpp NULL.  Note that if the namecache is disjoint,
	 * we won't find a vnode for "..".
	 */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			numposhits++;	/* include in total statistics */
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			numposhits++;	/* include in total statistics */
			if ((cnp->cn_flags & CNP_MAKEENTRY) == 0)
				return (0);
			if (par->nc_parent == NULL ||
			    par->nc_parent->nc_vp == NULL) {
				return (0);
			}
			*vpp = par->nc_parent->nc_vp;
			return (-1);
		}
	}

	/*
	 * Try to locate an existing entry
	 */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&par, sizeof(par), hash);
	if (nczapcheck > 1)
		printf("DVP %p/%p %08x %*.*s\n", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
restart:
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (nczapcheck > 1) {
			printf("TEST ncp par=%p %*.*s\n",
				ncp->nc_parent, ncp->nc_nlen, ncp->nc_nlen,
				ncp->nc_name);
		}

		/*
		 * Zap entries that have timed out.
		 */
		if (ncp->nc_timeout &&
		    (int)(ncp->nc_timeout - ticks) < 0
		) {
			if (nczapcheck > 1)
				printf("TIMEOUT\n");
			cache_zap(cache_hold(ncp));
			goto restart;
		}

		/*
		 * Break out if we find a matching entry.  UNRESOLVED entries
		 * never match (they are in the middle of being destroyed).
		 */
		if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
		    ncp->nc_parent == par &&
		    ncp->nc_nlen == cnp->cn_namelen &&
		    bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
		) {
			if (nczapcheck > 1)
				printf("GOOD\n");
			cache_hold(ncp);
			break;
		}
	}

	/*
	 * If we failed to locate an entry, return 0 (indicates failure).
	 */
	if (ncp == NULL) {
		if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		gd->gd_nchstats->ncs_miss++;
		if (nczapcheck) {
			printf("MISS %p/%p %*.*s/%*.*s\n", dvp, par,
				par->nc_nlen, par->nc_nlen, (par->nc_name ? par->nc_name : ""),
				(int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
		}
		return (0);
	}

	/*
	 * If we found an entry, but we don't want to have one, we zap it.
	 */
	if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
		numposzaps++;
		gd->gd_nchstats->ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/*
	 * If the vnode is not NULL then return the positive match.
	 */
	if (ncp->nc_vp) {
		numposhits++;
		gd->gd_nchstats->ncs_goodhits++;
		*vpp = ncp->nc_vp;
		cache_drop(ncp);
		return (-1);
	}

	/*
	 * If the vnode is NULL we found a negative match.  If we want to
	 * create it, purge the negative match and return failure (as if
	 * we hadn't found a match in the first place).
	 */
	if (cnp->cn_nameiop == NAMEI_CREATE) {
		numnegzaps++;
		gd->gd_nchstats->ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;

	/*
	 * We found a "negative" match; ENOENT notifies the client of this
	 * match.  The nc_flag field records whether this is a whiteout.
	 * Since there is no vnode we can reuse the vnode tailq link field
	 * to requeue the entry at the tail of ncneglist, keeping it warm
	 * against the LRU scrapping done in cache_enter().
	 */
	TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
	TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
	gd->gd_nchstats->ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITEOUT)
		cnp->cn_flags |= CNP_ISWHITEOUT;
	cache_drop(ncp);
	return (ENOENT);
}
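/*
 * Editorial sketch (hypothetical caller): dispatching on the three
 * cache_lookup() return values documented above.  vfs_cache_lookup()
 * below is the canonical in-tree consumer of this convention.
 *
 *	error = cache_lookup(dvp, NULL, &vp, NULL, cnp);
 *	if (error == 0)
 *		... miss: fall through to VOP_CACHEDLOOKUP() ...
 *	else if (error == ENOENT)
 *		... cached negative: name is known not to exist ...
 *	else	(error == -1)
 *		... positive hit: use the vnode returned in *vpp ...
 */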
/*
 * Generate a special linkage between the mount point and the root of the
 * mounted filesystem in order to maintain the namecache topology across
 * a mount point.  The special linkage has a 0-length name component
 * and sets NCF_MOUNTPT.
 */
void
cache_mount(struct vnode *dvp, struct vnode *tvp)
{
	struct namecache *ncp;
	struct namecache *par;
	struct nchashhead *nchpp;
	u_int32_t hash;

	/*
	 * If a linkage already exists we do not have to do anything.
	 */
	hash = fnv_32_buf("", 0, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_vp == tvp &&
		    ncp->nc_nlen == 0 &&
		    ncp->nc_parent &&
		    ncp->nc_parent->nc_vp == dvp
		) {
			return;
		}
	}

	if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
		par = cache_alloc(dvp);

	/*
	 * Otherwise create a new linkage.
	 */
	ncp = cache_alloc(tvp);
	ncp->nc_flag = NCF_MOUNTPT;
	cache_link_parent(ncp, par);

	/*
	 * Hash table
	 */
	hash = fnv_32_buf("", 0, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);

	ncp->nc_flag |= NCF_HASHED;
}
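/*
 * Editorial sketch (hypothetical mount-time caller): bridging the
 * covered vnode to the root vnode of a freshly mounted filesystem so
 * that path traversal stays within one namecache topology.  'rootvp'
 * here stands in for whatever VFS_ROOT() returned.
 *
 *	cache_mount(mp->mnt_vnodecovered, rootvp);
 */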
/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct namecache *par, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *bpar;
	struct nchashhead *nchpp;
	u_int32_t hash;
	char *name;

	/*
	 * If the directory has no namecache entry we must associate one with
	 * it.  The name of the entry is not known so it isn't hashed.
	 */
	if (par == NULL) {
		if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
			par = cache_alloc(dvp);
	}

	/*
	 * This may be a bit confusing.  "." and ".." are 'virtual' entries.
	 * We do not actually create a namecache entry representing either.
	 * However, the ".." case is used to link up a potentially disjoint
	 * directory with its parent, to disconnect a directory from its
	 * parent, or to change an existing linkage that may no longer be
	 * correct (as might occur when a subdirectory is renamed).
	 */

	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
		return;
	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.'
	) {
		if (vp == NULL) {
			if (par->nc_parent)
				cache_unlink_parent(par);
		} else {
			if ((ncp = TAILQ_FIRST(&vp->v_namecache)) == NULL)
				ncp = cache_alloc(vp);
			cache_hold(par);
			if (par->nc_parent)
				cache_unlink_parent(par);
			cache_link_parent(par, ncp); /* ncp is parent of par */
			cache_drop(par);
		}
		return;
	}

	/*
	 * Locate other entries associated with this vnode and zap them,
	 * because the purge code may not be able to find them due to
	 * the topology not yet being consistent.  This is a temporary
	 * hack.
	 */
	if (vp) {
again:
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
				cache_zap(cache_hold(ncp));
				goto again;
			}
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	bpar = par;
	hash = fnv_32_buf(&bpar, sizeof(bpar), hash);

	if (nczapcheck > 1)
		printf("ENTER %p/%p %08x '%*.*s' %p ", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr, vp);

	name = malloc(cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	ncp = cache_alloc(vp);
	if (nczapcheck > 1)
		printf("alloc\n");

	/*
	 * Set a timeout
	 */
	if (cnp->cn_flags & CNP_CACHETIMEOUT) {
		if ((ncp->nc_timeout = ticks + cnp->cn_timeout) == 0)
			ncp->nc_timeout = 1;
	}

	/*
	 * Linkup the parent pointer, bump the parent vnode's hold
	 * count when we go from 0->1 children.
	 */
	cache_link_parent(ncp, par);

	/*
	 * Add to the hash table
	 */
	ncp->nc_name = name;
	ncp->nc_nlen = cnp->cn_namelen;
	bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
	ncp->nc_flag |= NCF_HASHED;

	/*
	 * The target vnode is NULL if this is to be a negative cache entry.
	 */
	if (vp == NULL) {
		ncp->nc_flag &= ~NCF_WHITEOUT;
		if (cnp->cn_flags & CNP_ISWHITEOUT)
			ncp->nc_flag |= NCF_WHITEOUT;
	}

	/*
	 * Don't cache too many negative hits
	 */
	if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneglist);
		KKASSERT(ncp != NULL);
		cache_zap(cache_hold(ncp));
	}
}
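/*
 * Editorial sketch (hypothetical filesystem lookup path): how results
 * are typically fed back into the cache.  A NULL vp records a negative
 * entry; CNP_MAKEENTRY gates whether caching was requested at all.
 *
 *	if (error == 0 && (cnp->cn_flags & CNP_MAKEENTRY))
 *		cache_enter(dvp, NULL, vp, cnp);	positive entry
 *	else if (error == ENOENT && (cnp->cn_flags & CNP_MAKEENTRY))
 *		cache_enter(dvp, NULL, NULL, cnp);	negative entry
 */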
/*
 * Name cache initialization, from vfsinit() when we are booting
 *
 * rootnamecache is initialized such that it cannot be recursively deleted.
 */
void
nchinit(void)
{
	int i;
	globaldata_t gd;

	/* initialize per-cpu namecache effectiveness statistics. */
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		gd->gd_nchstats = &nchstats[i];
	}

	TAILQ_INIT(&ncneglist);
	nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
	TAILQ_INIT(&rootnamecache.nc_list);
	rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED;
	rootnamecache.nc_refs = 1;
}

/*
 * vfs_cache_setroot()
 *
 *	Create an association between the root of our namecache and
 *	the root vnode.  This routine may be called several times during
 *	booting.
 */
void
vfs_cache_setroot(struct vnode *nvp)
{
	KKASSERT(rootnamecache.nc_refs > 0);	/* don't accidentally free */
	cache_zap(cache_hold(&rootnamecache));

	rootnamecache.nc_vp = nvp;
	rootnamecache.nc_flag &= ~NCF_UNRESOLVED;
	if (nvp) {
		++numcache;
		if (!TAILQ_EMPTY(&rootnamecache.nc_list))
			vhold(nvp);
		TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode);
	} else {
		++numneg;
		TAILQ_INSERT_TAIL(&ncneglist, &rootnamecache, nc_vnode);
		rootnamecache.nc_flag &= ~NCF_WHITEOUT;
	}
}

/*
 * Invalidate all namecache entries to a particular vnode as well as
 * any direct children of that vnode in the namecache.  This is a
 * 'catch all' purge used by filesystems that do not know any better.
 *
 * A new vnode v_id is generated.  Note that no vnode will ever have a
 * v_id of 0.
 *
 * Note that the linkage between the vnode and its namecache entries will
 * be removed, but the namecache entries themselves might stay put due to
 * active references from elsewhere in the system or due to the existence of
 * the children.  The namecache topology is left intact even if we do not
 * know what the vnode association is.  Such entries will be marked
 * NCF_UNRESOLVED.
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 */
void
cache_purge(struct vnode *vp)
{
	static u_long nextid;
	struct namecache *ncp;
	struct namecache *scan;

	/*
	 * Disassociate the vnode from its namecache entries along with
	 * (for historical reasons) any direct children.
	 */
	while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
		cache_hold(ncp);

restart: /* YYY hack, fix me */
		TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
			if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
				cache_zap(cache_hold(scan));
				goto restart;
			}
		}
		cache_zap(ncp);
	}

	/*
	 * Calculate a new unique id for ".." handling
	 */
	do {
		nextid++;
	} while (nextid == vp->v_id || nextid == 0);
	vp->v_id = nextid;
}
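/*
 * Editorial sketch (hypothetical caller): a filesystem that cannot
 * enumerate the affected names simply purges the whole vnode, e.g.
 * when a directory's identity becomes suspect after a rename:
 *
 *	cache_purge(vp);	entries go UNRESOLVED, vp->v_id is fresh
 */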
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *nchpp;
	struct namecache *ncp, *nnp;

	/*
	 * Scan hash tables for applicable entries.  The next entry in
	 * each chain is held before the current one is examined, so the
	 * scan survives the current entry being zapped out from under it.
	 */
	for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
		ncp = LIST_FIRST(nchpp);
		if (ncp)
			cache_hold(ncp);
		while (ncp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (nnp)
				cache_hold(nnp);
			if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
				cache_zap(ncp);
			else
				cache_drop(ncp);
			ncp = nnp;
		}
	}
}

/*
 * cache_leaf_test()
 *
 *	Test whether the vnode is at a leaf in the namecache tree.
 *
 *	Returns 0 if it is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *scan;
	struct namecache *ncp;

	TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
		TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
			/* YYY && ncp->nc_vp->v_type == VDIR ? */
			if (ncp->nc_vp != NULL)
				return(-1);
		}
	}
	return(0);
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 *
 * vop_lookup_args {
 *	struct vnode *a_dvp;
 *	struct namecache *a_par;
 *	struct vnode **a_vpp;
 *	struct namecache **a_ncpp;
 *	struct componentname *a_cnp;
 * }
 */
int
vfs_cache_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct namecache *par = ap->a_par;
	struct vnode **vpp = ap->a_vpp;
	struct namecache **ncpp = ap->a_ncpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_td;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	if (ncpp)
		*ncpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & CNP_LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
		return (EROFS);
	}

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	error = cache_lookup(dvp, par, vpp, ncpp, cnp);

	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));

	if (error == ENOENT)
		return (error);

	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & CNP_ISDOTDOT) {
		VOP_UNLOCK(dvp, NULL, 0, td);
		cnp->cn_flags |= CNP_PDIRUNLOCK;
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!error && lockparent && (flags & CNP_ISLASTCN)) {
			if ((error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~CNP_PDIRUNLOCK;
		}
	} else {
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!lockparent || error || !(flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		vput(vp);
		if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & CNP_PDIRUNLOCK) {
		error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));
}
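/*
 * Editorial sketch (assumed filesystem glue): vfs_cache_lookup() is
 * designed to be wired in as a filesystem's vop_lookup entry, with the
 * real directory scan supplied via vop_cachedlookup, along the lines
 * of vnodeopv entries such as:
 *
 *	{ &vop_lookup_desc,		(void *) vfs_cache_lookup },
 *	{ &vop_cachedlookup_desc,	(void *) xxx_lookup },
 *
 * where xxx_lookup is the filesystem's uncached lookup routine.
 */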
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/*
 * __getcwd() - resolve the current directory to a path by walking
 * nc_parent linkages from fd_cdir towards the root, building the
 * string backwards from the end of the buffer.
 */
int
__getcwd(struct __getcwd_args *uap)
{
	struct proc *p = curproc;
	char *bp, *buf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (uap->buflen < 2)
		return (EINVAL);
	if (uap->buflen > MAXPATHLEN)
		uap->buflen = MAXPATHLEN;
	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
	bp += uap->buflen - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numcwdfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}
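/*
 * Editorial usage sketch: userland reaches __getcwd() through the
 * standard getcwd(3) wrapper in libc, e.g.:
 *
 *	char buf[MAXPATHLEN];
 *
 *	if (getcwd(buf, sizeof(buf)) == NULL)
 *		err(1, "getcwd");
 */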
/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * textvp_fullpath() - resolve the path of a process's text (executable)
 * vnode, using the same backwards buffer construction as __getcwd().
 * On success *retbuf points into *retfreebuf, which the caller must
 * release with free(*retfreebuf, M_TEMP).
 */
int
textvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp, *textvp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	textvp = p->p_textvp;
	if (textvp == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (vp == textvp)
				break;
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*retfreebuf = buf;
	return (0);
}