/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.62 2006/03/30 02:39:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/globaldata.h>
#include <sys/kern_syscall.h>
#include <sys/dirent.h>
#include <ddb/ddb.h>

/*
 * Random lookups in the cache are accomplished with a hash table using
 * a hash key of (nc_src_vp, name).
 *
 * Negative entries may exist and correspond to structures where nc_vp
 * is NULL.  In a negative entry, NCF_WHITEOUT will be set if the entry
 * corresponds to a whited-out directory entry (versus simply not finding
 * the entry at all).
 *
 * Upon reaching the last segment of a path, if the reference is for DELETE,
 * or NOCACHE is set (rewrite), and the name is located in the cache, it
 * will be dropped.
 */

/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash)	(&nchashtbl[(hash) & nchash])
#define MINNEG		1024

MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static struct namecache_list	ncneglist;		/* instead of vnode */

/*
 * ncvp_debug - debug cache_fromdvp().  This is used by the NFS server
 * to create the namecache infrastructure leading to a dangling vnode.
 *
 * 0	Only errors are reported
 * 1	Successes are reported
 * 2	Successes + the whole directory scan is reported
 * 3	Force the directory scan code to run as if the parent vnode did not
 *	have a namecache record, even if it does have one.
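 *
 * The level can be changed at run time through the debug.ncvp_debug sysctl
 * declared below (illustrative usage only: "sysctl debug.ncvp_debug=2"
 * from userland enables the directory scan reporting described above).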
 */
static int	ncvp_debug;
SYSCTL_INT(_debug, OID_AUTO, ncvp_debug, CTLFLAG_RW, &ncvp_debug, 0, "");

static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");

static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");

static int	nclockwarn;		/* warn on locked entries in ticks */
SYSCTL_INT(_debug, OID_AUTO, nclockwarn, CTLFLAG_RW, &nclockwarn, 0, "");

static u_long	numneg;		/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");

static u_long	numcache;	/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");

static u_long	numunres;	/* number of unresolved entries */
SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");

SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

static int cache_resolve_mp(struct namecache *ncp);
static void cache_rehash(struct namecache *ncp);

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

struct nchstats nchstats[SMP_MAXCPU];
/*
 * Export VFS cache effectiveness statistics to user-land.
 *
 * The statistics are left for aggregation to user-land so
 * neat things can be achieved, like observing per-CPU cache
 * distribution.
 */
static int
sysctl_nchstats(SYSCTL_HANDLER_ARGS)
{
	struct globaldata *gd;
	int i, error;

	error = 0;
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
			sizeof(struct nchstats))))
			break;
	}

	return (error);
}
SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

/*
 * cache_hold() and cache_drop() prevent the premature deletion of a
 * namecache entry but do not prevent operations (such as zapping) on
 * that namecache entry.
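 *
 * A minimal usage sketch (illustrative only, not taken from any particular
 * caller in this file):
 *
 *	cache_hold(ncp);
 *	... operation that may block or drop other references ...
 *	cache_drop(ncp);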
 */
static __inline
struct namecache *
_cache_hold(struct namecache *ncp)
{
	++ncp->nc_refs;
	return(ncp);
}

/*
 * When dropping an entry, if only one ref remains and the entry has not
 * been resolved, zap it.  Since the one reference is being dropped the
 * entry had better not be locked.
 */
static __inline
void
_cache_drop(struct namecache *ncp)
{
	KKASSERT(ncp->nc_refs > 0);
	if (ncp->nc_refs == 1 &&
	    (ncp->nc_flag & NCF_UNRESOLVED) &&
	    TAILQ_EMPTY(&ncp->nc_list)
	) {
		KKASSERT(ncp->nc_exlocks == 0);
		cache_lock(ncp);
		cache_zap(ncp);
	} else {
		--ncp->nc_refs;
	}
}

/*
 * Link a new namecache entry to its parent.  Be careful to avoid races
 * if vhold() blocks in the future.
 *
 * If we are creating a child under an oldapi parent we must mark the
 * child as being an oldapi entry as well.
 */
static void
cache_link_parent(struct namecache *ncp, struct namecache *par)
{
	KKASSERT(ncp->nc_parent == NULL);
	ncp->nc_parent = par;
	if (TAILQ_EMPTY(&par->nc_list)) {
		TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
		/*
		 * Any vp associated with an ncp which has children must
		 * be held to prevent it from being recycled.
		 */
		if (par->nc_vp)
			vhold(par->nc_vp);
	} else {
		TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
	}
}

/*
 * Remove the parent association from a namecache structure.  If this is
 * the last child of the parent the cache_drop(par) will attempt to
 * recursively zap the parent.
 */
static void
cache_unlink_parent(struct namecache *ncp)
{
	struct namecache *par;

	if ((par = ncp->nc_parent) != NULL) {
		ncp->nc_parent = NULL;
		par = cache_hold(par);
		TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
		if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
			vdrop(par->nc_vp);
		cache_drop(par);
	}
}

/*
 * Allocate a new namecache structure.  Most of the code does not require
 * zero-termination of the string but it makes vop_compat_ncreate() easier.
 */
static struct namecache *
cache_alloc(int nlen)
{
	struct namecache *ncp;

	ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
	if (nlen)
		ncp->nc_name = malloc(nlen + 1, M_VFSCACHE, M_WAITOK);
	ncp->nc_nlen = nlen;
	ncp->nc_flag = NCF_UNRESOLVED;
	ncp->nc_error = ENOTCONN;	/* needs to be resolved */
	ncp->nc_refs = 1;
	ncp->nc_fsmid = 1;
	TAILQ_INIT(&ncp->nc_list);
	cache_lock(ncp);
	return(ncp);
}

static void
cache_free(struct namecache *ncp)
{
	KKASSERT(ncp->nc_refs == 1 && ncp->nc_exlocks == 1);
	if (ncp->nc_name)
		free(ncp->nc_name, M_VFSCACHE);
	free(ncp, M_VFSCACHE);
}

/*
 * Ref and deref a namecache structure.
 */
struct namecache *
cache_hold(struct namecache *ncp)
{
	return(_cache_hold(ncp));
}

void
cache_drop(struct namecache *ncp)
{
	_cache_drop(ncp);
}

/*
 * Namespace locking.  The caller must already hold a reference to the
 * namecache structure in order to lock/unlock it.  This function prevents
 * the namespace from being created or destroyed by accessors other than
 * the lock holder.
 *
 * Note that holding a locked namecache structure prevents other threads
 * from making namespace changes (e.g. deleting or creating), prevents
 * vnode association state changes by other threads, and prevents the
 * namecache entry from being resolved or unresolved by other threads.
 *
 * The lock owner has full authority to associate/disassociate vnodes
 * and resolve/unresolve the locked ncp.
 *
 * WARNING!  Holding a locked ncp will prevent a vnode from being destroyed
 * or recycled, but it does NOT help you if the vnode had already initiated
 * a recyclement.  If this is important, use cache_get() rather than
 * cache_lock() (and deal with the differences in the way the refs counter
 * is handled).  Or, alternatively, make an unconditional call to
 * cache_validate() or cache_resolve() after cache_lock() returns.
 */
void
cache_lock(struct namecache *ncp)
{
	thread_t td;
	int didwarn;

	KKASSERT(ncp->nc_refs != 0);
	didwarn = 0;
	td = curthread;

	for (;;) {
		if (ncp->nc_exlocks == 0) {
			ncp->nc_exlocks = 1;
			ncp->nc_locktd = td;
			/*
			 * The vp associated with a locked ncp must be held
			 * to prevent it from being recycled (which would
			 * cause the ncp to become unresolved).
			 *
			 * WARNING!  If VRECLAIMED is set the vnode could
			 * already be in the middle of a recycle.  Callers
			 * should not assume that nc_vp is usable when
			 * not NULL.  cache_vref() or cache_vget() must be
			 * called.
			 *
			 * XXX loop on race for later MPSAFE work.
			 */
			if (ncp->nc_vp)
				vhold(ncp->nc_vp);
			break;
		}
		if (ncp->nc_locktd == td) {
			++ncp->nc_exlocks;
			break;
		}
		ncp->nc_flag |= NCF_LOCKREQ;
		if (tsleep(ncp, 0, "clock", nclockwarn) == EWOULDBLOCK) {
			if (didwarn)
				continue;
			didwarn = 1;
			printf("[diagnostic] cache_lock: blocked on %p", ncp);
			if ((ncp->nc_flag & NCF_MOUNTPT) && ncp->nc_mount)
				printf(" [MOUNTFROM %s]\n", ncp->nc_mount->mnt_stat.f_mntfromname);
			else
				printf(" \"%*.*s\"\n",
					ncp->nc_nlen, ncp->nc_nlen,
					ncp->nc_name);
		}
	}

	if (didwarn == 1) {
		printf("[diagnostic] cache_lock: unblocked %*.*s\n",
			ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
	}
}

int
cache_lock_nonblock(struct namecache *ncp)
{
	thread_t td;

	KKASSERT(ncp->nc_refs != 0);
	td = curthread;
	if (ncp->nc_exlocks == 0) {
		ncp->nc_exlocks = 1;
		ncp->nc_locktd = td;
		/*
		 * The vp associated with a locked ncp must be held
		 * to prevent it from being recycled (which would
		 * cause the ncp to become unresolved).
		 *
		 * WARNING!  If VRECLAIMED is set the vnode could
		 * already be in the middle of a recycle.  Callers
		 * should not assume that nc_vp is usable when
		 * not NULL.  cache_vref() or cache_vget() must be
		 * called.
		 *
		 * XXX loop on race for later MPSAFE work.
		 */
		if (ncp->nc_vp)
			vhold(ncp->nc_vp);
		return(0);
	} else {
		return(EWOULDBLOCK);
	}
}

void
cache_unlock(struct namecache *ncp)
{
	thread_t td = curthread;

	KKASSERT(ncp->nc_refs > 0);
	KKASSERT(ncp->nc_exlocks > 0);
	KKASSERT(ncp->nc_locktd == td);
	if (--ncp->nc_exlocks == 0) {
		if (ncp->nc_vp)
			vdrop(ncp->nc_vp);
		ncp->nc_locktd = NULL;
		if (ncp->nc_flag & NCF_LOCKREQ) {
			ncp->nc_flag &= ~NCF_LOCKREQ;
			wakeup(ncp);
		}
	}
}

/*
 * ref-and-lock, unlock-and-deref functions.
 *
 * This function is primarily used by nlookup.  Even though cache_lock
 * holds the vnode, it is possible that the vnode may have already
 * initiated a recyclement.  We want cache_get() to return a definitively
 * usable vnode or a definitively unresolved ncp.
 */
struct namecache *
cache_get(struct namecache *ncp)
{
	_cache_hold(ncp);
	cache_lock(ncp);
	if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
		cache_setunresolved(ncp);
	return(ncp);
}

int
cache_get_nonblock(struct namecache *ncp)
{
	/* XXX MP */
	if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
		_cache_hold(ncp);
		cache_lock(ncp);
		if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
			cache_setunresolved(ncp);
		return(0);
	}
	return(EWOULDBLOCK);
}

void
cache_put(struct namecache *ncp)
{
	cache_unlock(ncp);
	_cache_drop(ncp);
}

/*
 * Resolve an unresolved ncp by associating a vnode with it.  If the
 * vnode is NULL, a negative cache entry is created.
 *
 * The ncp should be locked on entry and will remain locked on return.
 */
void
cache_setvp(struct namecache *ncp, struct vnode *vp)
{
	KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
	ncp->nc_vp = vp;
	if (vp != NULL) {
		/*
		 * Any vp associated with an ncp which has children must
		 * be held.  Any vp associated with a locked ncp must be held.
		 */
		if (!TAILQ_EMPTY(&ncp->nc_list))
			vhold(vp);
		TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
		if (ncp->nc_exlocks)
			vhold(vp);

		/*
		 * Set auxiliary flags
		 */
		switch(vp->v_type) {
		case VDIR:
			ncp->nc_flag |= NCF_ISDIR;
			break;
		case VLNK:
			ncp->nc_flag |= NCF_ISSYMLINK;
			/* XXX cache the contents of the symlink */
			break;
		default:
			break;
		}
		++numcache;
		ncp->nc_error = 0;
	} else {
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		++numneg;
		ncp->nc_error = ENOENT;
	}
	ncp->nc_flag &= ~NCF_UNRESOLVED;
}

void
cache_settimeout(struct namecache *ncp, int nticks)
{
	if ((ncp->nc_timeout = ticks + nticks) == 0)
		ncp->nc_timeout = 1;
}

/*
 * Disassociate the vnode or negative-cache association and mark a
 * namecache entry as unresolved again.  Note that the ncp is still
 * left in the hash table and still linked to its parent.
 *
 * The ncp should be locked and refd on entry and will remain locked and refd
 * on return.
 *
 * This routine is normally never called on a directory containing children.
 * However, NFS often does just that in its rename() code as a cop-out to
 * avoid complex namespace operations.  This disconnects a directory vnode
 * from its namecache and can cause the OLDAPI and NEWAPI to get out of
 * sync.
 *
 * NOTE: NCF_FSMID must be cleared so a refurbishment of the ncp, such as
 * in a create, properly propagates the flag up the chain.
 */
void
cache_setunresolved(struct namecache *ncp)
{
	struct vnode *vp;

	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		ncp->nc_flag |= NCF_UNRESOLVED;
		ncp->nc_flag &= ~(NCF_WHITEOUT|NCF_ISDIR|NCF_ISSYMLINK|
				  NCF_FSMID);
		ncp->nc_timeout = 0;
		ncp->nc_error = ENOTCONN;
		++numunres;
		if ((vp = ncp->nc_vp) != NULL) {
			--numcache;
			ncp->nc_vp = NULL;
			TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);

			/*
			 * Any vp associated with an ncp with children is
			 * held by that ncp.  Any vp associated with a locked
			 * ncp is held by that ncp.  These conditions must be
			 * undone when the vp is cleared out from the ncp.
			 */
			if (!TAILQ_EMPTY(&ncp->nc_list))
				vdrop(vp);
			if (ncp->nc_exlocks)
				vdrop(vp);
		} else {
			TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
			--numneg;
		}
	}
}

/*
 * Invalidate portions of the namecache topology given a starting entry.
 * The passed ncp is set to an unresolved state; the flags below select
 * additional behavior.
 *
 * The passed ncp must be locked.
 *
 * CINV_DESTROY		- Set a flag in the passed ncp entry indicating
 *			  that the physical underlying nodes have been
 *			  destroyed... as in deleted.  For example, when
 *			  a directory is removed.  This will cause record
 *			  lookups on the name to no longer be able to find
 *			  the record and tells the resolver to return failure
 *			  rather than trying to resolve through the parent.
 *
 *			  The topology itself, including ncp->nc_name,
 *			  remains intact.
 *
 *			  This only applies to the passed ncp; even if
 *			  CINV_CHILDREN is specified the children are not
 *			  flagged.
 *
 * CINV_CHILDREN	- Set all children (recursively) to an unresolved
 *			  state as well.
 *
 *			  Note that this will also have the side effect of
 *			  cleaning out any unreferenced nodes in the topology
 *			  from the leaves up as the recursion backs out.
 *
 * Note that the topology for any referenced nodes remains intact.
 *
 * It is possible for cache_inval() to race a cache_resolve(), meaning that
 * the namecache entry may not actually be invalidated on return if it was
 * revalidated while recursing down into its children.  This code guarantees
 * that the node(s) will go through an invalidation cycle, but does not
 * guarantee that they will remain in an invalidated state.
 *
 * Returns non-zero if a revalidation was detected during the invalidation
 * recursion, zero otherwise.  Note that since only the original ncp is
 * locked the revalidation ultimately can only indicate that the original ncp
 * *MIGHT NOT* have been re-resolved.
 */
int
cache_inval(struct namecache *ncp, int flags)
{
	struct namecache *kid;
	struct namecache *nextkid;
	int rcnt = 0;

	KKASSERT(ncp->nc_exlocks);

	cache_setunresolved(ncp);
	if (flags & CINV_DESTROY)
		ncp->nc_flag |= NCF_DESTROYED;

	if ((flags & CINV_CHILDREN) &&
	    (kid = TAILQ_FIRST(&ncp->nc_list)) != NULL
	) {
		cache_hold(kid);
		cache_unlock(ncp);
		while (kid) {
			if ((nextkid = TAILQ_NEXT(kid, nc_entry)) != NULL)
				cache_hold(nextkid);
			if ((kid->nc_flag & NCF_UNRESOLVED) == 0 ||
			    TAILQ_FIRST(&kid->nc_list)
			) {
				cache_lock(kid);
				rcnt += cache_inval(kid, flags & ~CINV_DESTROY);
				cache_unlock(kid);
			}
			cache_drop(kid);
			kid = nextkid;
		}
		cache_lock(ncp);
	}

	/*
	 * Someone could have gotten in there while ncp was unlocked,
	 * retry if so.
	 */
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
		++rcnt;
	return (rcnt);
}

/*
 * Invalidate a vnode's namecache associations.  To avoid races against
 * the resolver we do not invalidate a node which we previously invalidated
 * but which was then re-resolved while we were in the invalidation loop.
 *
 * Returns non-zero if any namecache entries remain after the invalidation
 * loop completed.
 *
 * NOTE: unlike the namecache topology which guarantees that ncp's will not
 * be ripped out of the topology while held, the vnode's v_namecache list
 * has no such restriction.  NCP's can be ripped out of the list at virtually
 * any time if not locked, even if held.
 */
int
cache_inval_vp(struct vnode *vp, int flags, int *retflags)
{
	struct namecache *ncp;
	struct namecache *next;

restart:
	ncp = TAILQ_FIRST(&vp->v_namecache);
	if (ncp)
		cache_hold(ncp);
	while (ncp) {
		/* loop entered with ncp held */
		if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL)
			cache_hold(next);
		cache_lock(ncp);
		if (ncp->nc_vp != vp) {
			printf("Warning: cache_inval_vp: race-A detected on "
				"%s\n", ncp->nc_name);
			cache_put(ncp);
			if (next)
				cache_drop(next);
			goto restart;
		}
		*retflags |= ncp->nc_flag & NCF_FSMID;
		cache_inval(ncp, flags);
		cache_put(ncp);		/* also releases reference */
		ncp = next;
		if (ncp && ncp->nc_vp != vp) {
			printf("Warning: cache_inval_vp: race-B detected on "
				"%s\n", ncp->nc_name);
			cache_drop(ncp);
			goto restart;
		}
	}
	return(TAILQ_FIRST(&vp->v_namecache) != NULL);
}

/*
 * The source ncp has been renamed to the target ncp.  Both fncp and tncp
 * must be locked.  Both will be set to unresolved, any children of tncp
 * will be disconnected (the prior contents of the target are assumed to be
 * destroyed by the rename operation, e.g. renaming over an empty directory),
 * and all children of fncp will be moved to tncp.
 *
 * XXX the disconnection could pose a problem, check code paths to make
 * sure any code that blocks can handle the parent being changed out from
 * under it.  Maybe we should lock the children (watch out for deadlocks) ?
 *
 * After we return the caller has the option of calling cache_setvp() if
 * the vnode of the new target ncp is known.
 *
 * Any process CD'd into any of the children will no longer be able to ".."
 * back out.  An rm -rf can cause this situation to occur.
 */
void
cache_rename(struct namecache *fncp, struct namecache *tncp)
{
	struct namecache *scan;
	int didwarn = 0;

	cache_setunresolved(fncp);
	cache_setunresolved(tncp);
	while (cache_inval(tncp, CINV_CHILDREN) != 0) {
		if (didwarn++ % 10 == 0) {
			printf("Warning: cache_rename: race during "
				"rename %s->%s\n",
				fncp->nc_name, tncp->nc_name);
		}
		tsleep(tncp, 0, "mvrace", hz / 10);
		cache_setunresolved(tncp);
	}
	while ((scan = TAILQ_FIRST(&fncp->nc_list)) != NULL) {
		cache_hold(scan);
		cache_unlink_parent(scan);
		cache_link_parent(scan, tncp);
		if (scan->nc_flag & NCF_HASHED)
			cache_rehash(scan);
		cache_drop(scan);
	}
}

/*
 * vget the vnode associated with the namecache entry.  Resolve the namecache
 * entry if necessary and deal with namecache/vp races.  The passed ncp must
 * be referenced and may be locked.  The ncp's ref/locking state is not
 * affected by this call.
 *
 * lk_type may be LK_SHARED, LK_EXCLUSIVE.  A ref'd, possibly locked
 * (depending on the passed lk_type) vnode will be returned in *vpp with an
 * error of 0, or NULL will be returned in *vpp with a non-0 error code.
 * The most typical error is ENOENT, meaning that the ncp represents a
 * negative cache hit and there is no vnode to retrieve, but other errors
 * can occur too.
 *
 * The main race we have to deal with is a namecache zap.  The ncp itself
 * will not disappear since it is referenced, and it turns out that the
 * validity of the vp pointer can be checked simply by rechecking the
 * contents of ncp->nc_vp.
 */
int
cache_vget(struct namecache *ncp, struct ucred *cred,
	   int lk_type, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

again:
	vp = NULL;
	if (ncp->nc_flag & NCF_UNRESOLVED) {
		cache_lock(ncp);
		error = cache_resolve(ncp, cred);
		cache_unlock(ncp);
	} else {
		error = 0;
	}
	if (error == 0 && (vp = ncp->nc_vp) != NULL) {
		/*
		 * Accessing the vnode from the namecache is a bit
		 * dangerous.  Because there are no refs on the vnode, it
		 * could be in the middle of a reclaim.
		 */
		if (vp->v_flag & VRECLAIMED) {
			printf("Warning: vnode reclaim race detected in cache_vget on %p (%s)\n", vp, ncp->nc_name);
			cache_lock(ncp);
			cache_setunresolved(ncp);
			cache_unlock(ncp);
			goto again;
		}
		error = vget(vp, lk_type, curthread);
		if (error) {
			if (vp != ncp->nc_vp)
				goto again;
			vp = NULL;
		} else if (vp != ncp->nc_vp) {
			vput(vp);
			goto again;
		} else if (vp->v_flag & VRECLAIMED) {
			panic("vget succeeded on a VRECLAIMED node! vp %p", vp);
		}
	}
	if (error == 0 && vp == NULL)
		error = ENOENT;
	*vpp = vp;
	return(error);
}

int
cache_vref(struct namecache *ncp, struct ucred *cred, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

again:
	vp = NULL;
	if (ncp->nc_flag & NCF_UNRESOLVED) {
		cache_lock(ncp);
		error = cache_resolve(ncp, cred);
		cache_unlock(ncp);
	} else {
		error = 0;
	}
	if (error == 0 && (vp = ncp->nc_vp) != NULL) {
		/*
		 * Since we did not obtain any locks, a cache zap
		 * race can occur here if the vnode is in the middle
		 * of being reclaimed and has not yet been able to
		 * clean out its cache node.  If that case occurs,
		 * we must lock and unresolve the cache, then loop
		 * to retry.
		 */
		if (vp->v_flag & VRECLAIMED) {
			printf("Warning: vnode reclaim race detected on cache_vref %p (%s)\n", vp, ncp->nc_name);
			cache_lock(ncp);
			cache_setunresolved(ncp);
			cache_unlock(ncp);
			goto again;
		}
		vref(vp);
	}
	if (error == 0 && vp == NULL)
		error = ENOENT;
	*vpp = vp;
	return(error);
}

/*
 * Recursively set the FSMID update flag for namecache nodes leading
 * to root.  This will cause the next getattr or reclaim to increment the
 * fsmid and mark the inode for lazy updating.
 *
 * Stop recursing when we hit a node whose NCF_FSMID flag is already set.
 * This makes FSMIDs work in an Einsteinian fashion - where the observation
 * affects the result.  In this case a program monitoring a higher level
 * node will have detected some prior change and started its scan (clearing
 * NCF_FSMID in higher level nodes), but since it has not yet observed the
 * node where we find NCF_FSMID still set, we can safely make the related
 * modification without interfering with the theorized program.
 *
 * This also means that FSMIDs cannot represent time-domain quantities
 * in a hierarchical sense.  But the main reason for doing it this way
 * is to reduce the amount of recursion that occurs in the critical path
 * when e.g. a program is writing to a file that sits deep in a directory
 * hierarchy.
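 *
 * For example (illustrative only): if /a/b already has NCF_FSMID set, an
 * update deep underneath it at /a/b/c/d only marks d and then c before the
 * upward walk stops at b, leaving a and the root untouched.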
 */
void
cache_update_fsmid(struct namecache *ncp)
{
	struct vnode *vp;
	struct namecache *scan;

	/*
	 * Warning: even if we get a non-NULL vp it could still be in the
	 * middle of a recyclement.  Don't do anything fancy, just set
	 * NCF_FSMID.
	 */
	if ((vp = ncp->nc_vp) != NULL) {
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			for (scan = ncp; scan; scan = scan->nc_parent) {
				if (scan->nc_flag & NCF_FSMID)
					break;
				scan->nc_flag |= NCF_FSMID;
			}
		}
	} else {
		while (ncp && (ncp->nc_flag & NCF_FSMID) == 0) {
			ncp->nc_flag |= NCF_FSMID;
			ncp = ncp->nc_parent;
		}
	}
}

void
cache_update_fsmid_vp(struct vnode *vp)
{
	struct namecache *ncp;
	struct namecache *scan;

	TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
		for (scan = ncp; scan; scan = scan->nc_parent) {
			if (scan->nc_flag & NCF_FSMID)
				break;
			scan->nc_flag |= NCF_FSMID;
		}
	}
}

/*
 * If getattr is called on a vnode (e.g. a stat call), the filesystem
 * may call this routine to determine if the namecache has the hierarchical
 * change flag set, requiring the fsmid to be updated.
 *
 * Since 0 indicates no support, make sure the filesystem fsmid is at least
 * 1.
 */
int
cache_check_fsmid_vp(struct vnode *vp, int64_t *fsmid)
{
	struct namecache *ncp;
	int changed = 0;

	TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
		if (ncp->nc_flag & NCF_FSMID) {
			ncp->nc_flag &= ~NCF_FSMID;
			changed = 1;
		}
	}
	if (*fsmid == 0)
		++*fsmid;
	if (changed)
		++*fsmid;
	return(changed);
}

/*
 * Convert a directory vnode to a namecache record without any other
 * knowledge of the topology.  This ONLY works with directory vnodes and
 * is ONLY used by the NFS server.  dvp must be refd but unlocked, and the
 * returned ncp (if not NULL) will be held and unlocked.
 *
 * If 'makeit' is 0 and dvp has no existing namecache record, NULL is returned.
 * If 'makeit' is 1 we attempt to track-down and create the namecache topology
 * for dvp.  This will fail only if the directory has been deleted out from
 * under the caller.
 *
 * Callers must always check for a NULL return no matter the value of 'makeit'.
 *
 * To avoid underflowing the kernel stack each recursive call increments
 * the makeit variable.
 */

static int cache_inefficient_scan(struct namecache *ncp, struct ucred *cred,
				  struct vnode *dvp);
static int cache_fromdvp_try(struct vnode *dvp, struct ucred *cred,
			     struct vnode **saved_dvp);

struct namecache *
cache_fromdvp(struct vnode *dvp, struct ucred *cred, int makeit)
{
	struct namecache *ncp;
	struct vnode *saved_dvp;
	struct vnode *pvp;
	int error;

	ncp = NULL;
	saved_dvp = NULL;

	/*
	 * Temporary debugging code to force the directory scanning code
	 * to be exercised.
	 */
	if (ncvp_debug >= 3 && makeit && TAILQ_FIRST(&dvp->v_namecache)) {
		ncp = TAILQ_FIRST(&dvp->v_namecache);
		printf("cache_fromdvp: forcing %s\n", ncp->nc_name);
		goto force;
	}

	/*
	 * Loop until resolution, inside code will break out on error.
	 */
	while ((ncp = TAILQ_FIRST(&dvp->v_namecache)) == NULL && makeit) {
force:
		/*
		 * If dvp is the root of its filesystem it should already
		 * have a namecache pointer associated with it as a side
		 * effect of the mount, but it may have been disassociated.
		 */
		if (dvp->v_flag & VROOT) {
			ncp = cache_get(dvp->v_mount->mnt_ncp);
			error = cache_resolve_mp(ncp);
			cache_put(ncp);
			if (ncvp_debug) {
				printf("cache_fromdvp: resolve root of mount %p error %d",
					dvp->v_mount, error);
			}
			if (error) {
				if (ncvp_debug)
					printf(" failed\n");
				ncp = NULL;
				break;
			}
			if (ncvp_debug)
				printf(" succeeded\n");
			continue;
		}

		/*
		 * If we have recursed too deeply resort to an O(n^2)
		 * algorithm to resolve the namecache topology.  The
		 * resolved pvp is left referenced in saved_dvp to
		 * prevent the tree from being destroyed while we loop.
		 */
		if (makeit > 20) {
			error = cache_fromdvp_try(dvp, cred, &saved_dvp);
			if (error) {
				printf("lookupdotdot(longpath) failed %d "
				       "dvp %p\n", error, dvp);
				break;
			}
			continue;
		}

		/*
		 * Get the parent directory and resolve its ncp.
		 */
		error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred);
		if (error) {
			printf("lookupdotdot failed %d dvp %p\n", error, dvp);
			break;
		}
		VOP_UNLOCK(pvp, 0, curthread);

		/*
		 * Reuse makeit as a recursion depth counter.
		 */
		ncp = cache_fromdvp(pvp, cred, makeit + 1);
		vrele(pvp);
		if (ncp == NULL)
			break;

		/*
		 * Do an inefficient scan of pvp (embodied by ncp) to look
		 * for dvp.  This will create a namecache record for dvp on
		 * success.  We loop up to recheck on success.
		 *
		 * ncp and dvp are both held but not locked.
		 */
		error = cache_inefficient_scan(ncp, cred, dvp);
		cache_drop(ncp);
		if (error) {
			printf("cache_fromdvp: scan %p (%s) failed on dvp=%p\n",
				pvp, ncp->nc_name, dvp);
			ncp = NULL;
			break;
		}
		if (ncvp_debug) {
			printf("cache_fromdvp: scan %p (%s) succeeded\n",
				pvp, ncp->nc_name);
		}
	}
	if (ncp)
		cache_hold(ncp);
	if (saved_dvp)
		vrele(saved_dvp);
	return (ncp);
}

/*
 * Go up the chain of parent directories until we find something
 * we can resolve into the namecache.  This is very inefficient.
 */
static
int
cache_fromdvp_try(struct vnode *dvp, struct ucred *cred,
		  struct vnode **saved_dvp)
{
	struct namecache *ncp;
	struct vnode *pvp;
	int error;
	static time_t last_fromdvp_report;

	/*
	 * Loop getting the parent directory vnode until we get something we
	 * can resolve in the namecache.
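	 * In effect this walks ".." (via vop_nlookupdotdot()) one level at
	 * a time until it reaches either a vnode that already has a
	 * v_namecache entry or the root of the filesystem.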
	 */
	vref(dvp);
	for (;;) {
		error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred);
		if (error) {
			vrele(dvp);
			return (error);
		}
		VOP_UNLOCK(pvp, 0, curthread);
		if ((ncp = TAILQ_FIRST(&pvp->v_namecache)) != NULL) {
			cache_hold(ncp);
			vrele(pvp);
			break;
		}
		if (pvp->v_flag & VROOT) {
			ncp = cache_get(pvp->v_mount->mnt_ncp);
			error = cache_resolve_mp(ncp);
			cache_unlock(ncp);
			vrele(pvp);
			if (error) {
				cache_drop(ncp);
				vrele(dvp);
				return (error);
			}
			break;
		}
		vrele(dvp);
		dvp = pvp;
	}
	if (last_fromdvp_report != time_second) {
		last_fromdvp_report = time_second;
		printf("Warning: extremely inefficient path resolution on %s\n",
			ncp->nc_name);
	}
	error = cache_inefficient_scan(ncp, cred, dvp);

	/*
	 * Hopefully dvp now has a namecache record associated with it.
	 * Leave it referenced to prevent the kernel from recycling the
	 * vnode.  Otherwise extremely long directory paths could result
	 * in endless recycling.
	 */
	if (*saved_dvp)
		vrele(*saved_dvp);
	*saved_dvp = dvp;
	return (error);
}


/*
 * Do an inefficient scan of the directory represented by ncp looking for
 * the directory vnode dvp.  ncp must be held but not locked on entry and
 * will be held on return.  dvp must be refd but not locked on entry and
 * will remain refd on return.
 *
 * Why do this at all?  Well, due to its stateless nature the NFS server
 * converts file handles directly to vnodes without necessarily going through
 * the namecache ops that would otherwise create the namecache topology
 * leading to the vnode.  We could either (1) Change the namecache algorithms
 * to allow disconnected namecache records that are re-merged
 * opportunistically, or (2) Make the NFS server backtrack and scan to recover
 * a connected namecache topology in order to then be able to issue new API
 * lookups.
 *
 * It turns out that (1) is a huge mess.  It takes a nice clean set of
 * namecache algorithms and introduces a lot of complication in every subsystem
 * that calls into the namecache to deal with the re-merge case, especially
 * since we are using the namecache to placehold negative lookups and the
 * vnode might not be immediately assigned.  (2) is certainly far less
 * efficient than (1), but since we are only talking about directories here
 * (which are likely to remain cached), the case does not actually run all
 * that often and has the supreme advantage of not polluting the namecache
 * algorithms.
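 *
 * In outline, the scan below does roughly the following (a sketch of the
 * code that follows, not an additional contract):
 *
 *	VOP_GETATTR(dvp)	- obtain dvp's inode number (va_fileid)
 *	cache_vget(ncp)		- obtain the parent directory vnode (pvp)
 *	VOP_READDIR(pvp)	- read directory blocks until an entry with
 *				  d_ino == va_fileid is found
 *	cache_nlookup()		- enter that name as a child of ncp
 *	cache_setvp()		- resolve the new entry to dvp if needed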
 */
static int
cache_inefficient_scan(struct namecache *ncp, struct ucred *cred,
		       struct vnode *dvp)
{
	struct nlcomponent nlc;
	struct namecache *rncp;
	struct dirent *den;
	struct vnode *pvp;
	struct vattr vat;
	struct iovec iov;
	struct uio uio;
	int blksize;
	int eofflag;
	int bytes;
	char *rbuf;
	int error;

	vat.va_blocksize = 0;
	if ((error = VOP_GETATTR(dvp, &vat, curthread)) != 0)
		return (error);
	if ((error = cache_vget(ncp, cred, LK_SHARED, &pvp)) != 0)
		return (error);
	if (ncvp_debug)
		printf("inefficient_scan: directory iosize %ld vattr fileid = %ld\n", vat.va_blocksize, (long)vat.va_fileid);
	if ((blksize = vat.va_blocksize) == 0)
		blksize = DEV_BSIZE;
	rbuf = malloc(blksize, M_TEMP, M_WAITOK);
	rncp = NULL;

	eofflag = 0;
	uio.uio_offset = 0;
again:
	iov.iov_base = rbuf;
	iov.iov_len = blksize;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = blksize;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;

	if (ncvp_debug >= 2)
		printf("cache_inefficient_scan: readdir @ %08x\n", (int)uio.uio_offset);
	error = VOP_READDIR(pvp, &uio, cred, &eofflag, NULL, NULL);
	if (error == 0) {
		den = (struct dirent *)rbuf;
		bytes = blksize - uio.uio_resid;

		while (bytes > 0) {
			if (ncvp_debug >= 2) {
				printf("cache_inefficient_scan: %*.*s\n",
					den->d_namlen, den->d_namlen,
					den->d_name);
			}
			if (den->d_type != DT_WHT &&
			    den->d_ino == vat.va_fileid) {
				if (ncvp_debug) {
					printf("cache_inefficient_scan: "
					       "MATCHED inode %ld path %s/%*.*s\n",
					       vat.va_fileid, ncp->nc_name,
					       den->d_namlen, den->d_namlen,
					       den->d_name);
				}
				nlc.nlc_nameptr = den->d_name;
				nlc.nlc_namelen = den->d_namlen;
				VOP_UNLOCK(pvp, 0, curthread);
				rncp = cache_nlookup(ncp, &nlc);
				KKASSERT(rncp != NULL);
				break;
			}
			bytes -= _DIRENT_DIRSIZ(den);
			den = _DIRENT_NEXT(den);
		}
		if (rncp == NULL && eofflag == 0 && uio.uio_resid != blksize)
			goto again;
	}
	if (rncp) {
		vrele(pvp);
		if (rncp->nc_flag & NCF_UNRESOLVED) {
			cache_setvp(rncp, dvp);
			if (ncvp_debug >= 2) {
				printf("cache_inefficient_scan: setvp %s/%s = %p\n",
					ncp->nc_name, rncp->nc_name, dvp);
			}
		} else {
			if (ncvp_debug >= 2) {
				printf("cache_inefficient_scan: setvp %s/%s already set %p/%p\n",
					ncp->nc_name, rncp->nc_name, dvp,
					rncp->nc_vp);
			}
		}
		if (rncp->nc_vp == NULL)
			error = rncp->nc_error;
		cache_put(rncp);
	} else {
		printf("cache_inefficient_scan: dvp %p NOT FOUND in %s\n",
			dvp, ncp->nc_name);
		vput(pvp);
		error = ENOENT;
	}
	free(rbuf, M_TEMP);
	return (error);
}

/*
 * Zap a namecache entry.  The ncp is unconditionally set to an unresolved
 * state, which disassociates it from its vnode or ncneglist.
 *
 * Then, if there are no additional references to the ncp and no children,
 * the ncp is removed from the topology and destroyed.  This function will
 * also run through the nc_parent chain and destroy parent ncps if possible.
 * As a side benefit, it turns out the only conditions that allow running
 * up the chain are also the conditions to ensure no deadlock will occur.
 *
 * References and/or children may exist if the ncp is in the middle of the
 * topology, preventing the ncp from being destroyed.
 *
 * This function must be called with the ncp held and locked and will unlock
 * and drop it during zapping.
 */
static void
cache_zap(struct namecache *ncp)
{
	struct namecache *par;

	/*
	 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
	 */
	cache_setunresolved(ncp);

	/*
	 * Try to scrap the entry and possibly tail-recurse on its parent.
	 * We only scrap unref'd (other than our ref) unresolved entries,
	 * we do not scrap 'live' entries.
	 */
	while (ncp->nc_flag & NCF_UNRESOLVED) {
		/*
		 * Someone other than us has a ref, stop.
		 */
		if (ncp->nc_refs > 1)
			goto done;

		/*
		 * We have children, stop.
		 */
		if (!TAILQ_EMPTY(&ncp->nc_list))
			goto done;

		/*
		 * Remove ncp from the topology: hash table and parent linkage.
		 */
		if (ncp->nc_flag & NCF_HASHED) {
			ncp->nc_flag &= ~NCF_HASHED;
			LIST_REMOVE(ncp, nc_hash);
		}
		if ((par = ncp->nc_parent) != NULL) {
			par = cache_hold(par);
			TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
			ncp->nc_parent = NULL;
			if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
				vdrop(par->nc_vp);
		}

		/*
		 * ncp should not have picked up any refs.  Physically
		 * destroy the ncp.
		 */
		KKASSERT(ncp->nc_refs == 1);
		--numunres;
		/* cache_unlock(ncp) not required */
		ncp->nc_refs = -1;	/* safety */
		if (ncp->nc_name)
			free(ncp->nc_name, M_VFSCACHE);
		free(ncp, M_VFSCACHE);

		/*
		 * Loop on the parent (it may be NULL).  Only bother looping
		 * if the parent has a single ref (ours), which also means
		 * we can lock it trivially.
		 */
		ncp = par;
		if (ncp == NULL)
			return;
		if (ncp->nc_refs != 1) {
			cache_drop(ncp);
			return;
		}
		KKASSERT(par->nc_exlocks == 0);
		cache_lock(ncp);
	}
done:
	cache_unlock(ncp);
	--ncp->nc_refs;
}

static enum { CHI_LOW, CHI_HIGH } cache_hysteresis_state = CHI_LOW;

static __inline
void
cache_hysteresis(void)
{
	/*
	 * Don't cache too many negative hits.  We use hysteresis to reduce
	 * the impact on the critical path.
	 */
	switch(cache_hysteresis_state) {
	case CHI_LOW:
		if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
			cache_cleanneg(10);
			cache_hysteresis_state = CHI_HIGH;
		}
		break;
	case CHI_HIGH:
		if (numneg > MINNEG * 9 / 10 &&
		    numneg * ncnegfactor * 9 / 10 > numcache
		) {
			cache_cleanneg(10);
		} else {
			cache_hysteresis_state = CHI_LOW;
		}
		break;
	}
}

/*
 * NEW NAMECACHE LOOKUP API
 *
 * Lookup an entry in the cache.  A locked, referenced, non-NULL
 * entry is *always* returned, even if the supplied component is illegal.
 * The resulting namecache entry should be returned to the system with
 * cache_put() or cache_unlock() + cache_drop().
 *
 * namecache locks are recursive but care must be taken to avoid lock order
 * reversals.
 *
 * Nobody else will be able to manipulate the associated namespace (e.g.
 * create, delete, rename, rename-target) until the caller unlocks the
 * entry.
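 *
 * A typical consumer looks roughly like this (illustrative sketch only,
 * error handling omitted; par, name, namelen and cred are caller-supplied):
 *
 *	struct nlcomponent nlc;
 *
 *	nlc.nlc_nameptr = name;
 *	nlc.nlc_namelen = namelen;
 *	ncp = cache_nlookup(par, &nlc);
 *	if (ncp->nc_flag & NCF_UNRESOLVED)
 *		error = cache_resolve(ncp, cred);
 *	... use ncp, e.g. obtain the vnode via cache_vget() ...
 *	cache_put(ncp);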
 *
 * The returned entry will be in one of three states:  positive hit (non-null
 * vnode), negative hit (null vnode), or unresolved (NCF_UNRESOLVED is set).
 * Unresolved entries must be resolved through the filesystem to associate the
 * vnode and/or determine whether a positive or negative hit has occurred.
 *
 * It is not necessary to lock a directory in order to lock namespace under
 * that directory.  In fact, it is explicitly not allowed to do that.  A
 * directory is typically only locked when being created, renamed, or
 * destroyed.
 *
 * The directory (par) may be unresolved, in which case any returned child
 * will likely also be marked unresolved.  Likely but not guaranteed.  Since
 * the filesystem lookup requires a resolved directory vnode the caller is
 * responsible for resolving the namecache chain top-down.  This API
 * specifically allows whole chains to be created in an unresolved state.
 */
struct namecache *
cache_nlookup(struct namecache *par, struct nlcomponent *nlc)
{
	struct namecache *ncp;
	struct namecache *new_ncp;
	struct nchashhead *nchpp;
	u_int32_t hash;
	globaldata_t gd;

	numcalls++;
	gd = mycpu;

	/*
	 * Try to locate an existing entry
	 */
	hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&par, sizeof(par), hash);
	new_ncp = NULL;
restart:
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;

		/*
		 * Zap entries that have timed out.
		 */
		if (ncp->nc_timeout &&
		    (int)(ncp->nc_timeout - ticks) < 0 &&
		    (ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
		    ncp->nc_exlocks == 0
		) {
			cache_zap(cache_get(ncp));
			goto restart;
		}

		/*
		 * Break out if we find a matching entry.  Note that
		 * UNRESOLVED entries may match, but DESTROYED entries
		 * do not.
		 */
		if (ncp->nc_parent == par &&
		    ncp->nc_nlen == nlc->nlc_namelen &&
		    bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
		    (ncp->nc_flag & NCF_DESTROYED) == 0
		) {
			if (cache_get_nonblock(ncp) == 0) {
				if (new_ncp)
					cache_free(new_ncp);
				goto found;
			}
			cache_get(ncp);
			cache_put(ncp);
			goto restart;
		}
	}

	/*
	 * We failed to locate an entry, create a new entry and add it to
	 * the cache.  We have to relookup after possibly blocking in
	 * malloc.
	 */
	if (new_ncp == NULL) {
		new_ncp = cache_alloc(nlc->nlc_namelen);
		goto restart;
	}

	ncp = new_ncp;

	/*
	 * Initialize as a new UNRESOLVED entry, lock (non-blocking),
	 * and link to the parent.  The mount point is usually inherited
	 * from the parent unless this is a special case such as a mount
	 * point where nlc_namelen is 0.  The caller is responsible for
	 * setting nc_mount in that case.  If nlc_namelen is 0 nc_name will
	 * be NULL.
	 */
	if (nlc->nlc_namelen) {
		bcopy(nlc->nlc_nameptr, ncp->nc_name, nlc->nlc_namelen);
		ncp->nc_name[nlc->nlc_namelen] = 0;
		ncp->nc_mount = par->nc_mount;
	}
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
	ncp->nc_flag |= NCF_HASHED;
	cache_link_parent(ncp, par);
found:
	/*
	 * stats and namecache size management
	 */
	if (ncp->nc_flag & NCF_UNRESOLVED)
		++gd->gd_nchstats->ncs_miss;
	else if (ncp->nc_vp)
		++gd->gd_nchstats->ncs_goodhits;
	else
		++gd->gd_nchstats->ncs_neghits;
	cache_hysteresis();
	return(ncp);
}

/*
 * Given a locked ncp, validate that the vnode, if present, is actually
 * usable.  If it is not usable set the ncp to an unresolved state.
 */
void
cache_validate(struct namecache *ncp)
{
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
			cache_setunresolved(ncp);
	}
}

/*
 * Resolve an unresolved namecache entry, generally by looking it up.
 * The passed ncp must be locked and refd.
 *
 * Theoretically since a vnode cannot be recycled while held, and since
 * the nc_parent chain holds its vnode as long as children exist, the
 * direct parent of the cache entry we are trying to resolve should
 * have a valid vnode.  If not then generate an error that we can
 * determine is related to a resolver bug.
 *
 * However, if a vnode was in the middle of a recyclement when the NCP
 * got locked, ncp->nc_vp might point to a vnode that is about to become
 * invalid.  cache_resolve() handles this case by unresolving the entry
 * and then re-resolving it.
 *
 * Note that successful resolution does not necessarily return an error
 * code of 0.  If the ncp resolves to a negative cache hit then ENOENT
 * will be returned.
 */
int
cache_resolve(struct namecache *ncp, struct ucred *cred)
{
	struct namecache *par;
	int error;

restart:
	/*
	 * If the ncp is already resolved we have nothing to do.  However,
	 * we do want to guarantee that a usable vnode is returned when
	 * a vnode is present, so make sure it hasn't been reclaimed.
	 */
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
			cache_setunresolved(ncp);
		if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
			return (ncp->nc_error);
	}

	/*
	 * Mount points need special handling because the parent does not
	 * belong to the same filesystem as the ncp.
	 */
	if (ncp->nc_flag & NCF_MOUNTPT)
		return (cache_resolve_mp(ncp));

	/*
	 * We expect an unbroken chain of ncps to at least the mount point,
	 * and even all the way to root (but this code doesn't have to go
	 * past the mount point).
	 */
	if (ncp->nc_parent == NULL) {
		printf("EXDEV case 1 %p %*.*s\n", ncp,
			ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
		ncp->nc_error = EXDEV;
		return(ncp->nc_error);
	}

	/*
	 * The vp's of the parent directories in the chain are held via vhold()
	 * due to the existence of the child, and should not disappear.
	 * However, there are cases where they can disappear:
	 *
	 *	- due to filesystem I/O errors.
	 *	- due to NFS being stupid about tracking the namespace and
	 *	  destroying the namespace for entire directories quite often.
	 *	- due to forced unmounts.
	 *	- due to an rmdir (parent will be marked DESTROYED).
	 *
	 * When this occurs we have to track the chain backwards and resolve
	 * it, looping until the resolver catches up to the current node.  We
	 * could recurse here but we might run ourselves out of kernel stack
	 * so we do it in a more painful manner.  This situation really should
	 * not occur all that often, and if it does it should not have to go
	 * back too many nodes to resolve the ncp.
	 */
	while (ncp->nc_parent->nc_vp == NULL) {
		/*
		 * This case can occur if a process is CD'd into a
		 * directory which is then rmdir'd.  If the parent is marked
		 * destroyed there is no point trying to resolve it.
		 */
		if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
			return(ENOENT);

		par = ncp->nc_parent;
		while (par->nc_parent && par->nc_parent->nc_vp == NULL)
			par = par->nc_parent;
		if (par->nc_parent == NULL) {
			printf("EXDEV case 2 %*.*s\n",
				par->nc_nlen, par->nc_nlen, par->nc_name);
			return (EXDEV);
		}
		printf("[diagnostic] cache_resolve: had to recurse on %*.*s\n",
			par->nc_nlen, par->nc_nlen, par->nc_name);
		/*
		 * The parent is not set in stone, ref and lock it to prevent
		 * it from disappearing.  Also note that due to renames it
		 * is possible for our ncp to move and for par to no longer
		 * be one of its parents.  We resolve it anyway, the loop
		 * will handle any moves.
		 */
		cache_get(par);
		if (par->nc_flag & NCF_MOUNTPT) {
			cache_resolve_mp(par);
		} else if (par->nc_parent->nc_vp == NULL) {
			printf("[diagnostic] cache_resolve: raced on %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name);
			cache_put(par);
			continue;
		} else if (par->nc_flag & NCF_UNRESOLVED) {
			par->nc_error = VOP_NRESOLVE(par, cred);
		}
		if ((error = par->nc_error) != 0) {
			if (par->nc_error != EAGAIN) {
				printf("EXDEV case 3 %*.*s error %d\n",
					par->nc_nlen, par->nc_nlen, par->nc_name,
					par->nc_error);
				cache_put(par);
				return(error);
			}
			printf("[diagnostic] cache_resolve: EAGAIN par %p %*.*s\n",
				par, par->nc_nlen, par->nc_nlen, par->nc_name);
		}
		cache_put(par);
		/* loop */
	}

	/*
	 * Call VOP_NRESOLVE() to get the vp, then scan for any disconnected
	 * ncp's and reattach them.  If this occurs the original ncp is marked
	 * EAGAIN to force a relookup.
	 *
	 * NOTE: in order to call VOP_NRESOLVE(), the parent of the passed
	 * ncp must already be resolved.
	 */
	KKASSERT((ncp->nc_flag & NCF_MOUNTPT) == 0);
	ncp->nc_error = VOP_NRESOLVE(ncp, cred);
	/*vop_nresolve(*ncp->nc_parent->nc_vp->v_ops, ncp, cred);*/
	if (ncp->nc_error == EAGAIN) {
		printf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n",
			ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
		goto restart;
	}
	return(ncp->nc_error);
}

/*
 * Resolve the ncp associated with a mount point.  Such ncp's almost always
 * remain resolved and this routine is rarely called.  NFS MPs tend to force
 * re-resolution more often due to NFS's mack-truck-smash-the-namecache
 * method of tracking namespace changes.
 *
 * The semantics for this call are that the passed ncp must be locked on
 * entry and will be locked on return.  However, if we actually have to
 * resolve the mount point we temporarily unlock the entry in order to
 * avoid race-to-root deadlocks due to e.g. dead NFS mounts.  Because of
 * the unlock we have to recheck the flags after we relock.
 */
static int
cache_resolve_mp(struct namecache *ncp)
{
	struct vnode *vp;
	struct mount *mp = ncp->nc_mount;
	int error;

	KKASSERT(mp != NULL);

	/*
	 * If the ncp is already resolved we have nothing to do.  However,
	 * we do want to guarantee that a usable vnode is returned when
	 * a vnode is present, so make sure it hasn't been reclaimed.
	 */
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
			cache_setunresolved(ncp);
	}

	if (ncp->nc_flag & NCF_UNRESOLVED) {
		cache_unlock(ncp);
		while (vfs_busy(mp, 0, curthread))
			;
		error = VFS_ROOT(mp, &vp);
		cache_lock(ncp);

		/*
		 * recheck the ncp state after relocking.
		 */
		if (ncp->nc_flag & NCF_UNRESOLVED) {
			ncp->nc_error = error;
			if (error == 0) {
				cache_setvp(ncp, vp);
				vput(vp);
			} else {
				printf("[diagnostic] cache_resolve_mp: failed to resolve mount %p\n", mp);
				cache_setvp(ncp, NULL);
			}
		} else if (error == 0) {
			vput(vp);
		}
		vfs_unbusy(mp, curthread);
	}
	return(ncp->nc_error);
}

void
cache_cleanneg(int count)
{
	struct namecache *ncp;

	/*
	 * Automode from the vnlru proc - clean out 10% of the negative cache
	 * entries.
	 */
	if (count == 0)
		count = numneg / 10 + 1;

	/*
	 * Attempt to clean out the specified number of negative cache
	 * entries.
	 */
	while (count) {
		ncp = TAILQ_FIRST(&ncneglist);
		if (ncp == NULL) {
			KKASSERT(numneg == 0);
			break;
		}
		TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		if (cache_get_nonblock(ncp) == 0)
			cache_zap(ncp);
		--count;
	}
}

/*
 * Rehash a ncp.  Rehashing is typically required if the name changes (should
 * not generally occur) or the parent link changes.  This function will
 * unhash the ncp if the ncp is no longer hashable.
 */
static void
cache_rehash(struct namecache *ncp)
{
	struct nchashhead *nchpp;
	u_int32_t hash;

	if (ncp->nc_flag & NCF_HASHED) {
		ncp->nc_flag &= ~NCF_HASHED;
		LIST_REMOVE(ncp, nc_hash);
	}
	if (ncp->nc_nlen && ncp->nc_parent) {
		hash = fnv_32_buf(ncp->nc_name, ncp->nc_nlen, FNV1_32_INIT);
		hash = fnv_32_buf(&ncp->nc_parent,
				  sizeof(ncp->nc_parent), hash);
		nchpp = NCHHASH(hash);
		LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
		ncp->nc_flag |= NCF_HASHED;
	}
}

/*
 * Name cache initialization, from vfsinit() when we are booting
 */
void
nchinit(void)
{
	int i;
	globaldata_t gd;

	/* initialise per-cpu namecache effectiveness statistics. */
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		gd->gd_nchstats = &nchstats[i];
	}
	TAILQ_INIT(&ncneglist);
	nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
	nclockwarn = 1 * hz;
}

/*
 * Called from start_init() to bootstrap the root filesystem.  Returns
 * a referenced, locked namecache record.

/*
 * Called from start_init() to bootstrap the root filesystem.  Returns
 * a referenced, unlocked namecache record.
 */
struct namecache *
cache_allocroot(struct mount *mp, struct vnode *vp)
{
	struct namecache *ncp = cache_alloc(0);

	ncp->nc_flag |= NCF_MOUNTPT | NCF_ROOT;
	ncp->nc_mount = mp;
	cache_setvp(ncp, vp);
	return(ncp);
}

/*
 * vfs_cache_setroot()
 *
 * Create an association between the root of our namecache and
 * the root vnode.  This routine may be called several times during
 * booting.
 *
 * If the caller intends to save the passed namecache pointer somewhere
 * it must cache_hold() it.
 */
void
vfs_cache_setroot(struct vnode *nvp, struct namecache *ncp)
{
	struct vnode *ovp;
	struct namecache *oncp;

	ovp = rootvnode;
	oncp = rootncp;
	rootvnode = nvp;
	rootncp = ncp;

	if (ovp)
		vrele(ovp);
	if (oncp)
		cache_drop(oncp);
}

/*
 * XXX OLD API COMPAT FUNCTION.  This really messes up the new namecache
 * topology and is being removed as quickly as possible.  The new VOP_N*()
 * API calls are required to make specific adjustments using the supplied
 * ncp pointers rather than just bogusly purging random vnodes.
 *
 * Invalidate all namecache entries to a particular vnode as well as
 * any direct children of that vnode in the namecache.  This is a
 * 'catch all' purge used by filesystems that do not know any better.
 *
 * A new vnode v_id is generated.  Note that no vnode will ever have a
 * v_id of 0.
 *
 * Note that the linkage between the vnode and its namecache entries will
 * be removed, but the namecache entries themselves might stay put due to
 * active references from elsewhere in the system or due to the existence of
 * the children.  The namecache topology is left intact even if we do not
 * know what the vnode association is.  Such entries will be marked
 * NCF_UNRESOLVED.
 *
 * XXX: Only time and the size of v_id prevent this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 *
 * Does not support NCP_FSMID accumulation on invalidation (retflags is
 * not used).
 */
void
cache_purge(struct vnode *vp)
{
	static u_long nextid;
	int retflags = 0;

	cache_inval_vp(vp, CINV_DESTROY | CINV_CHILDREN, &retflags);

	/*
	 * Calculate a new unique id for ".." handling
	 */
	do {
		nextid++;
	} while (nextid == vp->v_id || nextid == 0);
	vp->v_id = nextid;
}
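
/*
 * Illustrative sketch (hypothetical, exposition only, under #if 0): a
 * filesystem still limited to the old API typically falls back to
 * cache_purge() when it invalidates a vnode wholesale, for example while
 * reclaiming it, instead of issuing targeted VOP_N*() adjustments.
 */
#if 0
static void
example_legacy_invalidate(struct vnode *vp)
{
	cache_purge(vp);	/* drop all name cache linkage to vp */
	/* filesystem-specific teardown would follow here */
}
#endif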

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *nchpp;
	struct namecache *ncp, *nnp;

	/*
	 * Scan hash tables for applicable entries.
	 */
	for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
		ncp = LIST_FIRST(nchpp);
		if (ncp)
			cache_hold(ncp);
		while (ncp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (nnp)
				cache_hold(nnp);
			if (ncp->nc_mount == mp) {
				cache_lock(ncp);
				cache_zap(ncp);
			} else {
				cache_drop(ncp);
			}
			ncp = nnp;
		}
	}
}

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

int
__getcwd(struct __getcwd_args *uap)
{
	int buflen;
	int error;
	char *buf;
	char *bp;

	if (disablecwd)
		return (ENODEV);

	buflen = uap->buflen;
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;

	buf = malloc(buflen, M_TEMP, M_WAITOK);
	bp = kern_getcwd(buf, buflen, &error);
	if (error == 0)
		error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}

char *
kern_getcwd(char *buf, size_t buflen, int *error)
{
	struct proc *p = curproc;
	char *bp;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;

	numcwdcalls++;
	bp = buf;
	bp += buflen - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;

	ncp = fdp->fd_ncdir;
	while (ncp && ncp != fdp->fd_nrdir && (ncp->nc_flag & NCF_ROOT) == 0) {
		if (ncp->nc_flag & NCF_MOUNTPT) {
			if (ncp->nc_mount == NULL) {
				*error = EBADF;		/* forced unmount? */
				return(NULL);
			}
			ncp = ncp->nc_parent;
			continue;
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;
				*error = ENOMEM;
				return(NULL);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
		slash_prefixed = 1;
		ncp = ncp->nc_parent;
	}
	if (ncp == NULL) {
		numcwdfail2++;
		*error = ENOENT;
		return(NULL);
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
	}
	numcwdfound++;
	*error = 0;
	return (bp);
}
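
/*
 * Illustrative sketch (hypothetical in-kernel caller, exposition only,
 * under #if 0): kern_getcwd() builds the path backwards from the end of
 * the supplied buffer, so the returned pointer lies somewhere inside the
 * buffer rather than at its start, and it is the buffer itself that must
 * be freed.
 */
#if 0
static int
example_print_cwd(void)
{
	char *buf, *path;
	int error;

	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	path = kern_getcwd(buf, MAXPATHLEN, &error);
	if (error == 0)
		printf("cwd: %s\n", path);
	free(buf, M_TEMP);		/* free buf, not path */
	return (error);
}
#endif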

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

int
cache_fullpath(struct proc *p, struct namecache *ncp, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct namecache *fd_nrdir;

	numfullpathcalls--;

	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	if (p != NULL)
		fd_nrdir = p->p_fd->fd_nrdir;
	else
		fd_nrdir = NULL;
	slash_prefixed = 0;
	while (ncp && ncp != fd_nrdir && (ncp->nc_flag & NCF_ROOT) == 0) {
		if (ncp->nc_flag & NCF_MOUNTPT) {
			if (ncp->nc_mount == NULL) {
				free(buf, M_TEMP);
				return(EBADF);
			}
			ncp = ncp->nc_parent;
			continue;
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				free(buf, M_TEMP);
				return(ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return(ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		ncp = ncp->nc_parent;
	}
	if (ncp == NULL) {
		numfullpathfail2++;
		free(buf, M_TEMP);
		return(ENOENT);
	}
	if (p != NULL && (ncp->nc_flag & NCF_ROOT) && ncp != fd_nrdir) {
		bp = buf + MAXPATHLEN - 1;
		*bp = '\0';
		slash_prefixed = 0;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return(ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;

	return(0);
}

int
vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
{
	struct namecache *ncp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);

	if (p == NULL)
		return (EINVAL);

	/* vn is NULL, client wants us to use p->p_textvp */
	if (vn == NULL) {
		if ((vn = p->p_textvp) == NULL)
			return (EINVAL);
	}
	TAILQ_FOREACH(ncp, &vn->v_namecache, nc_vnode) {
		if (ncp->nc_nlen)
			break;
	}
	if (ncp == NULL)
		return (EINVAL);

	numfullpathcalls--;
	return(cache_fullpath(p, ncp, retbuf, freebuf));
}
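
/*
 * Illustrative sketch (hypothetical caller, exposition only, under #if 0):
 * on success vn_fullpath()/cache_fullpath() hand back two pointers.
 * *retbuf points at the start of the path inside the allocation returned
 * in *freebuf; the caller releases the allocation (not the path pointer)
 * with free(..., M_TEMP).  On error nothing is returned and nothing needs
 * to be freed.
 */
#if 0
static void
example_print_vnode_path(struct proc *p, struct vnode *vp)
{
	char *retbuf, *freebuf;

	if (vn_fullpath(p, vp, &retbuf, &freebuf) == 0) {
		printf("vnode %p: %s\n", vp, retbuf);
		free(freebuf, M_TEMP);
	}
}
#endif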