1 /* $NetBSD: vfs_vnode.c,v 1.63 2016/12/14 15:49:35 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 /* 70 * The vnode cache subsystem. 71 * 72 * Life-cycle 73 * 74 * Normally, there are two points where new vnodes are created: 75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode 76 * starts in one of the following ways: 77 * 78 * - Allocation, via vcache_get(9) or vcache_new(9). 79 * - Reclamation of inactive vnode, via vget(9). 80 * 81 * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9) 82 * was another, traditional way. Currently, only the draining thread 83 * recycles the vnodes. This behaviour might be revisited. 84 * 85 * The life-cycle ends when the last reference is dropped, usually 86 * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform 87 * the file system that vnode is inactive. Via this call, file system 88 * indicates whether vnode can be recycled (usually, it checks its own 89 * references, e.g. count of links, whether the file was removed). 90 * 91 * Depending on indication, vnode can be put into a free list (cache), 92 * or cleaned via vcache_reclaim, which calls VOP_RECLAIM(9) to 93 * disassociate underlying file system from the vnode, and finally 94 * destroyed. 95 * 96 * Vnode state 97 * 98 * Vnode is always in one of six states: 99 * - MARKER This is a marker vnode to help list traversal. It 100 * will never change its state. 101 * - LOADING Vnode is associating underlying file system and not 102 * yet ready to use. 103 * - ACTIVE Vnode has associated underlying file system and is 104 * ready to use. 105 * - BLOCKED Vnode is active but cannot get new references. 106 * - RECLAIMING Vnode is disassociating from the underlying file 107 * system. 108 * - RECLAIMED Vnode has disassociated from underlying file system 109 * and is dead. 110 * 111 * Valid state changes are: 112 * LOADING -> ACTIVE 113 * Vnode has been initialised in vcache_get() or 114 * vcache_new() and is ready to use. 115 * ACTIVE -> RECLAIMING 116 * Vnode starts disassociation from underlying file 117 * system in vcache_reclaim(). 118 * RECLAIMING -> RECLAIMED 119 * Vnode finished disassociation from underlying file 120 * system in vcache_reclaim(). 121 * ACTIVE -> BLOCKED 122 * Either vcache_rekey*() is changing the vnode key or 123 * vrelel() is about to call VOP_INACTIVE(). 124 * BLOCKED -> ACTIVE 125 * The block condition is over. 126 * LOADING -> RECLAIMED 127 * Either vcache_get() or vcache_new() failed to 128 * associate the underlying file system or vcache_rekey*() 129 * drops a vnode used as placeholder. 130 * 131 * Of these states LOADING, BLOCKED and RECLAIMING are intermediate 132 * and it is possible to wait for state change. 133 * 134 * State is protected with v_interlock with one exception: 135 * to change from LOADING both v_interlock and vcache.lock must be held 136 * so it is possible to check "state == LOADING" without holding 137 * v_interlock. See vcache_get() for details. 
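 *
 *	As an illustration only (a condensed sketch of what vcache_get()
 *	below actually does, not an additional interface), a lookup that
 *	relies on this exception looks roughly like:
 *
 *		mutex_enter(&vcache.lock);
 *		node = vcache_hash_lookup(&vcache_key, hash);
 *		if (node != NULL && node->vi_state == VS_LOADING) {
 *			cv_wait(&vcache.cv, &vcache.lock);
 *			mutex_exit(&vcache.lock);
 *			... retry the lookup ...
 *		}
 *		vp = VIMPL_TO_VNODE(node);
 *		mutex_enter(vp->v_interlock);
 *		mutex_exit(&vcache.lock);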
138 * 139 * Reference counting 140 * 141 * Vnode is considered active, if reference count (vnode_t::v_usecount) 142 * is non-zero. It is maintained using: vref(9) and vrele(9), as well 143 * as vput(9), routines. Common points holding references are e.g. 144 * file openings, current working directory, mount points, etc. 145 * 146 * Note on v_usecount and its locking 147 * 148 * At nearly all points it is known that v_usecount could be zero, 149 * the vnode_t::v_interlock will be held. To change v_usecount away 150 * from zero, the interlock must be held. To change from a non-zero 151 * value to zero, again the interlock must be held. 152 * 153 * Changing the usecount from a non-zero value to a non-zero value can 154 * safely be done using atomic operations, without the interlock held. 155 * 156 */ 157 158 #include <sys/cdefs.h> 159 __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.63 2016/12/14 15:49:35 hannken Exp $"); 160 161 #include <sys/param.h> 162 #include <sys/kernel.h> 163 164 #include <sys/atomic.h> 165 #include <sys/buf.h> 166 #include <sys/conf.h> 167 #include <sys/device.h> 168 #include <sys/hash.h> 169 #include <sys/kauth.h> 170 #include <sys/kmem.h> 171 #include <sys/kthread.h> 172 #include <sys/module.h> 173 #include <sys/mount.h> 174 #include <sys/namei.h> 175 #include <sys/syscallargs.h> 176 #include <sys/sysctl.h> 177 #include <sys/systm.h> 178 #include <sys/vnode_impl.h> 179 #include <sys/wapbl.h> 180 #include <sys/fstrans.h> 181 182 #include <uvm/uvm.h> 183 #include <uvm/uvm_readahead.h> 184 185 /* Flags to vrelel. */ 186 #define VRELEL_ASYNC_RELE 0x0001 /* Always defer to vrele thread. */ 187 188 u_int numvnodes __cacheline_aligned; 189 190 /* 191 * There are three lru lists: one holds vnodes waiting for async release, 192 * one is for vnodes which have no buffer/page references and 193 * one for those which do (i.e. v_holdcnt is non-zero). 194 */ 195 static vnodelst_t lru_vrele_list __cacheline_aligned; 196 static vnodelst_t lru_free_list __cacheline_aligned; 197 static vnodelst_t lru_hold_list __cacheline_aligned; 198 static kmutex_t vdrain_lock __cacheline_aligned; 199 static kcondvar_t vdrain_cv __cacheline_aligned; 200 static int vdrain_gen; 201 static kcondvar_t vdrain_gen_cv; 202 static bool vdrain_retry; 203 static lwp_t * vdrain_lwp; 204 SLIST_HEAD(hashhead, vnode_impl); 205 static struct { 206 kmutex_t lock; 207 kcondvar_t cv; 208 u_int hashsize; 209 u_long hashmask; 210 struct hashhead *hashtab; 211 pool_cache_t pool; 212 } vcache __cacheline_aligned; 213 214 static void lru_requeue(vnode_t *, vnodelst_t *); 215 static vnodelst_t * lru_which(vnode_t *); 216 static vnode_impl_t * vcache_alloc(void); 217 static void vcache_free(vnode_impl_t *); 218 static void vcache_init(void); 219 static void vcache_reinit(void); 220 static void vcache_reclaim(vnode_t *); 221 static void vrelel(vnode_t *, int); 222 static void vdrain_thread(void *); 223 static void vnpanic(vnode_t *, const char *, ...) 224 __printflike(2, 3); 225 226 /* Routines having to do with the management of the vnode table. */ 227 extern struct mount *dead_rootmount; 228 extern int (**dead_vnodeop_p)(void *); 229 extern struct vfsops dead_vfsops; 230 231 /* Vnode state operations and diagnostics. 
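 *
 * The VSTATE_* macros below assert and change vi_state with v_interlock
 * held.  As an illustration only (this mirrors what vrelel() does further
 * down, it is not an additional interface), deactivation brackets
 * VOP_INACTIVE() like:
 *
 *	VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED);
 *	mutex_exit(vp->v_interlock);
 *	VOP_INACTIVE(vp, &recycle);
 *	mutex_enter(vp->v_interlock);
 *	VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE);
 *
 * With DIAGNOSTIC kernels an invalid transition panics; otherwise the
 * change is a plain assignment plus the wakeups expected by waiters in
 * VSTATE_WAIT_STABLE() and vcache_get().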
*/ 232 233 #if defined(DIAGNOSTIC) 234 235 #define VSTATE_GET(vp) \ 236 vstate_assert_get((vp), __func__, __LINE__) 237 #define VSTATE_CHANGE(vp, from, to) \ 238 vstate_assert_change((vp), (from), (to), __func__, __LINE__) 239 #define VSTATE_WAIT_STABLE(vp) \ 240 vstate_assert_wait_stable((vp), __func__, __LINE__) 241 #define VSTATE_ASSERT(vp, state) \ 242 vstate_assert((vp), (state), __func__, __LINE__) 243 244 static void 245 vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line) 246 { 247 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 248 249 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 250 251 if (__predict_true(node->vi_state == state)) 252 return; 253 vnpanic(vp, "state is %s, expected %s at %s:%d", 254 vstate_name(node->vi_state), vstate_name(state), func, line); 255 } 256 257 static enum vnode_state 258 vstate_assert_get(vnode_t *vp, const char *func, int line) 259 { 260 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 261 262 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 263 if (node->vi_state == VS_MARKER) 264 vnpanic(vp, "state is %s at %s:%d", 265 vstate_name(node->vi_state), func, line); 266 267 return node->vi_state; 268 } 269 270 static void 271 vstate_assert_wait_stable(vnode_t *vp, const char *func, int line) 272 { 273 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 274 275 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 276 if (node->vi_state == VS_MARKER) 277 vnpanic(vp, "state is %s at %s:%d", 278 vstate_name(node->vi_state), func, line); 279 280 while (node->vi_state != VS_ACTIVE && node->vi_state != VS_RECLAIMED) 281 cv_wait(&vp->v_cv, vp->v_interlock); 282 283 if (node->vi_state == VS_MARKER) 284 vnpanic(vp, "state is %s at %s:%d", 285 vstate_name(node->vi_state), func, line); 286 } 287 288 static void 289 vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to, 290 const char *func, int line) 291 { 292 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 293 294 KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 295 if (from == VS_LOADING) 296 KASSERTMSG(mutex_owned(&vcache.lock), "at %s:%d", func, line); 297 298 if (from == VS_MARKER) 299 vnpanic(vp, "from is %s at %s:%d", 300 vstate_name(from), func, line); 301 if (to == VS_MARKER) 302 vnpanic(vp, "to is %s at %s:%d", 303 vstate_name(to), func, line); 304 if (node->vi_state != from) 305 vnpanic(vp, "from is %s, expected %s at %s:%d\n", 306 vstate_name(node->vi_state), vstate_name(from), func, line); 307 308 node->vi_state = to; 309 if (from == VS_LOADING) 310 cv_broadcast(&vcache.cv); 311 if (to == VS_ACTIVE || to == VS_RECLAIMED) 312 cv_broadcast(&vp->v_cv); 313 } 314 315 #else /* defined(DIAGNOSTIC) */ 316 317 #define VSTATE_GET(vp) \ 318 (VNODE_TO_VIMPL((vp))->vi_state) 319 #define VSTATE_CHANGE(vp, from, to) \ 320 vstate_change((vp), (from), (to)) 321 #define VSTATE_WAIT_STABLE(vp) \ 322 vstate_wait_stable((vp)) 323 #define VSTATE_ASSERT(vp, state) 324 325 static void 326 vstate_wait_stable(vnode_t *vp) 327 { 328 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 329 330 while (node->vi_state != VS_ACTIVE && node->vi_state != VS_RECLAIMED) 331 cv_wait(&vp->v_cv, vp->v_interlock); 332 } 333 334 static void 335 vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) 336 { 337 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 338 339 node->vi_state = to; 340 if (from == VS_LOADING) 341 cv_broadcast(&vcache.cv); 342 if (to == VS_ACTIVE || to == VS_RECLAIMED) 343 cv_broadcast(&vp->v_cv); 344 } 345 346 #endif /* defined(DIAGNOSTIC) */ 
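/*
 * Illustration of the v_usecount rules described in the comment at the
 * top of this file (a sketch only; the working implementations are
 * vtryrele() and vget() below).  Dropping a reference that is known not
 * to be the last one may use atomics alone, while transitions to and
 * from zero are made with v_interlock held:
 *
 *	u_int use, next;
 *
 *	for (use = vp->v_usecount;; use = next) {
 *		if (use == 1)
 *			break;		(last reference: take v_interlock
 *					 and go through vrelel() instead)
 *		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
 *		if (next == use)
 *			break;		(non-zero -> non-zero, done)
 *	}
 */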
347 348 void 349 vfs_vnode_sysinit(void) 350 { 351 int error __diagused; 352 353 dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL); 354 KASSERT(dead_rootmount != NULL); 355 dead_rootmount->mnt_iflag = IMNT_MPSAFE; 356 357 mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE); 358 TAILQ_INIT(&lru_free_list); 359 TAILQ_INIT(&lru_hold_list); 360 TAILQ_INIT(&lru_vrele_list); 361 362 vcache_init(); 363 364 cv_init(&vdrain_cv, "vdrain"); 365 cv_init(&vdrain_gen_cv, "vdrainwt"); 366 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread, 367 NULL, &vdrain_lwp, "vdrain"); 368 KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error); 369 } 370 371 /* 372 * Allocate a new marker vnode. 373 */ 374 vnode_t * 375 vnalloc_marker(struct mount *mp) 376 { 377 vnode_impl_t *node; 378 vnode_t *vp; 379 380 node = pool_cache_get(vcache.pool, PR_WAITOK); 381 memset(node, 0, sizeof(*node)); 382 vp = VIMPL_TO_VNODE(node); 383 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 384 vp->v_mount = mp; 385 vp->v_type = VBAD; 386 node->vi_state = VS_MARKER; 387 388 return vp; 389 } 390 391 /* 392 * Free a marker vnode. 393 */ 394 void 395 vnfree_marker(vnode_t *vp) 396 { 397 vnode_impl_t *node; 398 399 node = VNODE_TO_VIMPL(vp); 400 KASSERT(node->vi_state == VS_MARKER); 401 uvm_obj_destroy(&vp->v_uobj, true); 402 pool_cache_put(vcache.pool, node); 403 } 404 405 /* 406 * Test a vnode for being a marker vnode. 407 */ 408 bool 409 vnis_marker(vnode_t *vp) 410 { 411 412 return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER); 413 } 414 415 /* 416 * Return the lru list this node should be on. 417 */ 418 static vnodelst_t * 419 lru_which(vnode_t *vp) 420 { 421 422 KASSERT(mutex_owned(vp->v_interlock)); 423 424 if (vp->v_holdcnt > 0) 425 return &lru_hold_list; 426 else 427 return &lru_free_list; 428 } 429 430 /* 431 * Put vnode to end of given list. 432 * Both the current and the new list may be NULL, used on vnode alloc/free. 433 * Adjust numvnodes and signal vdrain thread if there is work. 434 */ 435 static void 436 lru_requeue(vnode_t *vp, vnodelst_t *listhd) 437 { 438 vnode_impl_t *node; 439 440 mutex_enter(&vdrain_lock); 441 node = VNODE_TO_VIMPL(vp); 442 if (node->vi_lrulisthd != NULL) 443 TAILQ_REMOVE(node->vi_lrulisthd, node, vi_lrulist); 444 else 445 numvnodes++; 446 node->vi_lrulisthd = listhd; 447 if (node->vi_lrulisthd != NULL) 448 TAILQ_INSERT_TAIL(node->vi_lrulisthd, node, vi_lrulist); 449 else 450 numvnodes--; 451 if (numvnodes > desiredvnodes || listhd == &lru_vrele_list) 452 cv_broadcast(&vdrain_cv); 453 mutex_exit(&vdrain_lock); 454 } 455 456 /* 457 * Reclaim a cached vnode. Used from vdrain_thread only. 458 */ 459 static __inline void 460 vdrain_remove(vnode_t *vp) 461 { 462 struct mount *mp; 463 464 KASSERT(mutex_owned(&vdrain_lock)); 465 466 /* Probe usecount (unlocked). */ 467 if (vp->v_usecount > 0) 468 return; 469 /* Try v_interlock -- we lock the wrong direction! */ 470 if (!mutex_tryenter(vp->v_interlock)) 471 return; 472 /* Probe usecount and state. */ 473 if (vp->v_usecount > 0 || VSTATE_GET(vp) != VS_ACTIVE) { 474 mutex_exit(vp->v_interlock); 475 return; 476 } 477 mp = vp->v_mount; 478 if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) { 479 mutex_exit(vp->v_interlock); 480 return; 481 } 482 vdrain_retry = true; 483 mutex_exit(&vdrain_lock); 484 485 if (vget(vp, 0, true /* wait */) == 0) { 486 if (!vrecycle(vp)) 487 vrele(vp); 488 } 489 fstrans_done(mp); 490 491 mutex_enter(&vdrain_lock); 492 } 493 494 /* 495 * Release a cached vnode. Used from vdrain_thread only. 
496 */ 497 static __inline void 498 vdrain_vrele(vnode_t *vp) 499 { 500 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 501 struct mount *mp; 502 503 KASSERT(mutex_owned(&vdrain_lock)); 504 505 /* 506 * Safe to take v_interlock -- no other thread will 507 * lock v_interlock -> vdrain_lock as usecount > 0. 508 */ 509 mutex_enter(vp->v_interlock); 510 mp = vp->v_mount; 511 if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) { 512 mutex_exit(vp->v_interlock); 513 return; 514 } 515 516 /* First put the vnode back onto its lru list. */ 517 KASSERT(node->vi_lrulisthd == &lru_vrele_list); 518 TAILQ_REMOVE(node->vi_lrulisthd, node, vi_lrulist); 519 node->vi_lrulisthd = lru_which(vp); 520 TAILQ_INSERT_TAIL(node->vi_lrulisthd, node, vi_lrulist); 521 522 vdrain_retry = true; 523 mutex_exit(&vdrain_lock); 524 525 vrelel(vp, 0); 526 fstrans_done(mp); 527 528 mutex_enter(&vdrain_lock); 529 } 530 531 /* 532 * Helper thread to keep the number of vnodes below desiredvnodes 533 * and release vnodes from asynchronous vrele. 534 */ 535 static void 536 vdrain_thread(void *cookie) 537 { 538 vnodelst_t *listhd[] = { 539 &lru_vrele_list, &lru_free_list, &lru_hold_list 540 }; 541 int i; 542 u_int target; 543 vnode_impl_t *node, *marker; 544 545 marker = VNODE_TO_VIMPL(vnalloc_marker(NULL)); 546 547 mutex_enter(&vdrain_lock); 548 549 for (;;) { 550 vdrain_retry = false; 551 target = desiredvnodes - desiredvnodes/10; 552 553 for (i = 0; i < __arraycount(listhd); i++) { 554 TAILQ_INSERT_HEAD(listhd[i], marker, vi_lrulist); 555 while ((node = TAILQ_NEXT(marker, vi_lrulist))) { 556 TAILQ_REMOVE(listhd[i], marker, vi_lrulist); 557 TAILQ_INSERT_AFTER(listhd[i], node, marker, 558 vi_lrulist); 559 if (listhd[i] == &lru_vrele_list) 560 vdrain_vrele(VIMPL_TO_VNODE(node)); 561 else if (numvnodes < target) 562 break; 563 else 564 vdrain_remove(VIMPL_TO_VNODE(node)); 565 } 566 TAILQ_REMOVE(listhd[i], marker, vi_lrulist); 567 } 568 569 if (vdrain_retry) { 570 mutex_exit(&vdrain_lock); 571 yield(); 572 mutex_enter(&vdrain_lock); 573 } else { 574 vdrain_gen++; 575 cv_broadcast(&vdrain_gen_cv); 576 cv_wait(&vdrain_cv, &vdrain_lock); 577 } 578 } 579 } 580 581 /* 582 * vget: get a particular vnode from the free list, increment its reference 583 * count and return it. 584 * 585 * => Must be called with v_interlock held. 586 * 587 * If state is VS_RECLAIMING, the vnode may be eliminated in vcache_reclaim(). 588 * In that case, we cannot grab the vnode, so the process is awakened when 589 * the transition is completed, and an error returned to indicate that the 590 * vnode is no longer usable. 591 * 592 * If state is VS_LOADING or VS_BLOCKED, wait until the vnode enters a 593 * stable state (VS_ACTIVE or VS_RECLAIMED). 594 */ 595 int 596 vget(vnode_t *vp, int flags, bool waitok) 597 { 598 599 KASSERT(mutex_owned(vp->v_interlock)); 600 KASSERT((flags & ~LK_NOWAIT) == 0); 601 KASSERT(waitok == ((flags & LK_NOWAIT) == 0)); 602 603 /* 604 * Before adding a reference, we must remove the vnode 605 * from its freelist. 606 */ 607 if (vp->v_usecount == 0) { 608 vp->v_usecount = 1; 609 } else { 610 atomic_inc_uint(&vp->v_usecount); 611 } 612 613 /* 614 * If the vnode is in the process of changing state we wait 615 * for the change to complete and take care not to return 616 * a clean vnode. 617 */ 618 if (! 
ISSET(flags, LK_NOWAIT)) 619 VSTATE_WAIT_STABLE(vp); 620 if (VSTATE_GET(vp) == VS_RECLAIMED) { 621 vrelel(vp, 0); 622 return ENOENT; 623 } else if (VSTATE_GET(vp) != VS_ACTIVE) { 624 KASSERT(ISSET(flags, LK_NOWAIT)); 625 vrelel(vp, 0); 626 return EBUSY; 627 } 628 629 /* 630 * Ok, we got it in good shape. 631 */ 632 VSTATE_ASSERT(vp, VS_ACTIVE); 633 mutex_exit(vp->v_interlock); 634 635 return 0; 636 } 637 638 /* 639 * vput: unlock and release the reference. 640 */ 641 void 642 vput(vnode_t *vp) 643 { 644 645 VOP_UNLOCK(vp); 646 vrele(vp); 647 } 648 649 /* 650 * Try to drop reference on a vnode. Abort if we are releasing the 651 * last reference. Note: this _must_ succeed if not the last reference. 652 */ 653 static inline bool 654 vtryrele(vnode_t *vp) 655 { 656 u_int use, next; 657 658 for (use = vp->v_usecount;; use = next) { 659 if (use == 1) { 660 return false; 661 } 662 KASSERT(use > 1); 663 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 664 if (__predict_true(next == use)) { 665 return true; 666 } 667 } 668 } 669 670 /* 671 * Vnode release. If reference count drops to zero, call inactive 672 * routine and either return to freelist or free to the pool. 673 */ 674 static void 675 vrelel(vnode_t *vp, int flags) 676 { 677 bool recycle, defer; 678 int error; 679 680 KASSERT(mutex_owned(vp->v_interlock)); 681 682 if (__predict_false(vp->v_op == dead_vnodeop_p && 683 VSTATE_GET(vp) != VS_RECLAIMED)) { 684 vnpanic(vp, "dead but not clean"); 685 } 686 687 /* 688 * If not the last reference, just drop the reference count 689 * and unlock. 690 */ 691 if (vtryrele(vp)) { 692 mutex_exit(vp->v_interlock); 693 return; 694 } 695 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 696 vnpanic(vp, "%s: bad ref count", __func__); 697 } 698 699 #ifdef DIAGNOSTIC 700 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 701 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 702 vprint("vrelel: missing VOP_CLOSE()", vp); 703 } 704 #endif 705 706 /* 707 * If not clean, deactivate the vnode, but preserve 708 * our reference across the call to VOP_INACTIVE(). 709 */ 710 if (VSTATE_GET(vp) != VS_RECLAIMED) { 711 recycle = false; 712 713 /* 714 * XXX This ugly block can be largely eliminated if 715 * locking is pushed down into the file systems. 716 * 717 * Defer vnode release to vdrain_thread if caller 718 * requests it explicitly or is the pagedaemon. 719 */ 720 if ((curlwp == uvm.pagedaemon_lwp) || 721 (flags & VRELEL_ASYNC_RELE) != 0) { 722 defer = true; 723 } else if (curlwp == vdrain_lwp) { 724 /* 725 * We have to try harder. 726 */ 727 mutex_exit(vp->v_interlock); 728 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 729 KASSERTMSG((error == 0), "vn_lock failed: %d", error); 730 mutex_enter(vp->v_interlock); 731 defer = false; 732 } else { 733 /* If we can't acquire the lock, then defer. */ 734 mutex_exit(vp->v_interlock); 735 error = vn_lock(vp, 736 LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 737 defer = (error != 0); 738 mutex_enter(vp->v_interlock); 739 } 740 741 KASSERT(mutex_owned(vp->v_interlock)); 742 KASSERT(! (curlwp == vdrain_lwp && defer)); 743 744 if (defer) { 745 /* 746 * Defer reclaim to the kthread; it's not safe to 747 * clean it here. We donate it our last reference. 748 */ 749 lru_requeue(vp, &lru_vrele_list); 750 mutex_exit(vp->v_interlock); 751 return; 752 } 753 754 /* 755 * If the node got another reference while we 756 * released the interlock, don't try to inactivate it yet. 
757 */ 758 if (__predict_false(vtryrele(vp))) { 759 VOP_UNLOCK(vp); 760 mutex_exit(vp->v_interlock); 761 return; 762 } 763 VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED); 764 mutex_exit(vp->v_interlock); 765 766 /* 767 * The vnode must not gain another reference while being 768 * deactivated. If VOP_INACTIVE() indicates that 769 * the described file has been deleted, then recycle 770 * the vnode. 771 * 772 * Note that VOP_INACTIVE() will drop the vnode lock. 773 */ 774 VOP_INACTIVE(vp, &recycle); 775 if (recycle) { 776 /* vcache_reclaim() below will drop the lock. */ 777 if (vn_lock(vp, LK_EXCLUSIVE) != 0) 778 recycle = false; 779 } 780 mutex_enter(vp->v_interlock); 781 VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE); 782 if (!recycle) { 783 if (vtryrele(vp)) { 784 mutex_exit(vp->v_interlock); 785 return; 786 } 787 } 788 789 /* Take care of space accounting. */ 790 if (vp->v_iflag & VI_EXECMAP) { 791 atomic_add_int(&uvmexp.execpages, 792 -vp->v_uobj.uo_npages); 793 atomic_add_int(&uvmexp.filepages, 794 vp->v_uobj.uo_npages); 795 } 796 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 797 vp->v_vflag &= ~VV_MAPPED; 798 799 /* 800 * Recycle the vnode if the file is now unused (unlinked), 801 * otherwise just free it. 802 */ 803 if (recycle) { 804 VSTATE_ASSERT(vp, VS_ACTIVE); 805 vcache_reclaim(vp); 806 } 807 KASSERT(vp->v_usecount > 0); 808 } 809 810 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 811 /* Gained another reference while being reclaimed. */ 812 mutex_exit(vp->v_interlock); 813 return; 814 } 815 816 if (VSTATE_GET(vp) == VS_RECLAIMED) { 817 /* 818 * It's clean so destroy it. It isn't referenced 819 * anywhere since it has been reclaimed. 820 */ 821 KASSERT(vp->v_holdcnt == 0); 822 KASSERT(vp->v_writecount == 0); 823 mutex_exit(vp->v_interlock); 824 vfs_insmntque(vp, NULL); 825 if (vp->v_type == VBLK || vp->v_type == VCHR) { 826 spec_node_destroy(vp); 827 } 828 vcache_free(VNODE_TO_VIMPL(vp)); 829 } else { 830 /* 831 * Otherwise, put it back onto the freelist. It 832 * can't be destroyed while still associated with 833 * a file system. 834 */ 835 lru_requeue(vp, lru_which(vp)); 836 mutex_exit(vp->v_interlock); 837 } 838 } 839 840 void 841 vrele(vnode_t *vp) 842 { 843 844 if (vtryrele(vp)) { 845 return; 846 } 847 mutex_enter(vp->v_interlock); 848 vrelel(vp, 0); 849 } 850 851 /* 852 * Asynchronous vnode release, vnode is released in different context. 853 */ 854 void 855 vrele_async(vnode_t *vp) 856 { 857 858 if (vtryrele(vp)) { 859 return; 860 } 861 mutex_enter(vp->v_interlock); 862 vrelel(vp, VRELEL_ASYNC_RELE); 863 } 864 865 /* 866 * Vnode reference, where a reference is already held by some other 867 * object (for example, a file structure). 868 */ 869 void 870 vref(vnode_t *vp) 871 { 872 873 KASSERT(vp->v_usecount != 0); 874 875 atomic_inc_uint(&vp->v_usecount); 876 } 877 878 /* 879 * Page or buffer structure gets a reference. 880 * Called with v_interlock held. 881 */ 882 void 883 vholdl(vnode_t *vp) 884 { 885 886 KASSERT(mutex_owned(vp->v_interlock)); 887 888 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) 889 lru_requeue(vp, lru_which(vp)); 890 } 891 892 /* 893 * Page or buffer structure frees a reference. 894 * Called with v_interlock held. 
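 *
 * This is the counterpart of vholdl() above: a buffer or page takes a
 * hold reference while it points at the vnode and drops it here.  When
 * the last hold of an otherwise unused vnode goes away, lru_requeue()
 * moves the vnode from lru_hold_list back to lru_free_list (see
 * lru_which()), making it a candidate for vdrain_remove().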
895 */ 896 void 897 holdrelel(vnode_t *vp) 898 { 899 900 KASSERT(mutex_owned(vp->v_interlock)); 901 902 if (vp->v_holdcnt <= 0) { 903 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 904 } 905 906 vp->v_holdcnt--; 907 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) 908 lru_requeue(vp, lru_which(vp)); 909 } 910 911 /* 912 * Recycle an unused vnode if caller holds the last reference. 913 */ 914 bool 915 vrecycle(vnode_t *vp) 916 { 917 int error __diagused; 918 919 mutex_enter(vp->v_interlock); 920 921 /* Make sure we hold the last reference. */ 922 VSTATE_WAIT_STABLE(vp); 923 if (vp->v_usecount != 1) { 924 mutex_exit(vp->v_interlock); 925 return false; 926 } 927 928 /* If the vnode is already clean we're done. */ 929 if (VSTATE_GET(vp) != VS_ACTIVE) { 930 VSTATE_ASSERT(vp, VS_RECLAIMED); 931 vrelel(vp, 0); 932 return true; 933 } 934 935 /* Prevent further references until the vnode is locked. */ 936 VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED); 937 mutex_exit(vp->v_interlock); 938 939 error = vn_lock(vp, LK_EXCLUSIVE); 940 KASSERT(error == 0); 941 942 mutex_enter(vp->v_interlock); 943 VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE); 944 945 vcache_reclaim(vp); 946 vrelel(vp, 0); 947 948 return true; 949 } 950 951 /* 952 * Eliminate all activity associated with the requested vnode 953 * and with all vnodes aliased to the requested vnode. 954 */ 955 void 956 vrevoke(vnode_t *vp) 957 { 958 vnode_t *vq; 959 enum vtype type; 960 dev_t dev; 961 962 KASSERT(vp->v_usecount > 0); 963 964 mutex_enter(vp->v_interlock); 965 VSTATE_WAIT_STABLE(vp); 966 if (VSTATE_GET(vp) == VS_RECLAIMED) { 967 mutex_exit(vp->v_interlock); 968 return; 969 } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 970 atomic_inc_uint(&vp->v_usecount); 971 mutex_exit(vp->v_interlock); 972 vgone(vp); 973 return; 974 } else { 975 dev = vp->v_rdev; 976 type = vp->v_type; 977 mutex_exit(vp->v_interlock); 978 } 979 980 while (spec_node_lookup_by_dev(type, dev, &vq) == 0) { 981 vgone(vq); 982 } 983 } 984 985 /* 986 * Eliminate all activity associated with a vnode in preparation for 987 * reuse. Drops a reference from the vnode. 
988 */ 989 void 990 vgone(vnode_t *vp) 991 { 992 993 if (vn_lock(vp, LK_EXCLUSIVE) != 0) { 994 VSTATE_ASSERT(vp, VS_RECLAIMED); 995 vrele(vp); return; 996 } 997 998 mutex_enter(vp->v_interlock); 999 vcache_reclaim(vp); 1000 vrelel(vp, 0); 1001 } 1002 1003 static inline uint32_t 1004 vcache_hash(const struct vcache_key *key) 1005 { 1006 uint32_t hash = HASH32_BUF_INIT; 1007 1008 hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); 1009 hash = hash32_buf(key->vk_key, key->vk_key_len, hash); 1010 return hash; 1011 } 1012 1013 static void 1014 vcache_init(void) 1015 { 1016 1017 vcache.pool = pool_cache_init(sizeof(vnode_impl_t), 0, 0, 0, 1018 "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL); 1019 KASSERT(vcache.pool != NULL); 1020 mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE); 1021 cv_init(&vcache.cv, "vcache"); 1022 vcache.hashsize = desiredvnodes; 1023 vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true, 1024 &vcache.hashmask); 1025 } 1026 1027 static void 1028 vcache_reinit(void) 1029 { 1030 int i; 1031 uint32_t hash; 1032 u_long oldmask, newmask; 1033 struct hashhead *oldtab, *newtab; 1034 vnode_impl_t *node; 1035 1036 newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); 1037 mutex_enter(&vcache.lock); 1038 oldtab = vcache.hashtab; 1039 oldmask = vcache.hashmask; 1040 vcache.hashsize = desiredvnodes; 1041 vcache.hashtab = newtab; 1042 vcache.hashmask = newmask; 1043 for (i = 0; i <= oldmask; i++) { 1044 while ((node = SLIST_FIRST(&oldtab[i])) != NULL) { 1045 SLIST_REMOVE(&oldtab[i], node, vnode_impl, vi_hash); 1046 hash = vcache_hash(&node->vi_key); 1047 SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask], 1048 node, vi_hash); 1049 } 1050 } 1051 mutex_exit(&vcache.lock); 1052 hashdone(oldtab, HASH_SLIST, oldmask); 1053 } 1054 1055 static inline vnode_impl_t * 1056 vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) 1057 { 1058 struct hashhead *hashp; 1059 vnode_impl_t *node; 1060 1061 KASSERT(mutex_owned(&vcache.lock)); 1062 1063 hashp = &vcache.hashtab[hash & vcache.hashmask]; 1064 SLIST_FOREACH(node, hashp, vi_hash) { 1065 if (key->vk_mount != node->vi_key.vk_mount) 1066 continue; 1067 if (key->vk_key_len != node->vi_key.vk_key_len) 1068 continue; 1069 if (memcmp(key->vk_key, node->vi_key.vk_key, key->vk_key_len)) 1070 continue; 1071 return node; 1072 } 1073 return NULL; 1074 } 1075 1076 /* 1077 * Allocate a new, uninitialized vcache node. 1078 */ 1079 static vnode_impl_t * 1080 vcache_alloc(void) 1081 { 1082 vnode_impl_t *node; 1083 vnode_t *vp; 1084 1085 node = pool_cache_get(vcache.pool, PR_WAITOK); 1086 memset(node, 0, sizeof(*node)); 1087 1088 /* SLIST_INIT(&node->vi_hash); */ 1089 1090 vp = VIMPL_TO_VNODE(node); 1091 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 1092 cv_init(&vp->v_cv, "vnode"); 1093 /* LIST_INIT(&vp->v_nclist); */ 1094 /* LIST_INIT(&vp->v_dnclist); */ 1095 1096 rw_init(&vp->v_lock); 1097 vp->v_usecount = 1; 1098 vp->v_type = VNON; 1099 vp->v_size = vp->v_writesize = VSIZENOTSET; 1100 1101 node->vi_state = VS_LOADING; 1102 1103 lru_requeue(vp, &lru_free_list); 1104 1105 return node; 1106 } 1107 1108 /* 1109 * Free an unused, unreferenced vcache node.
1110 */ 1111 static void 1112 vcache_free(vnode_impl_t *node) 1113 { 1114 vnode_t *vp; 1115 1116 vp = VIMPL_TO_VNODE(node); 1117 1118 KASSERT(vp->v_usecount == 0); 1119 1120 lru_requeue(vp, NULL); 1121 rw_destroy(&vp->v_lock); 1122 uvm_obj_destroy(&vp->v_uobj, true); 1123 cv_destroy(&vp->v_cv); 1124 pool_cache_put(vcache.pool, node); 1125 } 1126 1127 /* 1128 * Get a vnode / fs node pair by key and return it referenced through vpp. 1129 */ 1130 int 1131 vcache_get(struct mount *mp, const void *key, size_t key_len, 1132 struct vnode **vpp) 1133 { 1134 int error; 1135 uint32_t hash; 1136 const void *new_key; 1137 struct vnode *vp; 1138 struct vcache_key vcache_key; 1139 vnode_impl_t *node, *new_node; 1140 1141 new_key = NULL; 1142 *vpp = NULL; 1143 1144 vcache_key.vk_mount = mp; 1145 vcache_key.vk_key = key; 1146 vcache_key.vk_key_len = key_len; 1147 hash = vcache_hash(&vcache_key); 1148 1149 again: 1150 mutex_enter(&vcache.lock); 1151 node = vcache_hash_lookup(&vcache_key, hash); 1152 1153 /* If found, take a reference or retry. */ 1154 if (__predict_true(node != NULL)) { 1155 /* 1156 * If the vnode is loading we cannot take the v_interlock 1157 * here as it might change during load (see uvm_obj_setlock()). 1158 * As changing state from VS_LOADING requires both vcache.lock 1159 * and v_interlock it is safe to test with vcache.lock held. 1160 * 1161 * Wait for vnodes changing state from VS_LOADING and retry. 1162 */ 1163 if (__predict_false(node->vi_state == VS_LOADING)) { 1164 cv_wait(&vcache.cv, &vcache.lock); 1165 mutex_exit(&vcache.lock); 1166 goto again; 1167 } 1168 vp = VIMPL_TO_VNODE(node); 1169 mutex_enter(vp->v_interlock); 1170 mutex_exit(&vcache.lock); 1171 error = vget(vp, 0, true /* wait */); 1172 if (error == ENOENT) 1173 goto again; 1174 if (error == 0) 1175 *vpp = vp; 1176 KASSERT((error != 0) == (*vpp == NULL)); 1177 return error; 1178 } 1179 mutex_exit(&vcache.lock); 1180 1181 /* Allocate and initialize a new vcache / vnode pair. */ 1182 error = vfs_busy(mp, NULL); 1183 if (error) 1184 return error; 1185 new_node = vcache_alloc(); 1186 new_node->vi_key = vcache_key; 1187 vp = VIMPL_TO_VNODE(new_node); 1188 mutex_enter(&vcache.lock); 1189 node = vcache_hash_lookup(&vcache_key, hash); 1190 if (node == NULL) { 1191 SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask], 1192 new_node, vi_hash); 1193 node = new_node; 1194 } 1195 1196 /* If another thread beat us inserting this node, retry. */ 1197 if (node != new_node) { 1198 mutex_enter(vp->v_interlock); 1199 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 1200 mutex_exit(&vcache.lock); 1201 vrelel(vp, 0); 1202 vfs_unbusy(mp, false, NULL); 1203 goto again; 1204 } 1205 mutex_exit(&vcache.lock); 1206 1207 /* Load the fs node. Exclusive as new_node is VS_LOADING. */ 1208 error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key); 1209 if (error) { 1210 mutex_enter(&vcache.lock); 1211 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask], 1212 new_node, vnode_impl, vi_hash); 1213 mutex_enter(vp->v_interlock); 1214 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 1215 mutex_exit(&vcache.lock); 1216 vrelel(vp, 0); 1217 vfs_unbusy(mp, false, NULL); 1218 KASSERT(*vpp == NULL); 1219 return error; 1220 } 1221 KASSERT(new_key != NULL); 1222 KASSERT(memcmp(key, new_key, key_len) == 0); 1223 KASSERT(vp->v_op != NULL); 1224 vfs_insmntque(vp, mp); 1225 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1226 vp->v_vflag |= VV_MPSAFE; 1227 vfs_unbusy(mp, true, NULL); 1228 1229 /* Finished loading, finalize node. 
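 * The caller's temporary key is replaced with the storage returned by
 * VFS_LOADVNODE() through new_key, which has to stay valid until the
 * vnode is reclaimed (vcache_reclaim() copies it to a temporary buffer
 * before calling VOP_RECLAIM() for exactly this reason).  Both
 * vcache.lock and v_interlock are held across the change away from
 * VS_LOADING so concurrent lookups observe it consistently.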
*/ 1230 mutex_enter(&vcache.lock); 1231 new_node->vi_key.vk_key = new_key; 1232 mutex_enter(vp->v_interlock); 1233 VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE); 1234 mutex_exit(vp->v_interlock); 1235 mutex_exit(&vcache.lock); 1236 *vpp = vp; 1237 return 0; 1238 } 1239 1240 /* 1241 * Create a new vnode / fs node pair and return it referenced through vpp. 1242 */ 1243 int 1244 vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap, 1245 kauth_cred_t cred, struct vnode **vpp) 1246 { 1247 int error; 1248 uint32_t hash; 1249 struct vnode *ovp, *vp; 1250 vnode_impl_t *new_node; 1251 vnode_impl_t *old_node __diagused; 1252 1253 *vpp = NULL; 1254 1255 /* Allocate and initialize a new vcache / vnode pair. */ 1256 error = vfs_busy(mp, NULL); 1257 if (error) 1258 return error; 1259 new_node = vcache_alloc(); 1260 new_node->vi_key.vk_mount = mp; 1261 vp = VIMPL_TO_VNODE(new_node); 1262 1263 /* Create and load the fs node. */ 1264 error = VFS_NEWVNODE(mp, dvp, vp, vap, cred, 1265 &new_node->vi_key.vk_key_len, &new_node->vi_key.vk_key); 1266 if (error) { 1267 mutex_enter(&vcache.lock); 1268 mutex_enter(vp->v_interlock); 1269 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 1270 mutex_exit(&vcache.lock); 1271 vrelel(vp, 0); 1272 vfs_unbusy(mp, false, NULL); 1273 KASSERT(*vpp == NULL); 1274 return error; 1275 } 1276 KASSERT(new_node->vi_key.vk_key != NULL); 1277 KASSERT(vp->v_op != NULL); 1278 hash = vcache_hash(&new_node->vi_key); 1279 1280 /* Wait for previous instance to be reclaimed, then insert new node. */ 1281 mutex_enter(&vcache.lock); 1282 while ((old_node = vcache_hash_lookup(&new_node->vi_key, hash))) { 1283 ovp = VIMPL_TO_VNODE(old_node); 1284 mutex_enter(ovp->v_interlock); 1285 mutex_exit(&vcache.lock); 1286 error = vget(ovp, 0, true /* wait */); 1287 KASSERT(error == ENOENT); 1288 mutex_enter(&vcache.lock); 1289 } 1290 SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask], 1291 new_node, vi_hash); 1292 mutex_exit(&vcache.lock); 1293 vfs_insmntque(vp, mp); 1294 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1295 vp->v_vflag |= VV_MPSAFE; 1296 vfs_unbusy(mp, true, NULL); 1297 1298 /* Finished loading, finalize node. */ 1299 mutex_enter(&vcache.lock); 1300 mutex_enter(vp->v_interlock); 1301 VSTATE_CHANGE(vp, VS_LOADING, VS_ACTIVE); 1302 mutex_exit(&vcache.lock); 1303 mutex_exit(vp->v_interlock); 1304 *vpp = vp; 1305 return 0; 1306 } 1307 1308 /* 1309 * Prepare key change: lock old and new cache node. 1310 * Return an error if the new node already exists. 1311 */ 1312 int 1313 vcache_rekey_enter(struct mount *mp, struct vnode *vp, 1314 const void *old_key, size_t old_key_len, 1315 const void *new_key, size_t new_key_len) 1316 { 1317 uint32_t old_hash, new_hash; 1318 struct vcache_key old_vcache_key, new_vcache_key; 1319 vnode_impl_t *node, *new_node; 1320 struct vnode *tvp; 1321 1322 old_vcache_key.vk_mount = mp; 1323 old_vcache_key.vk_key = old_key; 1324 old_vcache_key.vk_key_len = old_key_len; 1325 old_hash = vcache_hash(&old_vcache_key); 1326 1327 new_vcache_key.vk_mount = mp; 1328 new_vcache_key.vk_key = new_key; 1329 new_vcache_key.vk_key_len = new_key_len; 1330 new_hash = vcache_hash(&new_vcache_key); 1331 1332 new_node = vcache_alloc(); 1333 new_node->vi_key = new_vcache_key; 1334 tvp = VIMPL_TO_VNODE(new_node); 1335 1336 /* Insert locked new node used as placeholder. 
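 * While the placeholder sits on the new hash chain in VS_LOADING state,
 * concurrent vcache_get() callers for the new key block on vcache.cv,
 * so the new key cannot be looked up until the rekey completes (or, on
 * the EEXIST path below, until the placeholder is dropped again).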
*/ 1337 mutex_enter(&vcache.lock); 1338 node = vcache_hash_lookup(&new_vcache_key, new_hash); 1339 if (node != NULL) { 1340 mutex_enter(tvp->v_interlock); 1341 VSTATE_CHANGE(tvp, VS_LOADING, VS_RECLAIMED); 1342 mutex_exit(&vcache.lock); 1343 vrelel(tvp, 0); 1344 return EEXIST; 1345 } 1346 SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask], 1347 new_node, vi_hash); 1348 1349 /* Lock old node. */ 1350 node = vcache_hash_lookup(&old_vcache_key, old_hash); 1351 KASSERT(node != NULL); 1352 KASSERT(VIMPL_TO_VNODE(node) == vp); 1353 mutex_enter(vp->v_interlock); 1354 VSTATE_CHANGE(vp, VS_ACTIVE, VS_BLOCKED); 1355 node->vi_key = old_vcache_key; 1356 mutex_exit(vp->v_interlock); 1357 mutex_exit(&vcache.lock); 1358 return 0; 1359 } 1360 1361 /* 1362 * Key change complete: remove old node and unlock new node. 1363 */ 1364 void 1365 vcache_rekey_exit(struct mount *mp, struct vnode *vp, 1366 const void *old_key, size_t old_key_len, 1367 const void *new_key, size_t new_key_len) 1368 { 1369 uint32_t old_hash, new_hash; 1370 struct vcache_key old_vcache_key, new_vcache_key; 1371 vnode_impl_t *old_node, *new_node; 1372 struct vnode *tvp; 1373 1374 old_vcache_key.vk_mount = mp; 1375 old_vcache_key.vk_key = old_key; 1376 old_vcache_key.vk_key_len = old_key_len; 1377 old_hash = vcache_hash(&old_vcache_key); 1378 1379 new_vcache_key.vk_mount = mp; 1380 new_vcache_key.vk_key = new_key; 1381 new_vcache_key.vk_key_len = new_key_len; 1382 new_hash = vcache_hash(&new_vcache_key); 1383 1384 mutex_enter(&vcache.lock); 1385 1386 /* Lookup old and new node. */ 1387 old_node = vcache_hash_lookup(&old_vcache_key, old_hash); 1388 KASSERT(old_node != NULL); 1389 KASSERT(VIMPL_TO_VNODE(old_node) == vp); 1390 mutex_enter(vp->v_interlock); 1391 VSTATE_ASSERT(vp, VS_BLOCKED); 1392 1393 new_node = vcache_hash_lookup(&new_vcache_key, new_hash); 1394 KASSERT(new_node != NULL); 1395 KASSERT(new_node->vi_key.vk_key_len == new_key_len); 1396 tvp = VIMPL_TO_VNODE(new_node); 1397 mutex_enter(tvp->v_interlock); 1398 VSTATE_ASSERT(VIMPL_TO_VNODE(new_node), VS_LOADING); 1399 1400 /* Rekey old node and put it onto its new hashlist. */ 1401 old_node->vi_key = new_vcache_key; 1402 if (old_hash != new_hash) { 1403 SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask], 1404 old_node, vnode_impl, vi_hash); 1405 SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask], 1406 old_node, vi_hash); 1407 } 1408 VSTATE_CHANGE(vp, VS_BLOCKED, VS_ACTIVE); 1409 mutex_exit(vp->v_interlock); 1410 1411 /* Remove new node used as placeholder. */ 1412 SLIST_REMOVE(&vcache.hashtab[new_hash & vcache.hashmask], 1413 new_node, vnode_impl, vi_hash); 1414 VSTATE_CHANGE(tvp, VS_LOADING, VS_RECLAIMED); 1415 mutex_exit(&vcache.lock); 1416 vrelel(tvp, 0); 1417 } 1418 1419 /* 1420 * Disassociate the underlying file system from a vnode. 1421 * 1422 * Must be called with vnode locked and will return unlocked. 1423 * Must be called with the interlock held, and will return with it held. 
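 *
 * For illustration, the calling pattern (this mirrors vgone() and
 * vrecycle() above; vcache_reclaim() itself is file private):
 *
 *	vn_lock(vp, LK_EXCLUSIVE);	(must succeed)
 *	mutex_enter(vp->v_interlock);
 *	vcache_reclaim(vp);		(returns with the vnode unlocked)
 *	vrelel(vp, 0);			(drops interlock and the reference)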
1424 */ 1425 static void 1426 vcache_reclaim(vnode_t *vp) 1427 { 1428 lwp_t *l = curlwp; 1429 vnode_impl_t *node = VNODE_TO_VIMPL(vp); 1430 uint32_t hash; 1431 uint8_t temp_buf[64], *temp_key; 1432 size_t temp_key_len; 1433 bool recycle, active; 1434 int error; 1435 1436 KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 || 1437 VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1438 KASSERT(mutex_owned(vp->v_interlock)); 1439 KASSERT(vp->v_usecount != 0); 1440 1441 active = (vp->v_usecount > 1); 1442 temp_key_len = node->vi_key.vk_key_len; 1443 /* 1444 * Prevent the vnode from being recycled or brought into use 1445 * while we clean it out. 1446 */ 1447 VSTATE_CHANGE(vp, VS_ACTIVE, VS_RECLAIMING); 1448 if (vp->v_iflag & VI_EXECMAP) { 1449 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 1450 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 1451 } 1452 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1453 mutex_exit(vp->v_interlock); 1454 1455 /* Replace the vnode key with a temporary copy. */ 1456 if (node->vi_key.vk_key_len > sizeof(temp_buf)) { 1457 temp_key = kmem_alloc(temp_key_len, KM_SLEEP); 1458 } else { 1459 temp_key = temp_buf; 1460 } 1461 mutex_enter(&vcache.lock); 1462 memcpy(temp_key, node->vi_key.vk_key, temp_key_len); 1463 node->vi_key.vk_key = temp_key; 1464 mutex_exit(&vcache.lock); 1465 1466 /* 1467 * Clean out any cached data associated with the vnode. 1468 * If purging an active vnode, it must be closed and 1469 * deactivated before being reclaimed. 1470 */ 1471 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1472 if (error != 0) { 1473 if (wapbl_vphaswapbl(vp)) 1474 WAPBL_DISCARD(wapbl_vptomp(vp)); 1475 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1476 } 1477 KASSERTMSG((error == 0), "vinvalbuf failed: %d", error); 1478 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1479 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1480 spec_node_revoke(vp); 1481 } 1482 1483 /* 1484 * Disassociate the underlying file system from the vnode. 1485 * Note that the VOP_INACTIVE will unlock the vnode. 1486 */ 1487 VOP_INACTIVE(vp, &recycle); 1488 if (VOP_RECLAIM(vp)) { 1489 vnpanic(vp, "%s: cannot reclaim", __func__); 1490 } 1491 1492 KASSERT(vp->v_data == NULL); 1493 KASSERT(vp->v_uobj.uo_npages == 0); 1494 1495 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1496 uvm_ra_freectx(vp->v_ractx); 1497 vp->v_ractx = NULL; 1498 } 1499 1500 /* Purge name cache. */ 1501 cache_purge(vp); 1502 1503 /* Move to dead mount. */ 1504 vp->v_vflag &= ~VV_ROOT; 1505 atomic_inc_uint(&dead_rootmount->mnt_refcnt); 1506 vfs_insmntque(vp, dead_rootmount); 1507 1508 /* Remove from vnode cache. */ 1509 hash = vcache_hash(&node->vi_key); 1510 mutex_enter(&vcache.lock); 1511 KASSERT(node == vcache_hash_lookup(&node->vi_key, hash)); 1512 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask], 1513 node, vnode_impl, vi_hash); 1514 mutex_exit(&vcache.lock); 1515 if (temp_key != temp_buf) 1516 kmem_free(temp_key, temp_key_len); 1517 1518 /* Done with purge, notify sleepers of the grim news. */ 1519 mutex_enter(vp->v_interlock); 1520 vp->v_op = dead_vnodeop_p; 1521 vp->v_vflag |= VV_LOCKSWORK; 1522 VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED); 1523 vp->v_tag = VT_NON; 1524 KNOTE(&vp->v_klist, NOTE_REVOKE); 1525 1526 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1527 } 1528 1529 /* 1530 * Update outstanding I/O count and do wakeup if requested. 
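 *
 * v_numoutput counts writes in flight against the vnode.  The I/O start
 * path is expected to increment it with the vnode's object lock (the
 * same lock as v_interlock, see the KASSERT below) held before issuing
 * a write, and buffer completion calls vwakeup() to decrement it and
 * wake anyone sleeping on v_cv for the count to drain, e.g. the
 * vinvalbuf()/vflushbuf() style waiters.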
1531 */ 1532 void 1533 vwakeup(struct buf *bp) 1534 { 1535 vnode_t *vp; 1536 1537 if ((vp = bp->b_vp) == NULL) 1538 return; 1539 1540 KASSERT(bp->b_objlock == vp->v_interlock); 1541 KASSERT(mutex_owned(bp->b_objlock)); 1542 1543 if (--vp->v_numoutput < 0) 1544 vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp); 1545 if (vp->v_numoutput == 0) 1546 cv_broadcast(&vp->v_cv); 1547 } 1548 1549 /* 1550 * Test a vnode for being or becoming dead. Returns one of: 1551 * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. 1552 * ENOENT: vnode is dead. 1553 * 0: otherwise. 1554 * 1555 * Whenever this function returns a non-zero value all future 1556 * calls will also return a non-zero value. 1557 */ 1558 int 1559 vdead_check(struct vnode *vp, int flags) 1560 { 1561 1562 KASSERT(mutex_owned(vp->v_interlock)); 1563 1564 if (! ISSET(flags, VDEAD_NOWAIT)) 1565 VSTATE_WAIT_STABLE(vp); 1566 1567 if (VSTATE_GET(vp) == VS_RECLAIMING) { 1568 KASSERT(ISSET(flags, VDEAD_NOWAIT)); 1569 return EBUSY; 1570 } else if (VSTATE_GET(vp) == VS_RECLAIMED) { 1571 return ENOENT; 1572 } 1573 1574 return 0; 1575 } 1576 1577 int 1578 vfs_drainvnodes(void) 1579 { 1580 int i, gen; 1581 1582 mutex_enter(&vdrain_lock); 1583 for (i = 0; i < 2; i++) { 1584 gen = vdrain_gen; 1585 while (gen == vdrain_gen) { 1586 cv_broadcast(&vdrain_cv); 1587 cv_wait(&vdrain_gen_cv, &vdrain_lock); 1588 } 1589 } 1590 mutex_exit(&vdrain_lock); 1591 1592 if (numvnodes >= desiredvnodes) 1593 return EBUSY; 1594 1595 if (vcache.hashsize != desiredvnodes) 1596 vcache_reinit(); 1597 1598 return 0; 1599 } 1600 1601 void 1602 vnpanic(vnode_t *vp, const char *fmt, ...) 1603 { 1604 va_list ap; 1605 1606 #ifdef DIAGNOSTIC 1607 vprint(NULL, vp); 1608 #endif 1609 va_start(ap, fmt); 1610 vpanic(fmt, ap); 1611 va_end(ap); 1612 } 1613