/*	$NetBSD: vfs_vnode.c,v 1.103 2019/02/20 10:07:27 hannken Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via vcache_get(9) or vcache_new(9).
 *	- Reclamation of an inactive vnode, via vcache_vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In such a case, VOP_INACTIVE(9) is called to
 *	inform the file system that the vnode is inactive.  Via this call,
 *	the file system indicates whether the vnode can be recycled
 *	(usually, it checks its own references, e.g. the link count, or
 *	whether the file was removed).
 *
 *	Depending on that indication, the vnode is either put onto a free
 *	list (cache), or cleaned via vcache_reclaim(), which calls
 *	VOP_RECLAIM(9) to disassociate the underlying file system from the
 *	vnode, and finally destroyed.
 *
 * Vnode state
 *
 *	A vnode is always in one of six states:
 *	- MARKER	This is a marker vnode to help list traversal.  It
 *			will never change its state.
 *	- LOADING	Vnode is associating with the underlying file system
 *			and is not yet ready to use.
 *	- LOADED	Vnode has associated with the underlying file system
 *			and is ready to use.
 *	- BLOCKED	Vnode is active but cannot get new references.
 *	- RECLAIMING	Vnode is disassociating from the underlying file
 *			system.
 *	- RECLAIMED	Vnode has disassociated from the underlying file
 *			system and is dead.
 *
 *	Valid state changes are:
 *	LOADING -> LOADED
 *			Vnode has been initialised in vcache_get() or
 *			vcache_new() and is ready to use.
 *	LOADED -> RECLAIMING
 *			Vnode starts disassociation from the underlying file
 *			system in vcache_reclaim().
 *	RECLAIMING -> RECLAIMED
 *			Vnode finished disassociation from the underlying file
 *			system in vcache_reclaim().
 *	LOADED -> BLOCKED
 *			Either vcache_rekey*() is changing the vnode key or
 *			vrelel() is about to call VOP_INACTIVE().
 *	BLOCKED -> LOADED
 *			The block condition is over.
 *	LOADING -> RECLAIMED
 *			Either vcache_get() or vcache_new() failed to
 *			associate the underlying file system, or vcache_rekey*()
 *			drops a vnode used as placeholder.
 *
 *	Of these states LOADING, BLOCKED and RECLAIMING are intermediate
 *	and it is possible to wait for a state change.
 *
 *	State is protected with v_interlock with one exception:
 *	to change from LOADING both v_interlock and vcache_lock must be held
 *	so it is possible to check "state == LOADING" without holding
 *	v_interlock.  See vcache_get() for details.
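 *
 *	As a hedged illustration only (mirroring what vcache_vget() and
 *	vdead_check() below actually do), code that needs a stable state
 *	takes v_interlock, waits, and then acts on the result:
 *
 *		mutex_enter(vp->v_interlock);
 *		VSTATE_WAIT_STABLE(vp);		// returns in LOADED/RECLAIMED
 *		if (VSTATE_GET(vp) == VS_RECLAIMED) {
 *			// the vnode is dead; back out
 *		}
 *		mutex_exit(vp->v_interlock);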
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained using
 *	the vref(9) and vrele(9) routines, as well as vput(9).  Common
 *	points holding references are, e.g., file openings, the current
 *	working directory, mount points, etc.
 *
 * Note on v_usecount and its locking
 *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, vnode_t::v_interlock will be held.  To change v_usecount
 *	away from zero, the interlock must be held.  To change from a
 *	non-zero value to zero, again the interlock must be held.
 *
 *	Changing the usecount from a non-zero value to a non-zero value can
 *	safely be done using atomic operations, without the interlock held.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.103 2019/02/20 10:07:27 hannken Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode_impl.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_FORCE_RELE	0x0002	/* Must always succeed. */

u_int			numvnodes		__cacheline_aligned;

/*
 * There are three lru lists: one holds vnodes waiting for async release,
 * one is for vnodes which have no buffer/page references and
 * one for those which do (i.e. v_holdcnt is non-zero).
 */
static vnodelst_t	lru_vrele_list		__cacheline_aligned;
static vnodelst_t	lru_free_list		__cacheline_aligned;
static vnodelst_t	lru_hold_list		__cacheline_aligned;
static kmutex_t		vdrain_lock		__cacheline_aligned;
static kcondvar_t	vdrain_cv		__cacheline_aligned;
static int		vdrain_gen;
static kcondvar_t	vdrain_gen_cv;
static bool		vdrain_retry;
static lwp_t *		vdrain_lwp;
SLIST_HEAD(hashhead, vnode_impl);
static kmutex_t		vcache_lock		__cacheline_aligned;
static kcondvar_t	vcache_cv		__cacheline_aligned;
static u_int		vcache_hashsize;
static u_long		vcache_hashmask;
static struct hashhead	*vcache_hashtab		__cacheline_aligned;
static pool_cache_t	vcache_pool;
static void		lru_requeue(vnode_t *, vnodelst_t *);
static vnodelst_t *	lru_which(vnode_t *);
static vnode_impl_t *	vcache_alloc(void);
static void		vcache_dealloc(vnode_impl_t *);
static void		vcache_free(vnode_impl_t *);
static void		vcache_init(void);
static void		vcache_reinit(void);
static void		vcache_reclaim(vnode_t *);
static void		vrelel(vnode_t *, int);
static void		vdrain_thread(void *);
static void		vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);

/* Routines having to do with the management of the vnode table. */
extern struct mount	*dead_rootmount;
extern int		(**dead_vnodeop_p)(void *);
extern int		(**spec_vnodeop_p)(void *);
extern struct vfsops	dead_vfsops;

/* Vnode state operations and diagnostics.
 */

#if defined(DIAGNOSTIC)

#define VSTATE_VALID(state) \
	((state) != VS_ACTIVE && (state) != VS_MARKER)
#define VSTATE_GET(vp) \
	vstate_assert_get((vp), __func__, __LINE__)
#define VSTATE_CHANGE(vp, from, to) \
	vstate_assert_change((vp), (from), (to), __func__, __LINE__)
#define VSTATE_WAIT_STABLE(vp) \
	vstate_assert_wait_stable((vp), __func__, __LINE__)

void
_vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line,
    bool has_lock)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);

	if (!has_lock) {
		/*
		 * Prevent predictive loads from the CPU, but check the state
		 * without taking the lock first.
		 */
		membar_enter();
		if (state == VS_ACTIVE && vp->v_usecount > 0 &&
		    (vip->vi_state == VS_LOADED || vip->vi_state == VS_BLOCKED))
			return;
		if (vip->vi_state == state)
			return;
		mutex_enter((vp)->v_interlock);
	}

	KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);

	if ((state == VS_ACTIVE && vp->v_usecount > 0 &&
	    (vip->vi_state == VS_LOADED || vip->vi_state == VS_BLOCKED)) ||
	    vip->vi_state == state) {
		if (!has_lock)
			mutex_exit((vp)->v_interlock);
		return;
	}
	vnpanic(vp, "state is %s, usecount %d, expected %s at %s:%d",
	    vstate_name(vip->vi_state), vp->v_usecount,
	    vstate_name(state), func, line);
}

static enum vnode_state
vstate_assert_get(vnode_t *vp, const char *func, int line)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);

	KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
	if (! VSTATE_VALID(vip->vi_state))
		vnpanic(vp, "state is %s at %s:%d",
		    vstate_name(vip->vi_state), func, line);

	return vip->vi_state;
}

static void
vstate_assert_wait_stable(vnode_t *vp, const char *func, int line)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);

	KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
	if (! VSTATE_VALID(vip->vi_state))
		vnpanic(vp, "state is %s at %s:%d",
		    vstate_name(vip->vi_state), func, line);

	while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED)
		cv_wait(&vp->v_cv, vp->v_interlock);

	if (! VSTATE_VALID(vip->vi_state))
		vnpanic(vp, "state is %s at %s:%d",
		    vstate_name(vip->vi_state), func, line);
}

static void
vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to,
    const char *func, int line)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);

	KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
	if (from == VS_LOADING)
		KASSERTMSG(mutex_owned(&vcache_lock), "at %s:%d", func, line);

	if (! VSTATE_VALID(from))
		vnpanic(vp, "from is %s at %s:%d",
		    vstate_name(from), func, line);
	if (!
VSTATE_VALID(to)) 323 vnpanic(vp, "to is %s at %s:%d", 324 vstate_name(to), func, line); 325 if (vip->vi_state != from) 326 vnpanic(vp, "from is %s, expected %s at %s:%d\n", 327 vstate_name(vip->vi_state), vstate_name(from), func, line); 328 if ((from == VS_BLOCKED || to == VS_BLOCKED) && vp->v_usecount != 1) 329 vnpanic(vp, "%s to %s with usecount %d at %s:%d", 330 vstate_name(from), vstate_name(to), vp->v_usecount, 331 func, line); 332 333 vip->vi_state = to; 334 if (from == VS_LOADING) 335 cv_broadcast(&vcache_cv); 336 if (to == VS_LOADED || to == VS_RECLAIMED) 337 cv_broadcast(&vp->v_cv); 338 } 339 340 #else /* defined(DIAGNOSTIC) */ 341 342 #define VSTATE_GET(vp) \ 343 (VNODE_TO_VIMPL((vp))->vi_state) 344 #define VSTATE_CHANGE(vp, from, to) \ 345 vstate_change((vp), (from), (to)) 346 #define VSTATE_WAIT_STABLE(vp) \ 347 vstate_wait_stable((vp)) 348 void 349 _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, 350 bool has_lock) 351 { 352 353 } 354 355 static void 356 vstate_wait_stable(vnode_t *vp) 357 { 358 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 359 360 while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) 361 cv_wait(&vp->v_cv, vp->v_interlock); 362 } 363 364 static void 365 vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) 366 { 367 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 368 369 vip->vi_state = to; 370 if (from == VS_LOADING) 371 cv_broadcast(&vcache_cv); 372 if (to == VS_LOADED || to == VS_RECLAIMED) 373 cv_broadcast(&vp->v_cv); 374 } 375 376 #endif /* defined(DIAGNOSTIC) */ 377 378 void 379 vfs_vnode_sysinit(void) 380 { 381 int error __diagused; 382 383 dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL); 384 KASSERT(dead_rootmount != NULL); 385 dead_rootmount->mnt_iflag |= IMNT_MPSAFE; 386 387 mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE); 388 TAILQ_INIT(&lru_free_list); 389 TAILQ_INIT(&lru_hold_list); 390 TAILQ_INIT(&lru_vrele_list); 391 392 vcache_init(); 393 394 cv_init(&vdrain_cv, "vdrain"); 395 cv_init(&vdrain_gen_cv, "vdrainwt"); 396 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread, 397 NULL, &vdrain_lwp, "vdrain"); 398 KASSERTMSG((error == 0), "kthread_create(vdrain) failed: %d", error); 399 } 400 401 /* 402 * Allocate a new marker vnode. 403 */ 404 vnode_t * 405 vnalloc_marker(struct mount *mp) 406 { 407 vnode_impl_t *vip; 408 vnode_t *vp; 409 410 vip = pool_cache_get(vcache_pool, PR_WAITOK); 411 memset(vip, 0, sizeof(*vip)); 412 vp = VIMPL_TO_VNODE(vip); 413 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 414 vp->v_mount = mp; 415 vp->v_type = VBAD; 416 vip->vi_state = VS_MARKER; 417 418 return vp; 419 } 420 421 /* 422 * Free a marker vnode. 423 */ 424 void 425 vnfree_marker(vnode_t *vp) 426 { 427 vnode_impl_t *vip; 428 429 vip = VNODE_TO_VIMPL(vp); 430 KASSERT(vip->vi_state == VS_MARKER); 431 uvm_obj_destroy(&vp->v_uobj, true); 432 pool_cache_put(vcache_pool, vip); 433 } 434 435 /* 436 * Test a vnode for being a marker vnode. 437 */ 438 bool 439 vnis_marker(vnode_t *vp) 440 { 441 442 return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER); 443 } 444 445 /* 446 * Return the lru list this node should be on. 447 */ 448 static vnodelst_t * 449 lru_which(vnode_t *vp) 450 { 451 452 KASSERT(mutex_owned(vp->v_interlock)); 453 454 if (vp->v_holdcnt > 0) 455 return &lru_hold_list; 456 else 457 return &lru_free_list; 458 } 459 460 /* 461 * Put vnode to end of given list. 462 * Both the current and the new list may be NULL, used on vnode alloc/free. 
463 * Adjust numvnodes and signal vdrain thread if there is work. 464 */ 465 static void 466 lru_requeue(vnode_t *vp, vnodelst_t *listhd) 467 { 468 vnode_impl_t *vip; 469 470 mutex_enter(&vdrain_lock); 471 vip = VNODE_TO_VIMPL(vp); 472 if (vip->vi_lrulisthd != NULL) 473 TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 474 else 475 numvnodes++; 476 vip->vi_lrulisthd = listhd; 477 if (vip->vi_lrulisthd != NULL) 478 TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 479 else 480 numvnodes--; 481 if (numvnodes > desiredvnodes || listhd == &lru_vrele_list) 482 cv_broadcast(&vdrain_cv); 483 mutex_exit(&vdrain_lock); 484 } 485 486 /* 487 * Release deferred vrele vnodes for this mount. 488 * Called with file system suspended. 489 */ 490 void 491 vrele_flush(struct mount *mp) 492 { 493 vnode_impl_t *vip, *marker; 494 495 KASSERT(fstrans_is_owner(mp)); 496 497 marker = VNODE_TO_VIMPL(vnalloc_marker(NULL)); 498 499 mutex_enter(&vdrain_lock); 500 TAILQ_INSERT_HEAD(&lru_vrele_list, marker, vi_lrulist); 501 502 while ((vip = TAILQ_NEXT(marker, vi_lrulist))) { 503 TAILQ_REMOVE(&lru_vrele_list, marker, vi_lrulist); 504 TAILQ_INSERT_AFTER(&lru_vrele_list, vip, marker, vi_lrulist); 505 if (vnis_marker(VIMPL_TO_VNODE(vip))) 506 continue; 507 508 KASSERT(vip->vi_lrulisthd == &lru_vrele_list); 509 TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 510 vip->vi_lrulisthd = &lru_hold_list; 511 TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 512 mutex_exit(&vdrain_lock); 513 514 mutex_enter(VIMPL_TO_VNODE(vip)->v_interlock); 515 vrelel(VIMPL_TO_VNODE(vip), VRELEL_FORCE_RELE); 516 517 mutex_enter(&vdrain_lock); 518 } 519 520 TAILQ_REMOVE(&lru_vrele_list, marker, vi_lrulist); 521 mutex_exit(&vdrain_lock); 522 523 vnfree_marker(VIMPL_TO_VNODE(marker)); 524 } 525 526 /* 527 * Reclaim a cached vnode. Used from vdrain_thread only. 528 */ 529 static __inline void 530 vdrain_remove(vnode_t *vp) 531 { 532 struct mount *mp; 533 534 KASSERT(mutex_owned(&vdrain_lock)); 535 536 /* Probe usecount (unlocked). */ 537 if (vp->v_usecount > 0) 538 return; 539 /* Try v_interlock -- we lock the wrong direction! */ 540 if (!mutex_tryenter(vp->v_interlock)) 541 return; 542 /* Probe usecount and state. */ 543 if (vp->v_usecount > 0 || VSTATE_GET(vp) != VS_LOADED) { 544 mutex_exit(vp->v_interlock); 545 return; 546 } 547 mp = vp->v_mount; 548 if (fstrans_start_nowait(mp) != 0) { 549 mutex_exit(vp->v_interlock); 550 return; 551 } 552 vdrain_retry = true; 553 mutex_exit(&vdrain_lock); 554 555 if (vcache_vget(vp) == 0) { 556 if (!vrecycle(vp)) { 557 mutex_enter(vp->v_interlock); 558 vrelel(vp, VRELEL_FORCE_RELE); 559 } 560 } 561 fstrans_done(mp); 562 563 mutex_enter(&vdrain_lock); 564 } 565 566 /* 567 * Release a cached vnode. Used from vdrain_thread only. 568 */ 569 static __inline void 570 vdrain_vrele(vnode_t *vp) 571 { 572 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 573 struct mount *mp; 574 575 KASSERT(mutex_owned(&vdrain_lock)); 576 577 mp = vp->v_mount; 578 if (fstrans_start_nowait(mp) != 0) 579 return; 580 581 /* 582 * First remove the vnode from the vrele list. 583 * Put it on the last lru list, the last vrele() 584 * will put it back onto the right list before 585 * its v_usecount reaches zero. 
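	 *
	 * vrelel() with VRELEL_FORCE_RELE below then drops the reference
	 * that was deferred to this thread; if it turns out to be the last
	 * one, the vnode is destroyed or moved to the free/hold list via
	 * lru_requeue().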
586 */ 587 KASSERT(vip->vi_lrulisthd == &lru_vrele_list); 588 TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 589 vip->vi_lrulisthd = &lru_hold_list; 590 TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 591 592 vdrain_retry = true; 593 mutex_exit(&vdrain_lock); 594 595 mutex_enter(vp->v_interlock); 596 vrelel(vp, VRELEL_FORCE_RELE); 597 fstrans_done(mp); 598 599 mutex_enter(&vdrain_lock); 600 } 601 602 /* 603 * Helper thread to keep the number of vnodes below desiredvnodes 604 * and release vnodes from asynchronous vrele. 605 */ 606 static void 607 vdrain_thread(void *cookie) 608 { 609 vnodelst_t *listhd[] = { 610 &lru_vrele_list, &lru_free_list, &lru_hold_list 611 }; 612 int i; 613 u_int target; 614 vnode_impl_t *vip, *marker; 615 616 marker = VNODE_TO_VIMPL(vnalloc_marker(NULL)); 617 618 mutex_enter(&vdrain_lock); 619 620 for (;;) { 621 vdrain_retry = false; 622 target = desiredvnodes - desiredvnodes/10; 623 624 for (i = 0; i < __arraycount(listhd); i++) { 625 TAILQ_INSERT_HEAD(listhd[i], marker, vi_lrulist); 626 while ((vip = TAILQ_NEXT(marker, vi_lrulist))) { 627 TAILQ_REMOVE(listhd[i], marker, vi_lrulist); 628 TAILQ_INSERT_AFTER(listhd[i], vip, marker, 629 vi_lrulist); 630 if (vnis_marker(VIMPL_TO_VNODE(vip))) 631 continue; 632 if (listhd[i] == &lru_vrele_list) 633 vdrain_vrele(VIMPL_TO_VNODE(vip)); 634 else if (numvnodes < target) 635 break; 636 else 637 vdrain_remove(VIMPL_TO_VNODE(vip)); 638 } 639 TAILQ_REMOVE(listhd[i], marker, vi_lrulist); 640 } 641 642 if (vdrain_retry) { 643 mutex_exit(&vdrain_lock); 644 yield(); 645 mutex_enter(&vdrain_lock); 646 } else { 647 vdrain_gen++; 648 cv_broadcast(&vdrain_gen_cv); 649 cv_wait(&vdrain_cv, &vdrain_lock); 650 } 651 } 652 } 653 654 /* 655 * vput: unlock and release the reference. 656 */ 657 void 658 vput(vnode_t *vp) 659 { 660 661 VOP_UNLOCK(vp); 662 vrele(vp); 663 } 664 665 /* 666 * Try to drop reference on a vnode. Abort if we are releasing the 667 * last reference. Note: this _must_ succeed if not the last reference. 668 */ 669 static inline bool 670 vtryrele(vnode_t *vp) 671 { 672 u_int use, next; 673 674 for (use = vp->v_usecount;; use = next) { 675 if (use == 1) { 676 return false; 677 } 678 KASSERT(use > 1); 679 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 680 if (__predict_true(next == use)) { 681 return true; 682 } 683 } 684 } 685 686 /* 687 * Vnode release. If reference count drops to zero, call inactive 688 * routine and either return to freelist or free to the pool. 689 */ 690 static void 691 vrelel(vnode_t *vp, int flags) 692 { 693 const bool async = ((flags & VRELEL_ASYNC_RELE) != 0); 694 const bool force = ((flags & VRELEL_FORCE_RELE) != 0); 695 bool recycle, defer; 696 int error; 697 698 KASSERT(mutex_owned(vp->v_interlock)); 699 700 if (__predict_false(vp->v_op == dead_vnodeop_p && 701 VSTATE_GET(vp) != VS_RECLAIMED)) { 702 vnpanic(vp, "dead but not clean"); 703 } 704 705 /* 706 * If not the last reference, just drop the reference count 707 * and unlock. 708 */ 709 if (vtryrele(vp)) { 710 mutex_exit(vp->v_interlock); 711 return; 712 } 713 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 714 vnpanic(vp, "%s: bad ref count", __func__); 715 } 716 717 #ifdef DIAGNOSTIC 718 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 719 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 720 vprint("vrelel: missing VOP_CLOSE()", vp); 721 } 722 #endif 723 724 /* 725 * First try to get the vnode locked for VOP_INACTIVE(). 
726 * Defer vnode release to vdrain_thread if caller requests 727 * it explicitly, is the pagedaemon or the lock failed. 728 */ 729 if ((curlwp == uvm.pagedaemon_lwp) || async) { 730 defer = true; 731 } else { 732 mutex_exit(vp->v_interlock); 733 error = vn_lock(vp, 734 LK_EXCLUSIVE | LK_RETRY | (force ? 0 : LK_NOWAIT)); 735 defer = (error != 0); 736 mutex_enter(vp->v_interlock); 737 } 738 KASSERT(mutex_owned(vp->v_interlock)); 739 KASSERT(! (force && defer)); 740 if (defer) { 741 /* 742 * Defer reclaim to the kthread; it's not safe to 743 * clean it here. We donate it our last reference. 744 */ 745 lru_requeue(vp, &lru_vrele_list); 746 mutex_exit(vp->v_interlock); 747 return; 748 } 749 750 /* 751 * If the node got another reference while we 752 * released the interlock, don't try to inactivate it yet. 753 */ 754 if (__predict_false(vtryrele(vp))) { 755 VOP_UNLOCK(vp); 756 mutex_exit(vp->v_interlock); 757 return; 758 } 759 760 /* 761 * If not clean, deactivate the vnode, but preserve 762 * our reference across the call to VOP_INACTIVE(). 763 */ 764 if (VSTATE_GET(vp) == VS_RECLAIMED) { 765 VOP_UNLOCK(vp); 766 } else { 767 VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 768 mutex_exit(vp->v_interlock); 769 770 /* 771 * The vnode must not gain another reference while being 772 * deactivated. If VOP_INACTIVE() indicates that 773 * the described file has been deleted, then recycle 774 * the vnode. 775 * 776 * Note that VOP_INACTIVE() will not drop the vnode lock. 777 */ 778 recycle = false; 779 VOP_INACTIVE(vp, &recycle); 780 if (!recycle) 781 VOP_UNLOCK(vp); 782 mutex_enter(vp->v_interlock); 783 VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 784 if (!recycle) { 785 if (vtryrele(vp)) { 786 mutex_exit(vp->v_interlock); 787 return; 788 } 789 } 790 791 /* Take care of space accounting. */ 792 if (vp->v_iflag & VI_EXECMAP) { 793 atomic_add_int(&uvmexp.execpages, 794 -vp->v_uobj.uo_npages); 795 atomic_add_int(&uvmexp.filepages, 796 vp->v_uobj.uo_npages); 797 } 798 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 799 vp->v_vflag &= ~VV_MAPPED; 800 801 /* 802 * Recycle the vnode if the file is now unused (unlinked), 803 * otherwise just free it. 804 */ 805 if (recycle) { 806 VSTATE_ASSERT(vp, VS_LOADED); 807 /* vcache_reclaim drops the lock. */ 808 vcache_reclaim(vp); 809 } 810 KASSERT(vp->v_usecount > 0); 811 } 812 813 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 814 /* Gained another reference while being reclaimed. */ 815 mutex_exit(vp->v_interlock); 816 return; 817 } 818 819 if (VSTATE_GET(vp) == VS_RECLAIMED && vp->v_holdcnt == 0) { 820 /* 821 * It's clean so destroy it. It isn't referenced 822 * anywhere since it has been reclaimed. 823 */ 824 vcache_free(VNODE_TO_VIMPL(vp)); 825 } else { 826 /* 827 * Otherwise, put it back onto the freelist. It 828 * can't be destroyed while still associated with 829 * a file system. 830 */ 831 lru_requeue(vp, lru_which(vp)); 832 mutex_exit(vp->v_interlock); 833 } 834 } 835 836 void 837 vrele(vnode_t *vp) 838 { 839 840 if (vtryrele(vp)) { 841 return; 842 } 843 mutex_enter(vp->v_interlock); 844 vrelel(vp, 0); 845 } 846 847 /* 848 * Asynchronous vnode release, vnode is released in different context. 849 */ 850 void 851 vrele_async(vnode_t *vp) 852 { 853 854 if (vtryrele(vp)) { 855 return; 856 } 857 mutex_enter(vp->v_interlock); 858 vrelel(vp, VRELEL_ASYNC_RELE); 859 } 860 861 /* 862 * Vnode reference, where a reference is already held by some other 863 * object (for example, a file structure). 
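 *
 * An illustrative sketch (the structure and field names are hypothetical,
 * the vref()/vrele() pairing is the documented contract): a subsystem that
 * caches a vnode pointer takes an extra reference for as long as the
 * pointer is live and drops it when done:
 *
 *	vref(vp);		// object now owns one reference
 *	obj->o_vnode = vp;
 *	...
 *	vrele(obj->o_vnode);	// may trigger VOP_INACTIVE() if last
 *	obj->o_vnode = NULL;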
864 */ 865 void 866 vref(vnode_t *vp) 867 { 868 869 KASSERT(vp->v_usecount != 0); 870 871 atomic_inc_uint(&vp->v_usecount); 872 } 873 874 /* 875 * Page or buffer structure gets a reference. 876 * Called with v_interlock held. 877 */ 878 void 879 vholdl(vnode_t *vp) 880 { 881 882 KASSERT(mutex_owned(vp->v_interlock)); 883 884 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) 885 lru_requeue(vp, lru_which(vp)); 886 } 887 888 /* 889 * Page or buffer structure frees a reference. 890 * Called with v_interlock held. 891 */ 892 void 893 holdrelel(vnode_t *vp) 894 { 895 896 KASSERT(mutex_owned(vp->v_interlock)); 897 898 if (vp->v_holdcnt <= 0) { 899 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 900 } 901 902 vp->v_holdcnt--; 903 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) 904 lru_requeue(vp, lru_which(vp)); 905 } 906 907 /* 908 * Recycle an unused vnode if caller holds the last reference. 909 */ 910 bool 911 vrecycle(vnode_t *vp) 912 { 913 int error __diagused; 914 915 mutex_enter(vp->v_interlock); 916 917 /* Make sure we hold the last reference. */ 918 VSTATE_WAIT_STABLE(vp); 919 if (vp->v_usecount != 1) { 920 mutex_exit(vp->v_interlock); 921 return false; 922 } 923 924 /* If the vnode is already clean we're done. */ 925 if (VSTATE_GET(vp) != VS_LOADED) { 926 VSTATE_ASSERT(vp, VS_RECLAIMED); 927 vrelel(vp, 0); 928 return true; 929 } 930 931 /* Prevent further references until the vnode is locked. */ 932 VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 933 mutex_exit(vp->v_interlock); 934 935 /* 936 * On a leaf file system this lock will always succeed as we hold 937 * the last reference and prevent further references. 938 * On layered file systems waiting for the lock would open a can of 939 * deadlocks as the lower vnodes may have other active references. 940 */ 941 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 942 943 mutex_enter(vp->v_interlock); 944 VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 945 946 if (error) { 947 mutex_exit(vp->v_interlock); 948 return false; 949 } 950 951 KASSERT(vp->v_usecount == 1); 952 vcache_reclaim(vp); 953 vrelel(vp, 0); 954 955 return true; 956 } 957 958 /* 959 * Helper for vrevoke() to propagate suspension from lastmp 960 * to thismp. Both args may be NULL. 961 * Returns the currently suspended file system or NULL. 962 */ 963 static struct mount * 964 vrevoke_suspend_next(struct mount *lastmp, struct mount *thismp) 965 { 966 int error; 967 968 if (lastmp == thismp) 969 return thismp; 970 971 if (lastmp != NULL) 972 vfs_resume(lastmp); 973 974 if (thismp == NULL) 975 return NULL; 976 977 do { 978 error = vfs_suspend(thismp, 0); 979 } while (error == EINTR || error == ERESTART); 980 981 if (error == 0) 982 return thismp; 983 984 KASSERT(error == EOPNOTSUPP); 985 return NULL; 986 } 987 988 /* 989 * Eliminate all activity associated with the requested vnode 990 * and with all vnodes aliased to the requested vnode. 
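 *
 * A hedged usage note: this is what revoke(2) ultimately relies on; the
 * usual path is VOP_REVOKE(vp, REVOKEALL), typically resolving to
 * genfs_revoke(), which calls vrevoke(vp).  For a VBLK/VCHR vnode every
 * alias sharing the same (type, dev) pair is revoked, not only the vnode
 * passed in.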
991 */ 992 void 993 vrevoke(vnode_t *vp) 994 { 995 struct mount *mp; 996 vnode_t *vq; 997 enum vtype type; 998 dev_t dev; 999 1000 KASSERT(vp->v_usecount > 0); 1001 1002 mp = vrevoke_suspend_next(NULL, vp->v_mount); 1003 1004 mutex_enter(vp->v_interlock); 1005 VSTATE_WAIT_STABLE(vp); 1006 if (VSTATE_GET(vp) == VS_RECLAIMED) { 1007 mutex_exit(vp->v_interlock); 1008 } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 1009 atomic_inc_uint(&vp->v_usecount); 1010 mutex_exit(vp->v_interlock); 1011 vgone(vp); 1012 } else { 1013 dev = vp->v_rdev; 1014 type = vp->v_type; 1015 mutex_exit(vp->v_interlock); 1016 1017 while (spec_node_lookup_by_dev(type, dev, &vq) == 0) { 1018 mp = vrevoke_suspend_next(mp, vq->v_mount); 1019 vgone(vq); 1020 } 1021 } 1022 vrevoke_suspend_next(mp, NULL); 1023 } 1024 1025 /* 1026 * Eliminate all activity associated with a vnode in preparation for 1027 * reuse. Drops a reference from the vnode. 1028 */ 1029 void 1030 vgone(vnode_t *vp) 1031 { 1032 1033 KASSERT(vp->v_mount == dead_rootmount || fstrans_is_owner(vp->v_mount)); 1034 1035 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1036 mutex_enter(vp->v_interlock); 1037 VSTATE_WAIT_STABLE(vp); 1038 if (VSTATE_GET(vp) == VS_LOADED) 1039 vcache_reclaim(vp); 1040 VSTATE_ASSERT(vp, VS_RECLAIMED); 1041 vrelel(vp, 0); 1042 } 1043 1044 static inline uint32_t 1045 vcache_hash(const struct vcache_key *key) 1046 { 1047 uint32_t hash = HASH32_BUF_INIT; 1048 1049 KASSERT(key->vk_key_len > 0); 1050 1051 hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); 1052 hash = hash32_buf(key->vk_key, key->vk_key_len, hash); 1053 return hash; 1054 } 1055 1056 static void 1057 vcache_init(void) 1058 { 1059 1060 vcache_pool = pool_cache_init(sizeof(vnode_impl_t), 0, 0, 0, 1061 "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL); 1062 KASSERT(vcache_pool != NULL); 1063 mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE); 1064 cv_init(&vcache_cv, "vcache"); 1065 vcache_hashsize = desiredvnodes; 1066 vcache_hashtab = hashinit(desiredvnodes, HASH_SLIST, true, 1067 &vcache_hashmask); 1068 } 1069 1070 static void 1071 vcache_reinit(void) 1072 { 1073 int i; 1074 uint32_t hash; 1075 u_long oldmask, newmask; 1076 struct hashhead *oldtab, *newtab; 1077 vnode_impl_t *vip; 1078 1079 newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); 1080 mutex_enter(&vcache_lock); 1081 oldtab = vcache_hashtab; 1082 oldmask = vcache_hashmask; 1083 vcache_hashsize = desiredvnodes; 1084 vcache_hashtab = newtab; 1085 vcache_hashmask = newmask; 1086 for (i = 0; i <= oldmask; i++) { 1087 while ((vip = SLIST_FIRST(&oldtab[i])) != NULL) { 1088 SLIST_REMOVE(&oldtab[i], vip, vnode_impl, vi_hash); 1089 hash = vcache_hash(&vip->vi_key); 1090 SLIST_INSERT_HEAD(&newtab[hash & vcache_hashmask], 1091 vip, vi_hash); 1092 } 1093 } 1094 mutex_exit(&vcache_lock); 1095 hashdone(oldtab, HASH_SLIST, oldmask); 1096 } 1097 1098 static inline vnode_impl_t * 1099 vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) 1100 { 1101 struct hashhead *hashp; 1102 vnode_impl_t *vip; 1103 1104 KASSERT(mutex_owned(&vcache_lock)); 1105 1106 hashp = &vcache_hashtab[hash & vcache_hashmask]; 1107 SLIST_FOREACH(vip, hashp, vi_hash) { 1108 if (key->vk_mount != vip->vi_key.vk_mount) 1109 continue; 1110 if (key->vk_key_len != vip->vi_key.vk_key_len) 1111 continue; 1112 if (memcmp(key->vk_key, vip->vi_key.vk_key, key->vk_key_len)) 1113 continue; 1114 return vip; 1115 } 1116 return NULL; 1117 } 1118 1119 /* 1120 * Allocate a new, uninitialized vcache node. 
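 *
 * The node is returned in state VS_LOADING with one reference held and is
 * already on the free LRU list; the caller either completes the load
 * (VS_LOADING -> VS_LOADED) or disposes of it again with vcache_dealloc().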
1121 */ 1122 static vnode_impl_t * 1123 vcache_alloc(void) 1124 { 1125 vnode_impl_t *vip; 1126 vnode_t *vp; 1127 1128 vip = pool_cache_get(vcache_pool, PR_WAITOK); 1129 memset(vip, 0, sizeof(*vip)); 1130 1131 rw_init(&vip->vi_lock); 1132 /* SLIST_INIT(&vip->vi_hash); */ 1133 /* LIST_INIT(&vip->vi_nclist); */ 1134 /* LIST_INIT(&vip->vi_dnclist); */ 1135 1136 vp = VIMPL_TO_VNODE(vip); 1137 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 1138 cv_init(&vp->v_cv, "vnode"); 1139 1140 vp->v_usecount = 1; 1141 vp->v_type = VNON; 1142 vp->v_size = vp->v_writesize = VSIZENOTSET; 1143 1144 vip->vi_state = VS_LOADING; 1145 1146 lru_requeue(vp, &lru_free_list); 1147 1148 return vip; 1149 } 1150 1151 /* 1152 * Deallocate a vcache node in state VS_LOADING. 1153 * 1154 * vcache_lock held on entry and released on return. 1155 */ 1156 static void 1157 vcache_dealloc(vnode_impl_t *vip) 1158 { 1159 vnode_t *vp; 1160 1161 KASSERT(mutex_owned(&vcache_lock)); 1162 1163 vp = VIMPL_TO_VNODE(vip); 1164 vfs_ref(dead_rootmount); 1165 vfs_insmntque(vp, dead_rootmount); 1166 mutex_enter(vp->v_interlock); 1167 vp->v_op = dead_vnodeop_p; 1168 VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 1169 mutex_exit(&vcache_lock); 1170 vrelel(vp, 0); 1171 } 1172 1173 /* 1174 * Free an unused, unreferenced vcache node. 1175 * v_interlock locked on entry. 1176 */ 1177 static void 1178 vcache_free(vnode_impl_t *vip) 1179 { 1180 vnode_t *vp; 1181 1182 vp = VIMPL_TO_VNODE(vip); 1183 KASSERT(mutex_owned(vp->v_interlock)); 1184 1185 KASSERT(vp->v_usecount == 0); 1186 KASSERT(vp->v_holdcnt == 0); 1187 KASSERT(vp->v_writecount == 0); 1188 lru_requeue(vp, NULL); 1189 mutex_exit(vp->v_interlock); 1190 1191 vfs_insmntque(vp, NULL); 1192 if (vp->v_type == VBLK || vp->v_type == VCHR) 1193 spec_node_destroy(vp); 1194 1195 rw_destroy(&vip->vi_lock); 1196 uvm_obj_destroy(&vp->v_uobj, true); 1197 cv_destroy(&vp->v_cv); 1198 pool_cache_put(vcache_pool, vip); 1199 } 1200 1201 /* 1202 * Try to get an initial reference on this cached vnode. 1203 * Returns zero on success, ENOENT if the vnode has been reclaimed and 1204 * EBUSY if the vnode state is unstable. 1205 * 1206 * v_interlock locked on entry and unlocked on exit. 1207 */ 1208 int 1209 vcache_tryvget(vnode_t *vp) 1210 { 1211 int error = 0; 1212 1213 KASSERT(mutex_owned(vp->v_interlock)); 1214 1215 if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED)) 1216 error = ENOENT; 1217 else if (__predict_false(VSTATE_GET(vp) != VS_LOADED)) 1218 error = EBUSY; 1219 else if (vp->v_usecount == 0) 1220 vp->v_usecount = 1; 1221 else 1222 atomic_inc_uint(&vp->v_usecount); 1223 1224 mutex_exit(vp->v_interlock); 1225 1226 return error; 1227 } 1228 1229 /* 1230 * Try to get an initial reference on this cached vnode. 1231 * Returns zero on success and ENOENT if the vnode has been reclaimed. 1232 * Will wait for the vnode state to be stable. 1233 * 1234 * v_interlock locked on entry and unlocked on exit. 1235 */ 1236 int 1237 vcache_vget(vnode_t *vp) 1238 { 1239 1240 KASSERT(mutex_owned(vp->v_interlock)); 1241 1242 /* Increment hold count to prevent vnode from disappearing. */ 1243 vp->v_holdcnt++; 1244 VSTATE_WAIT_STABLE(vp); 1245 vp->v_holdcnt--; 1246 1247 /* If this was the last reference to a reclaimed vnode free it now. 
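	 * Either way the caller sees ENOENT; a reclaimed vnode never gains
	 * a new use reference through this function.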
*/ 1248 if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED)) { 1249 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) 1250 vcache_free(VNODE_TO_VIMPL(vp)); 1251 else 1252 mutex_exit(vp->v_interlock); 1253 return ENOENT; 1254 } 1255 VSTATE_ASSERT(vp, VS_LOADED); 1256 if (vp->v_usecount == 0) 1257 vp->v_usecount = 1; 1258 else 1259 atomic_inc_uint(&vp->v_usecount); 1260 1261 mutex_exit(vp->v_interlock); 1262 1263 return 0; 1264 } 1265 1266 /* 1267 * Get a vnode / fs node pair by key and return it referenced through vpp. 1268 */ 1269 int 1270 vcache_get(struct mount *mp, const void *key, size_t key_len, 1271 struct vnode **vpp) 1272 { 1273 int error; 1274 uint32_t hash; 1275 const void *new_key; 1276 struct vnode *vp; 1277 struct vcache_key vcache_key; 1278 vnode_impl_t *vip, *new_vip; 1279 1280 new_key = NULL; 1281 *vpp = NULL; 1282 1283 vcache_key.vk_mount = mp; 1284 vcache_key.vk_key = key; 1285 vcache_key.vk_key_len = key_len; 1286 hash = vcache_hash(&vcache_key); 1287 1288 again: 1289 mutex_enter(&vcache_lock); 1290 vip = vcache_hash_lookup(&vcache_key, hash); 1291 1292 /* If found, take a reference or retry. */ 1293 if (__predict_true(vip != NULL)) { 1294 /* 1295 * If the vnode is loading we cannot take the v_interlock 1296 * here as it might change during load (see uvm_obj_setlock()). 1297 * As changing state from VS_LOADING requires both vcache_lock 1298 * and v_interlock it is safe to test with vcache_lock held. 1299 * 1300 * Wait for vnodes changing state from VS_LOADING and retry. 1301 */ 1302 if (__predict_false(vip->vi_state == VS_LOADING)) { 1303 cv_wait(&vcache_cv, &vcache_lock); 1304 mutex_exit(&vcache_lock); 1305 goto again; 1306 } 1307 vp = VIMPL_TO_VNODE(vip); 1308 mutex_enter(vp->v_interlock); 1309 mutex_exit(&vcache_lock); 1310 error = vcache_vget(vp); 1311 if (error == ENOENT) 1312 goto again; 1313 if (error == 0) 1314 *vpp = vp; 1315 KASSERT((error != 0) == (*vpp == NULL)); 1316 return error; 1317 } 1318 mutex_exit(&vcache_lock); 1319 1320 /* Allocate and initialize a new vcache / vnode pair. */ 1321 error = vfs_busy(mp); 1322 if (error) 1323 return error; 1324 new_vip = vcache_alloc(); 1325 new_vip->vi_key = vcache_key; 1326 vp = VIMPL_TO_VNODE(new_vip); 1327 mutex_enter(&vcache_lock); 1328 vip = vcache_hash_lookup(&vcache_key, hash); 1329 if (vip == NULL) { 1330 SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask], 1331 new_vip, vi_hash); 1332 vip = new_vip; 1333 } 1334 1335 /* If another thread beat us inserting this node, retry. */ 1336 if (vip != new_vip) { 1337 vcache_dealloc(new_vip); 1338 vfs_unbusy(mp); 1339 goto again; 1340 } 1341 mutex_exit(&vcache_lock); 1342 1343 /* Load the fs node. Exclusive as new_node is VS_LOADING. */ 1344 error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key); 1345 if (error) { 1346 mutex_enter(&vcache_lock); 1347 SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1348 new_vip, vnode_impl, vi_hash); 1349 vcache_dealloc(new_vip); 1350 vfs_unbusy(mp); 1351 KASSERT(*vpp == NULL); 1352 return error; 1353 } 1354 KASSERT(new_key != NULL); 1355 KASSERT(memcmp(key, new_key, key_len) == 0); 1356 KASSERT(vp->v_op != NULL); 1357 vfs_insmntque(vp, mp); 1358 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1359 vp->v_vflag |= VV_MPSAFE; 1360 vfs_ref(mp); 1361 vfs_unbusy(mp); 1362 1363 /* Finished loading, finalize node. 
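	 * From here on the key points at the file system's copy (new_key),
	 * and the change to VS_LOADED wakes any concurrent vcache_get()
	 * waiting for this VS_LOADING node.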
*/ 1364 mutex_enter(&vcache_lock); 1365 new_vip->vi_key.vk_key = new_key; 1366 mutex_enter(vp->v_interlock); 1367 VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED); 1368 mutex_exit(vp->v_interlock); 1369 mutex_exit(&vcache_lock); 1370 *vpp = vp; 1371 return 0; 1372 } 1373 1374 /* 1375 * Create a new vnode / fs node pair and return it referenced through vpp. 1376 */ 1377 int 1378 vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap, 1379 kauth_cred_t cred, void *extra, struct vnode **vpp) 1380 { 1381 int error; 1382 uint32_t hash; 1383 struct vnode *vp, *ovp; 1384 vnode_impl_t *vip, *ovip; 1385 1386 *vpp = NULL; 1387 1388 /* Allocate and initialize a new vcache / vnode pair. */ 1389 error = vfs_busy(mp); 1390 if (error) 1391 return error; 1392 vip = vcache_alloc(); 1393 vip->vi_key.vk_mount = mp; 1394 vp = VIMPL_TO_VNODE(vip); 1395 1396 /* Create and load the fs node. */ 1397 error = VFS_NEWVNODE(mp, dvp, vp, vap, cred, extra, 1398 &vip->vi_key.vk_key_len, &vip->vi_key.vk_key); 1399 if (error) { 1400 mutex_enter(&vcache_lock); 1401 vcache_dealloc(vip); 1402 vfs_unbusy(mp); 1403 KASSERT(*vpp == NULL); 1404 return error; 1405 } 1406 KASSERT(vp->v_op != NULL); 1407 KASSERT((vip->vi_key.vk_key_len == 0) == (mp == dead_rootmount)); 1408 if (vip->vi_key.vk_key_len > 0) { 1409 KASSERT(vip->vi_key.vk_key != NULL); 1410 hash = vcache_hash(&vip->vi_key); 1411 1412 /* 1413 * Wait for previous instance to be reclaimed, 1414 * then insert new node. 1415 */ 1416 mutex_enter(&vcache_lock); 1417 while ((ovip = vcache_hash_lookup(&vip->vi_key, hash))) { 1418 ovp = VIMPL_TO_VNODE(ovip); 1419 mutex_enter(ovp->v_interlock); 1420 mutex_exit(&vcache_lock); 1421 error = vcache_vget(ovp); 1422 KASSERT(error == ENOENT); 1423 mutex_enter(&vcache_lock); 1424 } 1425 SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask], 1426 vip, vi_hash); 1427 mutex_exit(&vcache_lock); 1428 } 1429 vfs_insmntque(vp, mp); 1430 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1431 vp->v_vflag |= VV_MPSAFE; 1432 vfs_ref(mp); 1433 vfs_unbusy(mp); 1434 1435 /* Finished loading, finalize node. */ 1436 mutex_enter(&vcache_lock); 1437 mutex_enter(vp->v_interlock); 1438 VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED); 1439 mutex_exit(&vcache_lock); 1440 mutex_exit(vp->v_interlock); 1441 *vpp = vp; 1442 return 0; 1443 } 1444 1445 /* 1446 * Prepare key change: update old cache nodes key and lock new cache node. 1447 * Return an error if the new node already exists. 1448 */ 1449 int 1450 vcache_rekey_enter(struct mount *mp, struct vnode *vp, 1451 const void *old_key, size_t old_key_len, 1452 const void *new_key, size_t new_key_len) 1453 { 1454 uint32_t old_hash, new_hash; 1455 struct vcache_key old_vcache_key, new_vcache_key; 1456 vnode_impl_t *vip, *new_vip; 1457 1458 old_vcache_key.vk_mount = mp; 1459 old_vcache_key.vk_key = old_key; 1460 old_vcache_key.vk_key_len = old_key_len; 1461 old_hash = vcache_hash(&old_vcache_key); 1462 1463 new_vcache_key.vk_mount = mp; 1464 new_vcache_key.vk_key = new_key; 1465 new_vcache_key.vk_key_len = new_key_len; 1466 new_hash = vcache_hash(&new_vcache_key); 1467 1468 new_vip = vcache_alloc(); 1469 new_vip->vi_key = new_vcache_key; 1470 1471 /* Insert locked new node used as placeholder. */ 1472 mutex_enter(&vcache_lock); 1473 vip = vcache_hash_lookup(&new_vcache_key, new_hash); 1474 if (vip != NULL) { 1475 vcache_dealloc(new_vip); 1476 return EEXIST; 1477 } 1478 SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask], 1479 new_vip, vi_hash); 1480 1481 /* Replace old nodes key with the temporary copy. 
*/ 1482 vip = vcache_hash_lookup(&old_vcache_key, old_hash); 1483 KASSERT(vip != NULL); 1484 KASSERT(VIMPL_TO_VNODE(vip) == vp); 1485 KASSERT(vip->vi_key.vk_key != old_vcache_key.vk_key); 1486 vip->vi_key = old_vcache_key; 1487 mutex_exit(&vcache_lock); 1488 return 0; 1489 } 1490 1491 /* 1492 * Key change complete: update old node and remove placeholder. 1493 */ 1494 void 1495 vcache_rekey_exit(struct mount *mp, struct vnode *vp, 1496 const void *old_key, size_t old_key_len, 1497 const void *new_key, size_t new_key_len) 1498 { 1499 uint32_t old_hash, new_hash; 1500 struct vcache_key old_vcache_key, new_vcache_key; 1501 vnode_impl_t *vip, *new_vip; 1502 struct vnode *new_vp; 1503 1504 old_vcache_key.vk_mount = mp; 1505 old_vcache_key.vk_key = old_key; 1506 old_vcache_key.vk_key_len = old_key_len; 1507 old_hash = vcache_hash(&old_vcache_key); 1508 1509 new_vcache_key.vk_mount = mp; 1510 new_vcache_key.vk_key = new_key; 1511 new_vcache_key.vk_key_len = new_key_len; 1512 new_hash = vcache_hash(&new_vcache_key); 1513 1514 mutex_enter(&vcache_lock); 1515 1516 /* Lookup old and new node. */ 1517 vip = vcache_hash_lookup(&old_vcache_key, old_hash); 1518 KASSERT(vip != NULL); 1519 KASSERT(VIMPL_TO_VNODE(vip) == vp); 1520 1521 new_vip = vcache_hash_lookup(&new_vcache_key, new_hash); 1522 KASSERT(new_vip != NULL); 1523 KASSERT(new_vip->vi_key.vk_key_len == new_key_len); 1524 new_vp = VIMPL_TO_VNODE(new_vip); 1525 mutex_enter(new_vp->v_interlock); 1526 VSTATE_ASSERT(VIMPL_TO_VNODE(new_vip), VS_LOADING); 1527 mutex_exit(new_vp->v_interlock); 1528 1529 /* Rekey old node and put it onto its new hashlist. */ 1530 vip->vi_key = new_vcache_key; 1531 if (old_hash != new_hash) { 1532 SLIST_REMOVE(&vcache_hashtab[old_hash & vcache_hashmask], 1533 vip, vnode_impl, vi_hash); 1534 SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask], 1535 vip, vi_hash); 1536 } 1537 1538 /* Remove new node used as placeholder. */ 1539 SLIST_REMOVE(&vcache_hashtab[new_hash & vcache_hashmask], 1540 new_vip, vnode_impl, vi_hash); 1541 vcache_dealloc(new_vip); 1542 } 1543 1544 /* 1545 * Disassociate the underlying file system from a vnode. 1546 * 1547 * Must be called with vnode locked and will return unlocked. 1548 * Must be called with the interlock held, and will return with it held. 1549 */ 1550 static void 1551 vcache_reclaim(vnode_t *vp) 1552 { 1553 lwp_t *l = curlwp; 1554 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 1555 struct mount *mp = vp->v_mount; 1556 uint32_t hash; 1557 uint8_t temp_buf[64], *temp_key; 1558 size_t temp_key_len; 1559 bool recycle, active; 1560 int error; 1561 1562 KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 || 1563 VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1564 KASSERT(mutex_owned(vp->v_interlock)); 1565 KASSERT(vp->v_usecount != 0); 1566 1567 active = (vp->v_usecount > 1); 1568 temp_key_len = vip->vi_key.vk_key_len; 1569 /* 1570 * Prevent the vnode from being recycled or brought into use 1571 * while we clean it out. 1572 */ 1573 VSTATE_CHANGE(vp, VS_LOADED, VS_RECLAIMING); 1574 if (vp->v_iflag & VI_EXECMAP) { 1575 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 1576 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 1577 } 1578 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1579 mutex_exit(vp->v_interlock); 1580 1581 /* Replace the vnode key with a temporary copy. 
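	 * The node stays on the hash list until after VOP_RECLAIM() and
	 * concurrent lookups compare the key bytes, so the key must not
	 * keep pointing into file system memory that VOP_RECLAIM()
	 * typically frees.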
*/ 1582 if (vip->vi_key.vk_key_len > sizeof(temp_buf)) { 1583 temp_key = kmem_alloc(temp_key_len, KM_SLEEP); 1584 } else { 1585 temp_key = temp_buf; 1586 } 1587 if (vip->vi_key.vk_key_len > 0) { 1588 mutex_enter(&vcache_lock); 1589 memcpy(temp_key, vip->vi_key.vk_key, temp_key_len); 1590 vip->vi_key.vk_key = temp_key; 1591 mutex_exit(&vcache_lock); 1592 } 1593 1594 fstrans_start(mp); 1595 1596 /* 1597 * Clean out any cached data associated with the vnode. 1598 * If purging an active vnode, it must be closed and 1599 * deactivated before being reclaimed. 1600 */ 1601 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1602 if (error != 0) { 1603 if (wapbl_vphaswapbl(vp)) 1604 WAPBL_DISCARD(wapbl_vptomp(vp)); 1605 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1606 } 1607 KASSERTMSG((error == 0), "vinvalbuf failed: %d", error); 1608 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1609 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1610 spec_node_revoke(vp); 1611 } 1612 1613 /* 1614 * Disassociate the underlying file system from the vnode. 1615 * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 1616 * the vnode, and may destroy the vnode so that VOP_UNLOCK 1617 * would no longer function. 1618 */ 1619 VOP_INACTIVE(vp, &recycle); 1620 KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 || 1621 VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1622 if (VOP_RECLAIM(vp)) { 1623 vnpanic(vp, "%s: cannot reclaim", __func__); 1624 } 1625 1626 KASSERT(vp->v_data == NULL); 1627 KASSERT(vp->v_uobj.uo_npages == 0); 1628 1629 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1630 uvm_ra_freectx(vp->v_ractx); 1631 vp->v_ractx = NULL; 1632 } 1633 1634 /* Purge name cache. */ 1635 cache_purge(vp); 1636 1637 if (vip->vi_key.vk_key_len > 0) { 1638 /* Remove from vnode cache. */ 1639 hash = vcache_hash(&vip->vi_key); 1640 mutex_enter(&vcache_lock); 1641 KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 1642 SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1643 vip, vnode_impl, vi_hash); 1644 mutex_exit(&vcache_lock); 1645 } 1646 if (temp_key != temp_buf) 1647 kmem_free(temp_key, temp_key_len); 1648 1649 /* Done with purge, notify sleepers of the grim news. */ 1650 mutex_enter(vp->v_interlock); 1651 vp->v_op = dead_vnodeop_p; 1652 vp->v_vflag |= VV_LOCKSWORK; 1653 VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED); 1654 vp->v_tag = VT_NON; 1655 KNOTE(&vp->v_klist, NOTE_REVOKE); 1656 mutex_exit(vp->v_interlock); 1657 1658 /* 1659 * Move to dead mount. Must be after changing the operations 1660 * vector as vnode operations enter the mount before using the 1661 * operations vector. See sys/kern/vnode_if.c. 1662 */ 1663 vp->v_vflag &= ~VV_ROOT; 1664 vfs_ref(dead_rootmount); 1665 vfs_insmntque(vp, dead_rootmount); 1666 1667 mutex_enter(vp->v_interlock); 1668 fstrans_done(mp); 1669 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1670 } 1671 1672 /* 1673 * Disassociate the underlying file system from an open device vnode 1674 * and make it anonymous. 1675 * 1676 * Vnode unlocked on entry, drops a reference to the vnode. 1677 */ 1678 void 1679 vcache_make_anon(vnode_t *vp) 1680 { 1681 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 1682 uint32_t hash; 1683 bool recycle; 1684 1685 KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 1686 KASSERT(vp->v_mount == dead_rootmount || fstrans_is_owner(vp->v_mount)); 1687 VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE); 1688 1689 /* Remove from vnode cache. 
*/ 1690 hash = vcache_hash(&vip->vi_key); 1691 mutex_enter(&vcache_lock); 1692 KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 1693 SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1694 vip, vnode_impl, vi_hash); 1695 vip->vi_key.vk_mount = dead_rootmount; 1696 vip->vi_key.vk_key_len = 0; 1697 vip->vi_key.vk_key = NULL; 1698 mutex_exit(&vcache_lock); 1699 1700 /* 1701 * Disassociate the underlying file system from the vnode. 1702 * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 1703 * the vnode, and may destroy the vnode so that VOP_UNLOCK 1704 * would no longer function. 1705 */ 1706 if (vn_lock(vp, LK_EXCLUSIVE)) { 1707 vnpanic(vp, "%s: cannot lock", __func__); 1708 } 1709 VOP_INACTIVE(vp, &recycle); 1710 KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 || 1711 VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1712 if (VOP_RECLAIM(vp)) { 1713 vnpanic(vp, "%s: cannot reclaim", __func__); 1714 } 1715 1716 /* Purge name cache. */ 1717 cache_purge(vp); 1718 1719 /* Done with purge, change operations vector. */ 1720 mutex_enter(vp->v_interlock); 1721 vp->v_op = spec_vnodeop_p; 1722 vp->v_vflag |= VV_MPSAFE; 1723 vp->v_vflag &= ~VV_LOCKSWORK; 1724 mutex_exit(vp->v_interlock); 1725 1726 /* 1727 * Move to dead mount. Must be after changing the operations 1728 * vector as vnode operations enter the mount before using the 1729 * operations vector. See sys/kern/vnode_if.c. 1730 */ 1731 vfs_ref(dead_rootmount); 1732 vfs_insmntque(vp, dead_rootmount); 1733 1734 vrele(vp); 1735 } 1736 1737 /* 1738 * Update outstanding I/O count and do wakeup if requested. 1739 */ 1740 void 1741 vwakeup(struct buf *bp) 1742 { 1743 vnode_t *vp; 1744 1745 if ((vp = bp->b_vp) == NULL) 1746 return; 1747 1748 KASSERT(bp->b_objlock == vp->v_interlock); 1749 KASSERT(mutex_owned(bp->b_objlock)); 1750 1751 if (--vp->v_numoutput < 0) 1752 vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp); 1753 if (vp->v_numoutput == 0) 1754 cv_broadcast(&vp->v_cv); 1755 } 1756 1757 /* 1758 * Test a vnode for being or becoming dead. Returns one of: 1759 * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. 1760 * ENOENT: vnode is dead. 1761 * 0: otherwise. 1762 * 1763 * Whenever this function returns a non-zero value all future 1764 * calls will also return a non-zero value. 1765 */ 1766 int 1767 vdead_check(struct vnode *vp, int flags) 1768 { 1769 1770 KASSERT(mutex_owned(vp->v_interlock)); 1771 1772 if (! ISSET(flags, VDEAD_NOWAIT)) 1773 VSTATE_WAIT_STABLE(vp); 1774 1775 if (VSTATE_GET(vp) == VS_RECLAIMING) { 1776 KASSERT(ISSET(flags, VDEAD_NOWAIT)); 1777 return EBUSY; 1778 } else if (VSTATE_GET(vp) == VS_RECLAIMED) { 1779 return ENOENT; 1780 } 1781 1782 return 0; 1783 } 1784 1785 int 1786 vfs_drainvnodes(void) 1787 { 1788 int i, gen; 1789 1790 mutex_enter(&vdrain_lock); 1791 for (i = 0; i < 2; i++) { 1792 gen = vdrain_gen; 1793 while (gen == vdrain_gen) { 1794 cv_broadcast(&vdrain_cv); 1795 cv_wait(&vdrain_gen_cv, &vdrain_lock); 1796 } 1797 } 1798 mutex_exit(&vdrain_lock); 1799 1800 if (numvnodes >= desiredvnodes) 1801 return EBUSY; 1802 1803 if (vcache_hashsize != desiredvnodes) 1804 vcache_reinit(); 1805 1806 return 0; 1807 } 1808 1809 void 1810 vnpanic(vnode_t *vp, const char *fmt, ...) 1811 { 1812 va_list ap; 1813 1814 #ifdef DIAGNOSTIC 1815 vprint(NULL, vp); 1816 #endif 1817 va_start(ap, fmt); 1818 vpanic(fmt, ap); 1819 va_end(ap); 1820 } 1821
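
/*
 * Usage example (illustrative only; the function and variable names are
 * hypothetical, the calling pattern follows the vcache_get() contract
 * above): a file system that keys its vnodes by inode number obtains a
 * referenced, loaded vnode roughly like this in its VFS_VGET() path:
 *
 *	int
 *	example_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
 *	{
 *		int error;
 *
 *		error = vcache_get(mp, &ino, sizeof(ino), vpp);
 *		if (error)
 *			return error;
 *		error = vn_lock(*vpp, LK_EXCLUSIVE);
 *		if (error) {
 *			vrele(*vpp);
 *			*vpp = NULL;
 *			return error;
 *		}
 *		return 0;
 *	}
 *
 * vcache_get() checks the hash table first and only calls the file
 * system's VFS_LOADVNODE() when no cached node exists; the vnode it
 * returns is referenced but not locked.
 */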