1*e94a5d02Sriastradh /* $NetBSD: vfs_vnode.c,v 1.156 2024/12/07 02:27:38 riastradh Exp $ */ 2fbc8beaeSrmind 3fbc8beaeSrmind /*- 4d2a0ebb6Sad * Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc. 5fbc8beaeSrmind * All rights reserved. 6fbc8beaeSrmind * 7fbc8beaeSrmind * This code is derived from software contributed to The NetBSD Foundation 8fbc8beaeSrmind * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9fbc8beaeSrmind * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10fbc8beaeSrmind * 11fbc8beaeSrmind * Redistribution and use in source and binary forms, with or without 12fbc8beaeSrmind * modification, are permitted provided that the following conditions 13fbc8beaeSrmind * are met: 14fbc8beaeSrmind * 1. Redistributions of source code must retain the above copyright 15fbc8beaeSrmind * notice, this list of conditions and the following disclaimer. 16fbc8beaeSrmind * 2. Redistributions in binary form must reproduce the above copyright 17fbc8beaeSrmind * notice, this list of conditions and the following disclaimer in the 18fbc8beaeSrmind * documentation and/or other materials provided with the distribution. 19fbc8beaeSrmind * 20fbc8beaeSrmind * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21fbc8beaeSrmind * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22fbc8beaeSrmind * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23fbc8beaeSrmind * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24fbc8beaeSrmind * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25fbc8beaeSrmind * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26fbc8beaeSrmind * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27fbc8beaeSrmind * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28fbc8beaeSrmind * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29fbc8beaeSrmind * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30fbc8beaeSrmind * POSSIBILITY OF SUCH DAMAGE. 31fbc8beaeSrmind */ 32fbc8beaeSrmind 33fbc8beaeSrmind /* 34fbc8beaeSrmind * Copyright (c) 1989, 1993 35fbc8beaeSrmind * The Regents of the University of California. All rights reserved. 36fbc8beaeSrmind * (c) UNIX System Laboratories, Inc. 37fbc8beaeSrmind * All or some portions of this file are derived from material licensed 38fbc8beaeSrmind * to the University of California by American Telephone and Telegraph 39fbc8beaeSrmind * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40fbc8beaeSrmind * the permission of UNIX System Laboratories, Inc. 41fbc8beaeSrmind * 42fbc8beaeSrmind * Redistribution and use in source and binary forms, with or without 43fbc8beaeSrmind * modification, are permitted provided that the following conditions 44fbc8beaeSrmind * are met: 45fbc8beaeSrmind * 1. Redistributions of source code must retain the above copyright 46fbc8beaeSrmind * notice, this list of conditions and the following disclaimer. 47fbc8beaeSrmind * 2. Redistributions in binary form must reproduce the above copyright 48fbc8beaeSrmind * notice, this list of conditions and the following disclaimer in the 49fbc8beaeSrmind * documentation and/or other materials provided with the distribution. 50fbc8beaeSrmind * 3. 
Neither the name of the University nor the names of its contributors 51fbc8beaeSrmind * may be used to endorse or promote products derived from this software 52fbc8beaeSrmind * without specific prior written permission. 53fbc8beaeSrmind * 54fbc8beaeSrmind * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55fbc8beaeSrmind * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56fbc8beaeSrmind * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57fbc8beaeSrmind * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58fbc8beaeSrmind * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59fbc8beaeSrmind * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60fbc8beaeSrmind * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61fbc8beaeSrmind * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62fbc8beaeSrmind * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63fbc8beaeSrmind * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64fbc8beaeSrmind * SUCH DAMAGE. 65fbc8beaeSrmind * 66fbc8beaeSrmind * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67fbc8beaeSrmind */ 68fbc8beaeSrmind 69fbc8beaeSrmind /* 705b686f39Srmind * The vnode cache subsystem. 71fbc8beaeSrmind * 725b686f39Srmind * Life-cycle 73fbc8beaeSrmind * 745b686f39Srmind * Normally, there are two points where new vnodes are created: 755b686f39Srmind * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode 765b686f39Srmind * starts in one of the following ways: 77fbc8beaeSrmind * 7850c0a5b0Shannken * - Allocation, via vcache_get(9) or vcache_new(9). 79998709c4Shannken * - Reclamation of inactive vnode, via vcache_vget(9). 805b686f39Srmind * 812440dfcdSrmind * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9) 822440dfcdSrmind * was another, traditional way. Currently, only the draining thread 832440dfcdSrmind * recycles the vnodes. This behaviour might be revisited. 842440dfcdSrmind * 855b686f39Srmind * The life-cycle ends when the last reference is dropped, usually 865b686f39Srmind * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform 875b686f39Srmind * the file system that vnode is inactive. Via this call, file system 882440dfcdSrmind * indicates whether vnode can be recycled (usually, it checks its own 892440dfcdSrmind * references, e.g. count of links, whether the file was removed). 905b686f39Srmind * 915b686f39Srmind * Depending on indication, vnode can be put into a free list (cache), 92113946c5Shannken * or cleaned via vcache_reclaim, which calls VOP_RECLAIM(9) to 93113946c5Shannken * disassociate underlying file system from the vnode, and finally 94113946c5Shannken * destroyed. 955b686f39Srmind * 9640d12c01Shannken * Vnode state 9740d12c01Shannken * 9840d12c01Shannken * Vnode is always in one of six states: 9940d12c01Shannken * - MARKER This is a marker vnode to help list traversal. It 10040d12c01Shannken * will never change its state. 10140d12c01Shannken * - LOADING Vnode is associating underlying file system and not 10240d12c01Shannken * yet ready to use. 1038e1cefd9Shannken * - LOADED Vnode has associated underlying file system and is 10440d12c01Shannken * ready to use. 10540d12c01Shannken * - BLOCKED Vnode is active but cannot get new references. 10640d12c01Shannken * - RECLAIMING Vnode is disassociating from the underlying file 10740d12c01Shannken * system. 
10840d12c01Shannken * - RECLAIMED Vnode has disassociated from underlying file system 10940d12c01Shannken * and is dead. 11040d12c01Shannken * 11140d12c01Shannken * Valid state changes are: 1128e1cefd9Shannken * LOADING -> LOADED 11340d12c01Shannken * Vnode has been initialised in vcache_get() or 11440d12c01Shannken * vcache_new() and is ready to use. 11551f3958fSad * BLOCKED -> RECLAIMING 11640d12c01Shannken * Vnode starts disassociation from underlying file 117113946c5Shannken * system in vcache_reclaim(). 11840d12c01Shannken * RECLAIMING -> RECLAIMED 11940d12c01Shannken * Vnode finished disassociation from underlying file 120113946c5Shannken * system in vcache_reclaim(). 1218e1cefd9Shannken * LOADED -> BLOCKED 12240d12c01Shannken * Either vcache_rekey*() is changing the vnode key or 12340d12c01Shannken * vrelel() is about to call VOP_INACTIVE(). 1248e1cefd9Shannken * BLOCKED -> LOADED 12540d12c01Shannken * The block condition is over. 12640d12c01Shannken * LOADING -> RECLAIMED 12740d12c01Shannken * Either vcache_get() or vcache_new() failed to 12840d12c01Shannken * associate the underlying file system or vcache_rekey*() 12940d12c01Shannken * drops a vnode used as placeholder. 13040d12c01Shannken * 13140d12c01Shannken * Of these states LOADING, BLOCKED and RECLAIMING are intermediate 13240d12c01Shannken * and it is possible to wait for state change. 13340d12c01Shannken * 13440d12c01Shannken * State is protected with v_interlock with one exception: 13578a3dd75Shannken * to change from LOADING both v_interlock and vcache_lock must be held 13640d12c01Shannken * so it is possible to check "state == LOADING" without holding 13740d12c01Shannken * v_interlock. See vcache_get() for details. 13840d12c01Shannken * 1395b686f39Srmind * Reference counting 1405b686f39Srmind * 1415b686f39Srmind * Vnode is considered active, if reference count (vnode_t::v_usecount) 1425b686f39Srmind * is non-zero. It is maintained using: vref(9) and vrele(9), as well 1435b686f39Srmind * as vput(9), routines. Common points holding references are e.g. 1445b686f39Srmind * file openings, current working directory, mount points, etc. 1455b686f39Srmind * 14651f3958fSad * v_usecount is adjusted with atomic operations, however to change 14751f3958fSad * from a non-zero value to zero the interlock must also be held. 
148fbc8beaeSrmind */ 149fbc8beaeSrmind 150fbc8beaeSrmind #include <sys/cdefs.h> 151*e94a5d02Sriastradh __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.156 2024/12/07 02:27:38 riastradh Exp $"); 15229bbec19Sad 153b48e2f00Sad #ifdef _KERNEL_OPT 15429bbec19Sad #include "opt_pax.h" 155b48e2f00Sad #endif 156fbc8beaeSrmind 157fbc8beaeSrmind #include <sys/param.h> 1581cf06cb4Sriastradh #include <sys/types.h> 159fbc8beaeSrmind 160fbc8beaeSrmind #include <sys/atomic.h> 161fbc8beaeSrmind #include <sys/buf.h> 162fbc8beaeSrmind #include <sys/conf.h> 163fbc8beaeSrmind #include <sys/device.h> 1641cf06cb4Sriastradh #include <sys/fstrans.h> 16542c8d67cShannken #include <sys/hash.h> 166fbc8beaeSrmind #include <sys/kauth.h> 1671cf06cb4Sriastradh #include <sys/kernel.h> 168fbc8beaeSrmind #include <sys/kmem.h> 169fbc8beaeSrmind #include <sys/module.h> 170fbc8beaeSrmind #include <sys/mount.h> 171fbc8beaeSrmind #include <sys/namei.h> 17229bbec19Sad #include <sys/pax.h> 173*e94a5d02Sriastradh #include <sys/sdt.h> 174fbc8beaeSrmind #include <sys/syscallargs.h> 175fbc8beaeSrmind #include <sys/sysctl.h> 176fbc8beaeSrmind #include <sys/systm.h> 1776f60ad1bShannken #include <sys/threadpool.h> 178175d720aShannken #include <sys/vnode_impl.h> 179fbc8beaeSrmind #include <sys/wapbl.h> 180fbc8beaeSrmind 18126784725Sriastradh #include <miscfs/deadfs/deadfs.h> 182fa76fa97Sriastradh #include <miscfs/specfs/specdev.h> 18326784725Sriastradh 184fbc8beaeSrmind #include <uvm/uvm.h> 185fbc8beaeSrmind #include <uvm/uvm_readahead.h> 186fb0bbaf1Sad #include <uvm/uvm_stat.h> 187fbc8beaeSrmind 18865b1f85aShannken /* Flags to vrelel. */ 189fb0bbaf1Sad #define VRELEL_ASYNC 0x0001 /* Always defer to vrele thread. */ 19065b1f85aShannken 191fb0bbaf1Sad #define LRU_VRELE 0 192fb0bbaf1Sad #define LRU_FREE 1 193fb0bbaf1Sad #define LRU_HOLD 2 194fb0bbaf1Sad #define LRU_COUNT 3 195fbc8beaeSrmind 1962440dfcdSrmind /* 19743495351Shannken * There are three lru lists: one holds vnodes waiting for async release, 198fb0bbaf1Sad * one is for vnodes which have no buffer/page references and one for those 199fb0bbaf1Sad * which do (i.e. v_holdcnt is non-zero). We put the lists into a single, 200fb0bbaf1Sad * private cache line as vnodes migrate between them while under the same 201fb0bbaf1Sad * lock (vdrain_lock). 
2022440dfcdSrmind */ 2036f60ad1bShannken 2046f60ad1bShannken typedef struct { 2056f60ad1bShannken vnode_impl_t *li_marker; 2066f60ad1bShannken } lru_iter_t; 2076f60ad1bShannken 208fb0bbaf1Sad u_int numvnodes __cacheline_aligned; 209fb0bbaf1Sad static vnodelst_t lru_list[LRU_COUNT] __cacheline_aligned; 2106f60ad1bShannken static struct threadpool *threadpool; 2116f60ad1bShannken static struct threadpool_job vdrain_job; 2126f60ad1bShannken static struct threadpool_job vrele_job; 21343495351Shannken static kmutex_t vdrain_lock __cacheline_aligned; 2144f55676aShannken SLIST_HEAD(hashhead, vnode_impl); 21578a3dd75Shannken static kmutex_t vcache_lock __cacheline_aligned; 216fb0bbaf1Sad static kcondvar_t vcache_cv; 21778a3dd75Shannken static u_int vcache_hashsize; 21878a3dd75Shannken static u_long vcache_hashmask; 219fb0bbaf1Sad static struct hashhead *vcache_hashtab; 22078a3dd75Shannken static pool_cache_t vcache_pool; 22143495351Shannken static void lru_requeue(vnode_t *, vnodelst_t *); 22243495351Shannken static vnodelst_t * lru_which(vnode_t *); 2236f60ad1bShannken static vnode_impl_t * lru_iter_first(int, lru_iter_t *); 2246f60ad1bShannken static vnode_impl_t * lru_iter_next(lru_iter_t *); 2256f60ad1bShannken static void lru_iter_release(lru_iter_t *); 2264f55676aShannken static vnode_impl_t * vcache_alloc(void); 2271a31dbf3Shannken static void vcache_dealloc(vnode_impl_t *); 2284f55676aShannken static void vcache_free(vnode_impl_t *); 22942c8d67cShannken static void vcache_init(void); 23042c8d67cShannken static void vcache_reinit(void); 231113946c5Shannken static void vcache_reclaim(vnode_t *); 2326f60ad1bShannken static void vrele_deferred(vnode_impl_t *); 233bf6921b5Sad static void vrelel(vnode_t *, int, int); 2343fca8694Schristos static void vnpanic(vnode_t *, const char *, ...) 235a67c3c89Schristos __printflike(2, 3); 2366f60ad1bShannken static bool vdrain_one(u_int); 2376f60ad1bShannken static void vdrain_task(struct threadpool_job *); 2386f60ad1bShannken static void vrele_task(struct threadpool_job *); 239fbc8beaeSrmind 240fbc8beaeSrmind /* Routines having to do with the management of the vnode table. */ 241fbc8beaeSrmind 24223bf8800Sad /* 24351f3958fSad * The high bit of v_usecount is a gate for vcache_tryvget(). It's set 24451f3958fSad * only when the vnode state is LOADED. 2459c423f23Shannken * The next bit of v_usecount is a flag for vrelel(). It's set 2469c423f23Shannken * from vcache_vget() and vcache_tryvget() whenever the operation succeeds. 24751f3958fSad */ 2489c423f23Shannken #define VUSECOUNT_MASK 0x3fffffff 24951f3958fSad #define VUSECOUNT_GATE 0x80000000 2509c423f23Shannken #define VUSECOUNT_VGET 0x40000000 25151f3958fSad 25251f3958fSad /* 25323bf8800Sad * Return the current usecount of a vnode. 25423bf8800Sad */ 25523bf8800Sad inline int 25623bf8800Sad vrefcnt(struct vnode *vp) 25723bf8800Sad { 25823bf8800Sad 25951f3958fSad return atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_MASK; 26023bf8800Sad } 26123bf8800Sad 2621e17b1e3Shannken /* Vnode state operations and diagnostics. 
*/ 2631e17b1e3Shannken 2641e17b1e3Shannken #if defined(DIAGNOSTIC) 2651e17b1e3Shannken 2668e1cefd9Shannken #define VSTATE_VALID(state) \ 2678e1cefd9Shannken ((state) != VS_ACTIVE && (state) != VS_MARKER) 2681e17b1e3Shannken #define VSTATE_GET(vp) \ 2691e17b1e3Shannken vstate_assert_get((vp), __func__, __LINE__) 2701e17b1e3Shannken #define VSTATE_CHANGE(vp, from, to) \ 2711e17b1e3Shannken vstate_assert_change((vp), (from), (to), __func__, __LINE__) 2721e17b1e3Shannken #define VSTATE_WAIT_STABLE(vp) \ 2731e17b1e3Shannken vstate_assert_wait_stable((vp), __func__, __LINE__) 2741e17b1e3Shannken 2758e1cefd9Shannken void 2765db09395Sjoerg _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, 2775db09395Sjoerg bool has_lock) 2781e17b1e3Shannken { 279592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 28023bf8800Sad int refcnt = vrefcnt(vp); 2811e17b1e3Shannken 2825db09395Sjoerg if (!has_lock) { 2836b011e0cSriastradh enum vnode_state vstate = atomic_load_relaxed(&vip->vi_state); 2846b011e0cSriastradh 28523bf8800Sad if (state == VS_ACTIVE && refcnt > 0 && 2866b011e0cSriastradh (vstate == VS_LOADED || vstate == VS_BLOCKED)) 2871e17b1e3Shannken return; 2886b011e0cSriastradh if (vstate == state) 2898e1cefd9Shannken return; 2905db09395Sjoerg mutex_enter((vp)->v_interlock); 2915db09395Sjoerg } 2925db09395Sjoerg 2935db09395Sjoerg KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 2945db09395Sjoerg 29523bf8800Sad if ((state == VS_ACTIVE && refcnt > 0 && 2965db09395Sjoerg (vip->vi_state == VS_LOADED || vip->vi_state == VS_BLOCKED)) || 2975db09395Sjoerg vip->vi_state == state) { 2985db09395Sjoerg if (!has_lock) 2995db09395Sjoerg mutex_exit((vp)->v_interlock); 3005db09395Sjoerg return; 3015db09395Sjoerg } 3028e1cefd9Shannken vnpanic(vp, "state is %s, usecount %d, expected %s at %s:%d", 30323bf8800Sad vstate_name(vip->vi_state), refcnt, 3048e1cefd9Shannken vstate_name(state), func, line); 3051e17b1e3Shannken } 3061e17b1e3Shannken 3074f55676aShannken static enum vnode_state 3081e17b1e3Shannken vstate_assert_get(vnode_t *vp, const char *func, int line) 3091e17b1e3Shannken { 310592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 3111e17b1e3Shannken 3121e17b1e3Shannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 3138e1cefd9Shannken if (! VSTATE_VALID(vip->vi_state)) 3141e17b1e3Shannken vnpanic(vp, "state is %s at %s:%d", 315592be9aeShannken vstate_name(vip->vi_state), func, line); 3161e17b1e3Shannken 317592be9aeShannken return vip->vi_state; 3181e17b1e3Shannken } 3191e17b1e3Shannken 32040d12c01Shannken static void 3211e17b1e3Shannken vstate_assert_wait_stable(vnode_t *vp, const char *func, int line) 3221e17b1e3Shannken { 323592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 3241e17b1e3Shannken 3251e17b1e3Shannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 3268e1cefd9Shannken if (! VSTATE_VALID(vip->vi_state)) 3271e17b1e3Shannken vnpanic(vp, "state is %s at %s:%d", 328592be9aeShannken vstate_name(vip->vi_state), func, line); 3291e17b1e3Shannken 3308e1cefd9Shannken while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) 3311e17b1e3Shannken cv_wait(&vp->v_cv, vp->v_interlock); 3321e17b1e3Shannken 3338e1cefd9Shannken if (! 
VSTATE_VALID(vip->vi_state)) 3341e17b1e3Shannken vnpanic(vp, "state is %s at %s:%d", 335592be9aeShannken vstate_name(vip->vi_state), func, line); 3361e17b1e3Shannken } 3371e17b1e3Shannken 33840d12c01Shannken static void 3394f55676aShannken vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to, 3401e17b1e3Shannken const char *func, int line) 3411e17b1e3Shannken { 34251f3958fSad bool gated = (atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_GATE); 343592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 3441e17b1e3Shannken 3451e17b1e3Shannken KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line); 3464f55676aShannken if (from == VS_LOADING) 34778a3dd75Shannken KASSERTMSG(mutex_owned(&vcache_lock), "at %s:%d", func, line); 3481e17b1e3Shannken 3498e1cefd9Shannken if (! VSTATE_VALID(from)) 3501e17b1e3Shannken vnpanic(vp, "from is %s at %s:%d", 3511e17b1e3Shannken vstate_name(from), func, line); 3528e1cefd9Shannken if (! VSTATE_VALID(to)) 3531e17b1e3Shannken vnpanic(vp, "to is %s at %s:%d", 3541e17b1e3Shannken vstate_name(to), func, line); 355592be9aeShannken if (vip->vi_state != from) 3561e17b1e3Shannken vnpanic(vp, "from is %s, expected %s at %s:%d\n", 357592be9aeShannken vstate_name(vip->vi_state), vstate_name(from), func, line); 35851f3958fSad if ((from == VS_LOADED) != gated) 35951f3958fSad vnpanic(vp, "state is %s, gate %d does not match at %s:%d\n", 36051f3958fSad vstate_name(vip->vi_state), gated, func, line); 36151f3958fSad 36251f3958fSad /* Open/close the gate for vcache_tryvget(). */ 363122a3e8aSriastradh if (to == VS_LOADED) { 364ef3476fbSriastradh membar_release(); 36551f3958fSad atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); 366122a3e8aSriastradh } else { 36751f3958fSad atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE); 368122a3e8aSriastradh } 3691e17b1e3Shannken 3706b011e0cSriastradh atomic_store_relaxed(&vip->vi_state, to); 3714f55676aShannken if (from == VS_LOADING) 37278a3dd75Shannken cv_broadcast(&vcache_cv); 3738e1cefd9Shannken if (to == VS_LOADED || to == VS_RECLAIMED) 3741e17b1e3Shannken cv_broadcast(&vp->v_cv); 3751e17b1e3Shannken } 3761e17b1e3Shannken 3771e17b1e3Shannken #else /* defined(DIAGNOSTIC) */ 3781e17b1e3Shannken 3791e17b1e3Shannken #define VSTATE_GET(vp) \ 3804f55676aShannken (VNODE_TO_VIMPL((vp))->vi_state) 3811e17b1e3Shannken #define VSTATE_CHANGE(vp, from, to) \ 3821e17b1e3Shannken vstate_change((vp), (from), (to)) 3831e17b1e3Shannken #define VSTATE_WAIT_STABLE(vp) \ 3841e17b1e3Shannken vstate_wait_stable((vp)) 3858e1cefd9Shannken void 3860e5b5aa8Sjoerg _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, 3870e5b5aa8Sjoerg bool has_lock) 3888e1cefd9Shannken { 3898e1cefd9Shannken 3908e1cefd9Shannken } 3911e17b1e3Shannken 39240d12c01Shannken static void 3931e17b1e3Shannken vstate_wait_stable(vnode_t *vp) 3941e17b1e3Shannken { 395592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 3961e17b1e3Shannken 3978e1cefd9Shannken while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) 3981e17b1e3Shannken cv_wait(&vp->v_cv, vp->v_interlock); 3991e17b1e3Shannken } 4001e17b1e3Shannken 40140d12c01Shannken static void 4024f55676aShannken vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) 4031e17b1e3Shannken { 404592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 4051e17b1e3Shannken 40651f3958fSad /* Open/close the gate for vcache_tryvget(). 
*/ 407122a3e8aSriastradh if (to == VS_LOADED) { 408ef3476fbSriastradh membar_release(); 40951f3958fSad atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); 410122a3e8aSriastradh } else { 41151f3958fSad atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE); 412122a3e8aSriastradh } 41351f3958fSad 4146b011e0cSriastradh atomic_store_relaxed(&vip->vi_state, to); 4154f55676aShannken if (from == VS_LOADING) 41678a3dd75Shannken cv_broadcast(&vcache_cv); 4178e1cefd9Shannken if (to == VS_LOADED || to == VS_RECLAIMED) 4181e17b1e3Shannken cv_broadcast(&vp->v_cv); 4191e17b1e3Shannken } 4201e17b1e3Shannken 4211e17b1e3Shannken #endif /* defined(DIAGNOSTIC) */ 4221e17b1e3Shannken 423fbc8beaeSrmind void 424fbc8beaeSrmind vfs_vnode_sysinit(void) 425fbc8beaeSrmind { 426fb0bbaf1Sad int error __diagused, i; 427fbc8beaeSrmind 428142e4ca5Shannken dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL); 429142e4ca5Shannken KASSERT(dead_rootmount != NULL); 430f421b366Shannken dead_rootmount->mnt_iflag |= IMNT_MPSAFE; 431d940ddccShannken 43243495351Shannken mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE); 433fb0bbaf1Sad for (i = 0; i < LRU_COUNT; i++) { 434fb0bbaf1Sad TAILQ_INIT(&lru_list[i]); 435fb0bbaf1Sad } 43642c8d67cShannken vcache_init(); 43742c8d67cShannken 4386f60ad1bShannken error = threadpool_get(&threadpool, PRI_NONE); 4396f60ad1bShannken KASSERTMSG((error == 0), "threadpool_get failed: %d", error); 4406f60ad1bShannken threadpool_job_init(&vdrain_job, vdrain_task, &vdrain_lock, "vdrain"); 4416f60ad1bShannken threadpool_job_init(&vrele_job, vrele_task, &vdrain_lock, "vrele"); 442fbc8beaeSrmind } 443fbc8beaeSrmind 444fbc8beaeSrmind /* 4454222e592Shannken * Allocate a new marker vnode. 4464222e592Shannken */ 4474222e592Shannken vnode_t * 4484222e592Shannken vnalloc_marker(struct mount *mp) 4494222e592Shannken { 450592be9aeShannken vnode_impl_t *vip; 451c9685569Shannken vnode_t *vp; 4524222e592Shannken 453592be9aeShannken vip = pool_cache_get(vcache_pool, PR_WAITOK); 454592be9aeShannken memset(vip, 0, sizeof(*vip)); 455592be9aeShannken vp = VIMPL_TO_VNODE(vip); 456d2a0ebb6Sad uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1); 457c9685569Shannken vp->v_mount = mp; 458c9685569Shannken vp->v_type = VBAD; 459d2a0ebb6Sad vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 46075d451f3Sthorpej klist_init(&vip->vi_klist.vk_klist); 46175d451f3Sthorpej vp->v_klist = &vip->vi_klist; 462592be9aeShannken vip->vi_state = VS_MARKER; 463c9685569Shannken 464c9685569Shannken return vp; 4654222e592Shannken } 4664222e592Shannken 4674222e592Shannken /* 4684222e592Shannken * Free a marker vnode. 4694222e592Shannken */ 4704222e592Shannken void 4714222e592Shannken vnfree_marker(vnode_t *vp) 4724222e592Shannken { 473592be9aeShannken vnode_impl_t *vip; 4744222e592Shannken 475592be9aeShannken vip = VNODE_TO_VIMPL(vp); 476592be9aeShannken KASSERT(vip->vi_state == VS_MARKER); 477d2a0ebb6Sad mutex_obj_free(vp->v_interlock); 478c9685569Shannken uvm_obj_destroy(&vp->v_uobj, true); 47975d451f3Sthorpej klist_fini(&vip->vi_klist.vk_klist); 480592be9aeShannken pool_cache_put(vcache_pool, vip); 4814222e592Shannken } 4824222e592Shannken 4834222e592Shannken /* 4844222e592Shannken * Test a vnode for being a marker vnode. 
4854222e592Shannken */ 4864222e592Shannken bool 4874222e592Shannken vnis_marker(vnode_t *vp) 4884222e592Shannken { 4894222e592Shannken 4904f55676aShannken return (VNODE_TO_VIMPL(vp)->vi_state == VS_MARKER); 4914222e592Shannken } 4924222e592Shannken 4934222e592Shannken /* 49443495351Shannken * Return the lru list this node should be on. 495fbc8beaeSrmind */ 49643495351Shannken static vnodelst_t * 49743495351Shannken lru_which(vnode_t *vp) 498fbc8beaeSrmind { 49943495351Shannken 50043495351Shannken KASSERT(mutex_owned(vp->v_interlock)); 50143495351Shannken 50243495351Shannken if (vp->v_holdcnt > 0) 503fb0bbaf1Sad return &lru_list[LRU_HOLD]; 50443495351Shannken else 505fb0bbaf1Sad return &lru_list[LRU_FREE]; 50643495351Shannken } 50743495351Shannken 50843495351Shannken /* 50943495351Shannken * Put vnode to end of given list. 51043495351Shannken * Both the current and the new list may be NULL, used on vnode alloc/free. 51143495351Shannken * Adjust numvnodes and signal vdrain thread if there is work. 51243495351Shannken */ 51343495351Shannken static void 51443495351Shannken lru_requeue(vnode_t *vp, vnodelst_t *listhd) 51543495351Shannken { 516592be9aeShannken vnode_impl_t *vip; 517fb0bbaf1Sad int d; 518fb0bbaf1Sad 519fb0bbaf1Sad /* 520fb0bbaf1Sad * If the vnode is on the correct list, and was put there recently, 521fb0bbaf1Sad * then leave it be, thus avoiding huge cache and lock contention. 522fb0bbaf1Sad */ 523fb0bbaf1Sad vip = VNODE_TO_VIMPL(vp); 524fb0bbaf1Sad if (listhd == vip->vi_lrulisthd && 525983fd9ccSmaxv (getticks() - vip->vi_lrulisttm) < hz) { 526fb0bbaf1Sad return; 527fb0bbaf1Sad } 52843495351Shannken 52943495351Shannken mutex_enter(&vdrain_lock); 530fb0bbaf1Sad d = 0; 531592be9aeShannken if (vip->vi_lrulisthd != NULL) 532592be9aeShannken TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 53343495351Shannken else 534fb0bbaf1Sad d++; 535592be9aeShannken vip->vi_lrulisthd = listhd; 536983fd9ccSmaxv vip->vi_lrulisttm = getticks(); 537592be9aeShannken if (vip->vi_lrulisthd != NULL) 538592be9aeShannken TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 53943495351Shannken else 540fb0bbaf1Sad d--; 541fb0bbaf1Sad if (d != 0) { 542fb0bbaf1Sad /* 543fb0bbaf1Sad * Looks strange? This is not a bug. Don't store 544fb0bbaf1Sad * numvnodes unless there is a change - avoid false 545fb0bbaf1Sad * sharing on MP. 546fb0bbaf1Sad */ 547fb0bbaf1Sad numvnodes += d; 548fb0bbaf1Sad } 5496f60ad1bShannken if (listhd == &lru_list[LRU_VRELE]) 5506f60ad1bShannken threadpool_schedule_job(threadpool, &vrele_job); 5516f60ad1bShannken if (d > 0 && numvnodes > desiredvnodes) 5526f60ad1bShannken threadpool_schedule_job(threadpool, &vdrain_job); 553cc8bf809Shannken if (d > 0 && numvnodes > desiredvnodes + desiredvnodes / 16) 554fdf689ecSriastradh kpause("vnfull", false, MAX(1, mstohz(10)), &vdrain_lock); 55543495351Shannken mutex_exit(&vdrain_lock); 55643495351Shannken } 55743495351Shannken 55843495351Shannken /* 5596f60ad1bShannken * LRU list iterator. 5606f60ad1bShannken * Caller holds vdrain_lock. 
5616f60ad1bShannken */ 5626f60ad1bShannken static vnode_impl_t * 5636f60ad1bShannken lru_iter_first(int idx, lru_iter_t *iterp) 5646f60ad1bShannken { 5656f60ad1bShannken vnode_impl_t *marker; 5666f60ad1bShannken 5676f60ad1bShannken KASSERT(mutex_owned(&vdrain_lock)); 5686f60ad1bShannken 5696f60ad1bShannken mutex_exit(&vdrain_lock); 5706f60ad1bShannken marker = VNODE_TO_VIMPL(vnalloc_marker(NULL)); 5716f60ad1bShannken mutex_enter(&vdrain_lock); 5726f60ad1bShannken marker->vi_lrulisthd = &lru_list[idx]; 5736f60ad1bShannken iterp->li_marker = marker; 5746f60ad1bShannken 5756f60ad1bShannken TAILQ_INSERT_HEAD(marker->vi_lrulisthd, marker, vi_lrulist); 5766f60ad1bShannken 5776f60ad1bShannken return lru_iter_next(iterp); 5786f60ad1bShannken } 5796f60ad1bShannken 5806f60ad1bShannken static vnode_impl_t * 5816f60ad1bShannken lru_iter_next(lru_iter_t *iter) 5826f60ad1bShannken { 5836f60ad1bShannken vnode_impl_t *vip, *marker; 5846f60ad1bShannken vnodelst_t *listhd; 5856f60ad1bShannken 5866f60ad1bShannken KASSERT(mutex_owned(&vdrain_lock)); 5876f60ad1bShannken 5886f60ad1bShannken marker = iter->li_marker; 5896f60ad1bShannken listhd = marker->vi_lrulisthd; 5906f60ad1bShannken 5916f60ad1bShannken while ((vip = TAILQ_NEXT(marker, vi_lrulist))) { 5926f60ad1bShannken TAILQ_REMOVE(listhd, marker, vi_lrulist); 5936f60ad1bShannken TAILQ_INSERT_AFTER(listhd, vip, marker, vi_lrulist); 5946f60ad1bShannken if (!vnis_marker(VIMPL_TO_VNODE(vip))) 5956f60ad1bShannken break; 5966f60ad1bShannken } 5976f60ad1bShannken 5986f60ad1bShannken return vip; 5996f60ad1bShannken } 6006f60ad1bShannken 6016f60ad1bShannken static void 6026f60ad1bShannken lru_iter_release(lru_iter_t *iter) 6036f60ad1bShannken { 6046f60ad1bShannken vnode_impl_t *marker; 6056f60ad1bShannken 6066f60ad1bShannken KASSERT(mutex_owned(&vdrain_lock)); 6076f60ad1bShannken 6086f60ad1bShannken marker = iter->li_marker; 6096f60ad1bShannken TAILQ_REMOVE(marker->vi_lrulisthd, marker, vi_lrulist); 6106f60ad1bShannken 6116f60ad1bShannken mutex_exit(&vdrain_lock); 6126f60ad1bShannken vnfree_marker(VIMPL_TO_VNODE(marker)); 6136f60ad1bShannken mutex_enter(&vdrain_lock); 6146f60ad1bShannken } 6156f60ad1bShannken 6166f60ad1bShannken /* 6177599fb1fShannken * Release deferred vrele vnodes for this mount. 6187599fb1fShannken * Called with file system suspended. 6197599fb1fShannken */ 6207599fb1fShannken void 6217599fb1fShannken vrele_flush(struct mount *mp) 6227599fb1fShannken { 6236f60ad1bShannken lru_iter_t iter; 6246f60ad1bShannken vnode_impl_t *vip; 6257599fb1fShannken 6267599fb1fShannken KASSERT(fstrans_is_owner(mp)); 6277599fb1fShannken 6287599fb1fShannken mutex_enter(&vdrain_lock); 6296f60ad1bShannken for (vip = lru_iter_first(LRU_VRELE, &iter); vip != NULL; 6306f60ad1bShannken vip = lru_iter_next(&iter)) { 6316f60ad1bShannken if (VIMPL_TO_VNODE(vip)->v_mount != mp) 6327599fb1fShannken continue; 6336f60ad1bShannken vrele_deferred(vip); 6342c8f7289Shannken } 6356f60ad1bShannken lru_iter_release(&iter); 6367599fb1fShannken mutex_exit(&vdrain_lock); 6377599fb1fShannken } 6387599fb1fShannken 6397599fb1fShannken /* 6406f60ad1bShannken * One pass through the LRU lists to keep the number of allocated 6416f60ad1bShannken * vnodes below target. Returns true if target met. 
64243495351Shannken */ 6436f60ad1bShannken static bool 6446f60ad1bShannken vdrain_one(u_int target) 64543495351Shannken { 6466f60ad1bShannken int ix, lists[] = { LRU_FREE, LRU_HOLD }; 6476f60ad1bShannken lru_iter_t iter; 6486f60ad1bShannken vnode_impl_t *vip; 6496f60ad1bShannken vnode_t *vp; 650d54b9812Shannken struct mount *mp; 651fbc8beaeSrmind 65243495351Shannken KASSERT(mutex_owned(&vdrain_lock)); 653d54b9812Shannken 6546f60ad1bShannken for (ix = 0; ix < __arraycount(lists); ix++) { 6556f60ad1bShannken for (vip = lru_iter_first(lists[ix], &iter); vip != NULL; 6566f60ad1bShannken vip = lru_iter_next(&iter)) { 6576f60ad1bShannken if (numvnodes < target) { 6586f60ad1bShannken lru_iter_release(&iter); 6596f60ad1bShannken return true; 6606f60ad1bShannken } 6616f60ad1bShannken 6626f60ad1bShannken vp = VIMPL_TO_VNODE(vip); 6636f60ad1bShannken 66443495351Shannken /* Probe usecount (unlocked). */ 66523bf8800Sad if (vrefcnt(vp) > 0) 6666f60ad1bShannken continue; 66743495351Shannken /* Try v_interlock -- we lock the wrong direction! */ 668f3e32599Shannken if (!mutex_tryenter(vp->v_interlock)) 6696f60ad1bShannken continue; 67043495351Shannken /* Probe usecount and state. */ 67123bf8800Sad if (vrefcnt(vp) > 0 || VSTATE_GET(vp) != VS_LOADED) { 67243495351Shannken mutex_exit(vp->v_interlock); 6736f60ad1bShannken continue; 67443495351Shannken } 6756f60ad1bShannken mutex_exit(&vdrain_lock); 6766f60ad1bShannken 677d54b9812Shannken mp = vp->v_mount; 678287643b0Shannken if (fstrans_start_nowait(mp) != 0) { 679d54b9812Shannken mutex_exit(vp->v_interlock); 6806f60ad1bShannken mutex_enter(&vdrain_lock); 6816f60ad1bShannken continue; 682d54b9812Shannken } 683fbc8beaeSrmind 684998709c4Shannken if (vcache_vget(vp) == 0) { 685cf9ded4aShannken if (!vrecycle(vp)) { 686bf6921b5Sad vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 687cf9ded4aShannken mutex_enter(vp->v_interlock); 688bf6921b5Sad vrelel(vp, 0, LK_EXCLUSIVE); 689cf9ded4aShannken } 690f3e32599Shannken } 691d54b9812Shannken fstrans_done(mp); 692cafde4ebShannken 69343495351Shannken mutex_enter(&vdrain_lock); 694fbc8beaeSrmind } 6956f60ad1bShannken lru_iter_release(&iter); 6966f60ad1bShannken } 697fbc8beaeSrmind 6986f60ad1bShannken return false; 69943495351Shannken } 70043495351Shannken 70143495351Shannken /* 7026f60ad1bShannken * threadpool task to keep the number of vnodes below desiredvnodes. 703cafde4ebShannken */ 704cafde4ebShannken static void 7056f60ad1bShannken vdrain_task(struct threadpool_job *job) 706cafde4ebShannken { 70743495351Shannken u_int target; 708cafde4ebShannken 7096f60ad1bShannken target = desiredvnodes - desiredvnodes / 16; 71043495351Shannken 71143495351Shannken mutex_enter(&vdrain_lock); 712cafde4ebShannken 7136f60ad1bShannken while (!vdrain_one(target)) 714c15cfd47Shannken kpause("vdrain", false, 1, &vdrain_lock); 71543495351Shannken 7166f60ad1bShannken threadpool_job_done(job); 7176f60ad1bShannken mutex_exit(&vdrain_lock); 718cafde4ebShannken } 719cafde4ebShannken 7206f60ad1bShannken /* 7216f60ad1bShannken * threadpool task to process asynchronous vrele. 
7226f60ad1bShannken */ 7236f60ad1bShannken static void 7246f60ad1bShannken vrele_task(struct threadpool_job *job) 7256f60ad1bShannken { 7266f60ad1bShannken int skipped; 7276f60ad1bShannken lru_iter_t iter; 7286f60ad1bShannken vnode_impl_t *vip; 7296f60ad1bShannken struct mount *mp; 7306f60ad1bShannken 7316f60ad1bShannken mutex_enter(&vdrain_lock); 7326f60ad1bShannken while ((vip = lru_iter_first(LRU_VRELE, &iter)) != NULL) { 7336f60ad1bShannken for (skipped = 0; vip != NULL; vip = lru_iter_next(&iter)) { 7346f60ad1bShannken mp = VIMPL_TO_VNODE(vip)->v_mount; 7356f60ad1bShannken if (fstrans_start_nowait(mp) == 0) { 7366f60ad1bShannken vrele_deferred(vip); 7376f60ad1bShannken fstrans_done(mp); 738fbc8beaeSrmind } else { 7396f60ad1bShannken skipped++; 740fbc8beaeSrmind } 74143495351Shannken } 7426f60ad1bShannken 7436f60ad1bShannken lru_iter_release(&iter); 7441cf06cb4Sriastradh if (skipped) { 7451cf06cb4Sriastradh kpause("vrele", false, MAX(1, mstohz(10)), 7461cf06cb4Sriastradh &vdrain_lock); 7471cf06cb4Sriastradh } 7486f60ad1bShannken } 7496f60ad1bShannken 7506f60ad1bShannken threadpool_job_done(job); 7516f60ad1bShannken lru_iter_release(&iter); 7526f60ad1bShannken mutex_exit(&vdrain_lock); 753fbc8beaeSrmind } 754fbc8beaeSrmind 755fbc8beaeSrmind /* 756926b25e1Sad * Try to drop reference on a vnode. Abort if we are releasing the 757926b25e1Sad * last reference. Note: this _must_ succeed if not the last reference. 758926b25e1Sad */ 759926b25e1Sad static bool 760926b25e1Sad vtryrele(vnode_t *vp) 761926b25e1Sad { 762926b25e1Sad u_int use, next; 763926b25e1Sad 764ef3476fbSriastradh membar_release(); 765926b25e1Sad for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 76651f3958fSad if (__predict_false((use & VUSECOUNT_MASK) == 1)) { 767926b25e1Sad return false; 768926b25e1Sad } 76951f3958fSad KASSERT((use & VUSECOUNT_MASK) > 1); 770926b25e1Sad next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 771926b25e1Sad if (__predict_true(next == use)) { 772926b25e1Sad return true; 773926b25e1Sad } 774926b25e1Sad } 775926b25e1Sad } 776926b25e1Sad 777926b25e1Sad /* 77805bae7a2Srmind * vput: unlock and release the reference. 779fbc8beaeSrmind */ 780fbc8beaeSrmind void 781fbc8beaeSrmind vput(vnode_t *vp) 782fbc8beaeSrmind { 783bf6921b5Sad int lktype; 784fbc8beaeSrmind 785926b25e1Sad /* 78623bf8800Sad * Do an unlocked check of the usecount. If it looks like we're not 787926b25e1Sad * about to drop the last reference, then unlock the vnode and try 788926b25e1Sad * to drop the reference. If it ends up being the last reference 789926b25e1Sad * after all, vrelel() can fix it all up. Most of the time this 790926b25e1Sad * will all go to plan. 791926b25e1Sad */ 79223bf8800Sad if (vrefcnt(vp) > 1) { 793926b25e1Sad VOP_UNLOCK(vp); 794926b25e1Sad if (vtryrele(vp)) { 795926b25e1Sad return; 796926b25e1Sad } 797926b25e1Sad lktype = LK_NONE; 798bf6921b5Sad } else { 799bf6921b5Sad lktype = VOP_ISLOCKED(vp); 800bf6921b5Sad KASSERT(lktype != LK_NONE); 801bf6921b5Sad } 802bf6921b5Sad mutex_enter(vp->v_interlock); 803bf6921b5Sad vrelel(vp, 0, lktype); 804fbc8beaeSrmind } 805fbc8beaeSrmind 806fbc8beaeSrmind /* 8076f60ad1bShannken * Release a vnode from the deferred list. 
8086f60ad1bShannken */ 8096f60ad1bShannken static void 8106f60ad1bShannken vrele_deferred(vnode_impl_t *vip) 8116f60ad1bShannken { 8126f60ad1bShannken vnode_t *vp; 8136f60ad1bShannken 8146f60ad1bShannken KASSERT(mutex_owned(&vdrain_lock)); 8156f60ad1bShannken KASSERT(vip->vi_lrulisthd == &lru_list[LRU_VRELE]); 8166f60ad1bShannken 8176f60ad1bShannken vp = VIMPL_TO_VNODE(vip); 8186f60ad1bShannken 8196f60ad1bShannken /* 8206f60ad1bShannken * First remove the vnode from the vrele list. 8216f60ad1bShannken * Put it on the last lru list, the last vrele() 8226f60ad1bShannken * will put it back onto the right list before 8236f60ad1bShannken * its usecount reaches zero. 8246f60ad1bShannken */ 8256f60ad1bShannken TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist); 8266f60ad1bShannken vip->vi_lrulisthd = &lru_list[LRU_HOLD]; 8276f60ad1bShannken vip->vi_lrulisttm = getticks(); 8286f60ad1bShannken TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist); 8296f60ad1bShannken 8306f60ad1bShannken mutex_exit(&vdrain_lock); 8316f60ad1bShannken 8326f60ad1bShannken vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 8336f60ad1bShannken mutex_enter(vp->v_interlock); 8346f60ad1bShannken vrelel(vp, 0, LK_EXCLUSIVE); 8356f60ad1bShannken 8366f60ad1bShannken mutex_enter(&vdrain_lock); 8376f60ad1bShannken } 8386f60ad1bShannken 8396f60ad1bShannken /* 840fbc8beaeSrmind * Vnode release. If reference count drops to zero, call inactive 841fbc8beaeSrmind * routine and either return to freelist or free to the pool. 842fbc8beaeSrmind */ 84365b1f85aShannken static void 844bf6921b5Sad vrelel(vnode_t *vp, int flags, int lktype) 845fbc8beaeSrmind { 846fb0bbaf1Sad const bool async = ((flags & VRELEL_ASYNC) != 0); 8470209aa4aShannken bool recycle, defer, objlock_held; 8489c423f23Shannken u_int use, next; 849fbc8beaeSrmind int error; 850fbc8beaeSrmind 8510209aa4aShannken objlock_held = false; 8520209aa4aShannken 8539c423f23Shannken retry: 854e225b7bdSrmind KASSERT(mutex_owned(vp->v_interlock)); 855fbc8beaeSrmind 856fbc8beaeSrmind if (__predict_false(vp->v_op == dead_vnodeop_p && 8574f55676aShannken VSTATE_GET(vp) != VS_RECLAIMED)) { 8583fca8694Schristos vnpanic(vp, "dead but not clean"); 859fbc8beaeSrmind } 860fbc8beaeSrmind 861fbc8beaeSrmind /* 8629c423f23Shannken * If not the last reference, just unlock and drop the reference count. 8639c423f23Shannken * 8649c423f23Shannken * Otherwise make sure we pass a point in time where we hold the 8659c423f23Shannken * last reference with VGET flag unset. 
866fbc8beaeSrmind */ 8679c423f23Shannken for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 8689c423f23Shannken if (__predict_false((use & VUSECOUNT_MASK) > 1)) { 86973e2bd4aShannken if (objlock_held) { 87073e2bd4aShannken objlock_held = false; 87173e2bd4aShannken rw_exit(vp->v_uobj.vmobjlock); 87273e2bd4aShannken } 873bf6921b5Sad if (lktype != LK_NONE) { 8749c423f23Shannken mutex_exit(vp->v_interlock); 8759c423f23Shannken lktype = LK_NONE; 876bf6921b5Sad VOP_UNLOCK(vp); 8779c423f23Shannken mutex_enter(vp->v_interlock); 878bf6921b5Sad } 8799c423f23Shannken if (vtryrele(vp)) { 880e225b7bdSrmind mutex_exit(vp->v_interlock); 881fbc8beaeSrmind return; 882fbc8beaeSrmind } 8839c423f23Shannken next = atomic_load_relaxed(&vp->v_usecount); 8849c423f23Shannken continue; 8859c423f23Shannken } 8869c423f23Shannken KASSERT((use & VUSECOUNT_MASK) == 1); 8879c423f23Shannken next = use & ~VUSECOUNT_VGET; 8889c423f23Shannken if (next != use) { 8899c423f23Shannken next = atomic_cas_uint(&vp->v_usecount, use, next); 8909c423f23Shannken } 8919c423f23Shannken if (__predict_true(next == use)) { 8929c423f23Shannken break; 8939c423f23Shannken } 8949c423f23Shannken } 895ef3476fbSriastradh membar_acquire(); 89623bf8800Sad if (vrefcnt(vp) <= 0 || vp->v_writecount != 0) { 8973fca8694Schristos vnpanic(vp, "%s: bad ref count", __func__); 898fbc8beaeSrmind } 899fbc8beaeSrmind 900872808deShannken #ifdef DIAGNOSTIC 901872808deShannken if ((vp->v_type == VBLK || vp->v_type == VCHR) && 902872808deShannken vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 903872808deShannken vprint("vrelel: missing VOP_CLOSE()", vp); 904872808deShannken } 905872808deShannken #endif 906872808deShannken 907fbc8beaeSrmind /* 9085d699a7aShannken * If already clean there is no need to lock, defer or 9095d699a7aShannken * deactivate this node. 9105d699a7aShannken */ 9115d699a7aShannken if (VSTATE_GET(vp) == VS_RECLAIMED) { 91273e2bd4aShannken if (objlock_held) { 91373e2bd4aShannken objlock_held = false; 91473e2bd4aShannken rw_exit(vp->v_uobj.vmobjlock); 91573e2bd4aShannken } 9165d699a7aShannken if (lktype != LK_NONE) { 9175d699a7aShannken mutex_exit(vp->v_interlock); 9185d699a7aShannken lktype = LK_NONE; 9195d699a7aShannken VOP_UNLOCK(vp); 9205d699a7aShannken mutex_enter(vp->v_interlock); 9215d699a7aShannken } 9225d699a7aShannken goto out; 9235d699a7aShannken } 9245d699a7aShannken 9255d699a7aShannken /* 9261a31dbf3Shannken * First try to get the vnode locked for VOP_INACTIVE(). 9276f60ad1bShannken * Defer vnode release to vrele task if caller requests 9281a31dbf3Shannken * it explicitly, is the pagedaemon or the lock failed. 929fbc8beaeSrmind */ 930bf6921b5Sad defer = false; 9311a31dbf3Shannken if ((curlwp == uvm.pagedaemon_lwp) || async) { 932fbc8beaeSrmind defer = true; 933bf6921b5Sad } else if (lktype == LK_SHARED) { 934bf6921b5Sad /* Excellent chance of getting, if the last ref. */ 9355d699a7aShannken error = vn_lock(vp, LK_UPGRADE | LK_RETRY | LK_NOWAIT); 936bf6921b5Sad if (error != 0) { 937bf6921b5Sad defer = true; 938fb0bbaf1Sad } else { 939bf6921b5Sad lktype = LK_EXCLUSIVE; 940bf6921b5Sad } 941bf6921b5Sad } else if (lktype == LK_NONE) { 942bf6921b5Sad /* Excellent chance of getting, if the last ref. 
*/ 9435d699a7aShannken error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 944bf6921b5Sad if (error != 0) { 945bf6921b5Sad defer = true; 946bf6921b5Sad } else { 947bf6921b5Sad lktype = LK_EXCLUSIVE; 948bf6921b5Sad } 949fbc8beaeSrmind } 950fb83ccaaShannken KASSERT(mutex_owned(vp->v_interlock)); 951fbc8beaeSrmind if (defer) { 952fbc8beaeSrmind /* 9536f60ad1bShannken * Defer reclaim to the vrele task; it's not safe to 954fbc8beaeSrmind * clean it here. We donate it our last reference. 955fbc8beaeSrmind */ 956bf6921b5Sad if (lktype != LK_NONE) { 95773e2bd4aShannken mutex_exit(vp->v_interlock); 958bf6921b5Sad VOP_UNLOCK(vp); 95973e2bd4aShannken mutex_enter(vp->v_interlock); 960bf6921b5Sad } 961fb0bbaf1Sad lru_requeue(vp, &lru_list[LRU_VRELE]); 962e225b7bdSrmind mutex_exit(vp->v_interlock); 963fbc8beaeSrmind return; 964fbc8beaeSrmind } 965bf6921b5Sad KASSERT(lktype == LK_EXCLUSIVE); 9661a31dbf3Shannken 9675213b7d3Shannken /* If the node gained another reference, retry. */ 9685213b7d3Shannken use = atomic_load_relaxed(&vp->v_usecount); 9696b410170Shannken if ((use & VUSECOUNT_VGET) != 0) { 9705213b7d3Shannken goto retry; 9715213b7d3Shannken } 9726b410170Shannken KASSERT((use & VUSECOUNT_MASK) == 1); 9735213b7d3Shannken 9740209aa4aShannken if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP|VI_WRMAP)) != 0 || 9750209aa4aShannken (vp->v_vflag & VV_MAPPED) != 0) { 9760209aa4aShannken /* Take care of space accounting. */ 9770209aa4aShannken if (!objlock_held) { 9780209aa4aShannken objlock_held = true; 9790209aa4aShannken if (!rw_tryenter(vp->v_uobj.vmobjlock, RW_WRITER)) { 9800209aa4aShannken mutex_exit(vp->v_interlock); 9810209aa4aShannken rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 9820209aa4aShannken mutex_enter(vp->v_interlock); 9830209aa4aShannken goto retry; 9840209aa4aShannken } 9850209aa4aShannken } 9860209aa4aShannken if ((vp->v_iflag & VI_EXECMAP) != 0) { 9870209aa4aShannken cpu_count(CPU_COUNT_EXECPAGES, -vp->v_uobj.uo_npages); 9880209aa4aShannken } 9890209aa4aShannken vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 9900209aa4aShannken vp->v_vflag &= ~VV_MAPPED; 9910209aa4aShannken } 9920209aa4aShannken if (objlock_held) { 9930209aa4aShannken objlock_held = false; 9940209aa4aShannken rw_exit(vp->v_uobj.vmobjlock); 9950209aa4aShannken } 9960209aa4aShannken 9971a31dbf3Shannken /* 9985d699a7aShannken * Deactivate the vnode, but preserve our reference across 9995d699a7aShannken * the call to VOP_INACTIVE(). 10005d699a7aShannken * 100151f3958fSad * If VOP_INACTIVE() indicates that the file has been 100251f3958fSad * deleted, then recycle the vnode. 1003fbc8beaeSrmind * 100487fb3229Sriastradh * Note that VOP_INACTIVE() will not drop the vnode lock. 1005fbc8beaeSrmind */ 1006c90f9c8cSad mutex_exit(vp->v_interlock); 10071a31dbf3Shannken recycle = false; 1008fbc8beaeSrmind VOP_INACTIVE(vp, &recycle); 10090209aa4aShannken if (!recycle) { 10100209aa4aShannken lktype = LK_NONE; 10110209aa4aShannken VOP_UNLOCK(vp); 10120209aa4aShannken } 1013e225b7bdSrmind mutex_enter(vp->v_interlock); 101451f3958fSad 101551f3958fSad /* 101651f3958fSad * Block new references then check again to see if a 101751f3958fSad * new reference was acquired in the meantime. If 101851f3958fSad * it was, restore the vnode state and try again. 
101951f3958fSad */ 102051f3958fSad if (recycle) { 102151f3958fSad VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 10229c423f23Shannken use = atomic_load_relaxed(&vp->v_usecount); 10236b410170Shannken if ((use & VUSECOUNT_VGET) != 0) { 10245d699a7aShannken VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 10259c423f23Shannken goto retry; 102651f3958fSad } 10276b410170Shannken KASSERT((use & VUSECOUNT_MASK) == 1); 102851f3958fSad } 1029fbc8beaeSrmind 1030fbc8beaeSrmind /* 10310209aa4aShannken * Recycle the vnode if the file is now unused (unlinked). 1032fbc8beaeSrmind */ 1033fbc8beaeSrmind if (recycle) { 103451f3958fSad VSTATE_ASSERT(vp, VS_BLOCKED); 10350209aa4aShannken KASSERT(lktype == LK_EXCLUSIVE); 10366d3ccf97Sriastradh /* vcache_reclaim drops the lock. */ 10379c423f23Shannken lktype = LK_NONE; 1038113946c5Shannken vcache_reclaim(vp); 1039fbc8beaeSrmind } 104023bf8800Sad KASSERT(vrefcnt(vp) > 0); 10410209aa4aShannken KASSERT(lktype == LK_NONE); 1042fbc8beaeSrmind 10435d699a7aShannken out: 10449c423f23Shannken for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { 10459c423f23Shannken if (__predict_false((use & VUSECOUNT_VGET) != 0 && 10469c423f23Shannken (use & VUSECOUNT_MASK) == 1)) { 10479c423f23Shannken /* Gained and released another reference, retry. */ 10489c423f23Shannken goto retry; 10499c423f23Shannken } 10509c423f23Shannken next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 10519c423f23Shannken if (__predict_true(next == use)) { 10529c423f23Shannken if (__predict_false((use & VUSECOUNT_MASK) != 1)) { 10539c423f23Shannken /* Gained another reference. */ 1054e225b7bdSrmind mutex_exit(vp->v_interlock); 1055fbc8beaeSrmind return; 1056fbc8beaeSrmind } 10579c423f23Shannken break; 10589c423f23Shannken } 10599c423f23Shannken } 1060ef3476fbSriastradh membar_acquire(); 1061fbc8beaeSrmind 1062e0f81f2cShannken if (VSTATE_GET(vp) == VS_RECLAIMED && vp->v_holdcnt == 0) { 1063fbc8beaeSrmind /* 1064fbc8beaeSrmind * It's clean so destroy it. It isn't referenced 1065fbc8beaeSrmind * anywhere since it has been reclaimed. 1066fbc8beaeSrmind */ 10674f55676aShannken vcache_free(VNODE_TO_VIMPL(vp)); 1068fbc8beaeSrmind } else { 1069fbc8beaeSrmind /* 1070fbc8beaeSrmind * Otherwise, put it back onto the freelist. It 1071fbc8beaeSrmind * can't be destroyed while still associated with 1072fbc8beaeSrmind * a file system. 1073fbc8beaeSrmind */ 107443495351Shannken lru_requeue(vp, lru_which(vp)); 1075e225b7bdSrmind mutex_exit(vp->v_interlock); 1076fbc8beaeSrmind } 1077fbc8beaeSrmind } 1078fbc8beaeSrmind 1079fbc8beaeSrmind void 1080fbc8beaeSrmind vrele(vnode_t *vp) 1081fbc8beaeSrmind { 1082fbc8beaeSrmind 1083926b25e1Sad if (vtryrele(vp)) { 1084926b25e1Sad return; 1085926b25e1Sad } 1086e225b7bdSrmind mutex_enter(vp->v_interlock); 1087bf6921b5Sad vrelel(vp, 0, LK_NONE); 1088fbc8beaeSrmind } 1089fbc8beaeSrmind 1090fbc8beaeSrmind /* 1091fbc8beaeSrmind * Asynchronous vnode release, vnode is released in different context. 1092fbc8beaeSrmind */ 1093fbc8beaeSrmind void 1094fbc8beaeSrmind vrele_async(vnode_t *vp) 1095fbc8beaeSrmind { 1096fbc8beaeSrmind 1097926b25e1Sad if (vtryrele(vp)) { 1098926b25e1Sad return; 1099926b25e1Sad } 1100e225b7bdSrmind mutex_enter(vp->v_interlock); 1101bf6921b5Sad vrelel(vp, VRELEL_ASYNC, LK_NONE); 1102fbc8beaeSrmind } 1103fbc8beaeSrmind 1104fbc8beaeSrmind /* 1105fbc8beaeSrmind * Vnode reference, where a reference is already held by some other 1106fbc8beaeSrmind * object (for example, a file structure). 
1107926b25e1Sad * 110851f3958fSad * NB: lockless code sequences may rely on this not blocking. 1109fbc8beaeSrmind */ 1110fbc8beaeSrmind void 1111fbc8beaeSrmind vref(vnode_t *vp) 1112fbc8beaeSrmind { 1113fbc8beaeSrmind 111423bf8800Sad KASSERT(vrefcnt(vp) > 0); 1115fbc8beaeSrmind 1116926b25e1Sad atomic_inc_uint(&vp->v_usecount); 1117fbc8beaeSrmind } 1118fbc8beaeSrmind 1119fbc8beaeSrmind /* 1120fbc8beaeSrmind * Page or buffer structure gets a reference. 1121fbc8beaeSrmind * Called with v_interlock held. 1122fbc8beaeSrmind */ 1123fbc8beaeSrmind void 1124fbc8beaeSrmind vholdl(vnode_t *vp) 1125fbc8beaeSrmind { 1126fbc8beaeSrmind 1127e225b7bdSrmind KASSERT(mutex_owned(vp->v_interlock)); 1128fbc8beaeSrmind 112923bf8800Sad if (vp->v_holdcnt++ == 0 && vrefcnt(vp) == 0) 113043495351Shannken lru_requeue(vp, lru_which(vp)); 1131fbc8beaeSrmind } 1132fbc8beaeSrmind 1133fbc8beaeSrmind /* 1134926b25e1Sad * Page or buffer structure gets a reference. 1135926b25e1Sad */ 1136926b25e1Sad void 1137926b25e1Sad vhold(vnode_t *vp) 1138926b25e1Sad { 1139926b25e1Sad 1140926b25e1Sad mutex_enter(vp->v_interlock); 1141926b25e1Sad vholdl(vp); 1142926b25e1Sad mutex_exit(vp->v_interlock); 1143926b25e1Sad } 1144926b25e1Sad 1145926b25e1Sad /* 1146fbc8beaeSrmind * Page or buffer structure frees a reference. 1147fbc8beaeSrmind * Called with v_interlock held. 1148fbc8beaeSrmind */ 1149fbc8beaeSrmind void 1150fbc8beaeSrmind holdrelel(vnode_t *vp) 1151fbc8beaeSrmind { 1152fbc8beaeSrmind 1153e225b7bdSrmind KASSERT(mutex_owned(vp->v_interlock)); 1154fbc8beaeSrmind 1155fbc8beaeSrmind if (vp->v_holdcnt <= 0) { 11563fca8694Schristos vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 1157fbc8beaeSrmind } 1158fbc8beaeSrmind 1159fbc8beaeSrmind vp->v_holdcnt--; 116023bf8800Sad if (vp->v_holdcnt == 0 && vrefcnt(vp) == 0) 116143495351Shannken lru_requeue(vp, lru_which(vp)); 1162fbc8beaeSrmind } 1163fbc8beaeSrmind 1164fbc8beaeSrmind /* 1165926b25e1Sad * Page or buffer structure frees a reference. 1166926b25e1Sad */ 1167926b25e1Sad void 1168926b25e1Sad holdrele(vnode_t *vp) 1169926b25e1Sad { 1170926b25e1Sad 1171926b25e1Sad mutex_enter(vp->v_interlock); 1172926b25e1Sad holdrelel(vp); 1173926b25e1Sad mutex_exit(vp->v_interlock); 1174926b25e1Sad } 1175926b25e1Sad 1176926b25e1Sad /* 117772439b7dShannken * Recycle an unused vnode if caller holds the last reference. 1178fbc8beaeSrmind */ 117972439b7dShannken bool 118072439b7dShannken vrecycle(vnode_t *vp) 1181fbc8beaeSrmind { 1182f3e32599Shannken int error __diagused; 1183fa45966eShannken 118472439b7dShannken mutex_enter(vp->v_interlock); 118572439b7dShannken 1186f3e32599Shannken /* If the vnode is already clean we're done. */ 118751f3958fSad VSTATE_WAIT_STABLE(vp); 11888e1cefd9Shannken if (VSTATE_GET(vp) != VS_LOADED) { 1189f3e32599Shannken VSTATE_ASSERT(vp, VS_RECLAIMED); 1190bf6921b5Sad vrelel(vp, 0, LK_NONE); 1191f3e32599Shannken return true; 1192f3e32599Shannken } 1193f3e32599Shannken 1194f3e32599Shannken /* Prevent further references until the vnode is locked. */ 11958e1cefd9Shannken VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 119651f3958fSad 119751f3958fSad /* Make sure we hold the last reference. 
*/ 119851f3958fSad if (vrefcnt(vp) != 1) { 119951f3958fSad VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 120051f3958fSad mutex_exit(vp->v_interlock); 120151f3958fSad return false; 120251f3958fSad } 120351f3958fSad 1204f3e32599Shannken mutex_exit(vp->v_interlock); 1205f3e32599Shannken 1206748bb656Shannken /* 1207748bb656Shannken * On a leaf file system this lock will always succeed as we hold 1208748bb656Shannken * the last reference and prevent further references. 1209748bb656Shannken * On layered file systems waiting for the lock would open a can of 1210748bb656Shannken * deadlocks as the lower vnodes may have other active references. 1211748bb656Shannken */ 12126caedad3Shannken error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 1213f3e32599Shannken 1214f3e32599Shannken mutex_enter(vp->v_interlock); 1215748bb656Shannken if (error) { 121651f3958fSad VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); 1217748bb656Shannken mutex_exit(vp->v_interlock); 1218748bb656Shannken return false; 1219748bb656Shannken } 1220748bb656Shannken 122123bf8800Sad KASSERT(vrefcnt(vp) == 1); 1222113946c5Shannken vcache_reclaim(vp); 1223bf6921b5Sad vrelel(vp, 0, LK_NONE); 1224f3e32599Shannken 122572439b7dShannken return true; 1226fbc8beaeSrmind } 1227fbc8beaeSrmind 1228fbc8beaeSrmind /* 1229a8045334Shannken * Helper for vrevoke() to propagate suspension from lastmp 1230a8045334Shannken * to thismp. Both args may be NULL. 1231a8045334Shannken * Returns the currently suspended file system or NULL. 1232a8045334Shannken */ 1233a8045334Shannken static struct mount * 1234a8045334Shannken vrevoke_suspend_next(struct mount *lastmp, struct mount *thismp) 1235a8045334Shannken { 1236a8045334Shannken int error; 1237a8045334Shannken 1238a8045334Shannken if (lastmp == thismp) 1239a8045334Shannken return thismp; 1240a8045334Shannken 1241a8045334Shannken if (lastmp != NULL) 1242a8045334Shannken vfs_resume(lastmp); 1243a8045334Shannken 1244a8045334Shannken if (thismp == NULL) 1245a8045334Shannken return NULL; 1246a8045334Shannken 1247a8045334Shannken do { 1248a8045334Shannken error = vfs_suspend(thismp, 0); 1249a8045334Shannken } while (error == EINTR || error == ERESTART); 1250a8045334Shannken 1251a8045334Shannken if (error == 0) 1252a8045334Shannken return thismp; 1253a8045334Shannken 125441d153faShannken KASSERT(error == EOPNOTSUPP || error == ENOENT); 1255a8045334Shannken return NULL; 1256a8045334Shannken } 1257a8045334Shannken 1258a8045334Shannken /* 1259fbc8beaeSrmind * Eliminate all activity associated with the requested vnode 1260fbc8beaeSrmind * and with all vnodes aliased to the requested vnode. 
1261fbc8beaeSrmind */ 1262fbc8beaeSrmind void 1263fbc8beaeSrmind vrevoke(vnode_t *vp) 1264fbc8beaeSrmind { 12659fc3ca45Shannken struct mount *mp; 12669f9ac3cbShannken vnode_t *vq; 1267fbc8beaeSrmind enum vtype type; 1268fbc8beaeSrmind dev_t dev; 1269fbc8beaeSrmind 127023bf8800Sad KASSERT(vrefcnt(vp) > 0); 1271fbc8beaeSrmind 1272a8045334Shannken mp = vrevoke_suspend_next(NULL, vp->v_mount); 12739fc3ca45Shannken 1274e225b7bdSrmind mutex_enter(vp->v_interlock); 127540d12c01Shannken VSTATE_WAIT_STABLE(vp); 12764f55676aShannken if (VSTATE_GET(vp) == VS_RECLAIMED) { 1277e225b7bdSrmind mutex_exit(vp->v_interlock); 1278fbc8beaeSrmind } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 1279926b25e1Sad atomic_inc_uint(&vp->v_usecount); 12808370a84aSchristos mutex_exit(vp->v_interlock); 12818370a84aSchristos vgone(vp); 1282fbc8beaeSrmind } else { 1283fbc8beaeSrmind dev = vp->v_rdev; 1284fbc8beaeSrmind type = vp->v_type; 1285e225b7bdSrmind mutex_exit(vp->v_interlock); 1286fbc8beaeSrmind 1287a2155d69Sriastradh while (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, &vq) 1288a2155d69Sriastradh == 0) { 1289a8045334Shannken mp = vrevoke_suspend_next(mp, vq->v_mount); 12908370a84aSchristos vgone(vq); 1291fbc8beaeSrmind } 1292fbc8beaeSrmind } 1293a8045334Shannken vrevoke_suspend_next(mp, NULL); 12949fc3ca45Shannken } 1295fbc8beaeSrmind 1296fbc8beaeSrmind /* 1297fbc8beaeSrmind * Eliminate all activity associated with a vnode in preparation for 1298fbc8beaeSrmind * reuse. Drops a reference from the vnode. 1299fbc8beaeSrmind */ 1300fbc8beaeSrmind void 1301fbc8beaeSrmind vgone(vnode_t *vp) 1302fbc8beaeSrmind { 1303bf6921b5Sad int lktype; 1304fbc8beaeSrmind 13051cf06cb4Sriastradh KASSERT(vp->v_mount == dead_rootmount || 13061cf06cb4Sriastradh fstrans_is_owner(vp->v_mount)); 1307e4e82d96Shannken 13086caedad3Shannken vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1309bf6921b5Sad lktype = LK_EXCLUSIVE; 1310e225b7bdSrmind mutex_enter(vp->v_interlock); 13116caedad3Shannken VSTATE_WAIT_STABLE(vp); 1312bf6921b5Sad if (VSTATE_GET(vp) == VS_LOADED) { 131351f3958fSad VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); 1314113946c5Shannken vcache_reclaim(vp); 1315bf6921b5Sad lktype = LK_NONE; 1316bf6921b5Sad } 13176caedad3Shannken VSTATE_ASSERT(vp, VS_RECLAIMED); 1318bf6921b5Sad vrelel(vp, 0, lktype); 1319fbc8beaeSrmind } 1320fbc8beaeSrmind 132142c8d67cShannken static inline uint32_t 132242c8d67cShannken vcache_hash(const struct vcache_key *key) 132342c8d67cShannken { 132442c8d67cShannken uint32_t hash = HASH32_BUF_INIT; 132542c8d67cShannken 13267801661cShannken KASSERT(key->vk_key_len > 0); 13277801661cShannken 132842c8d67cShannken hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); 132942c8d67cShannken hash = hash32_buf(key->vk_key, key->vk_key_len, hash); 133042c8d67cShannken return hash; 133142c8d67cShannken } 133242c8d67cShannken 13330f9be9e9Ssimonb static int 13340f9be9e9Ssimonb vcache_stats(struct hashstat_sysctl *hs, bool fill) 13350f9be9e9Ssimonb { 13360f9be9e9Ssimonb vnode_impl_t *vip; 13370f9be9e9Ssimonb uint64_t chain; 13380f9be9e9Ssimonb 13390f9be9e9Ssimonb strlcpy(hs->hash_name, "vcache", sizeof(hs->hash_name)); 13400f9be9e9Ssimonb strlcpy(hs->hash_desc, "vnode cache hash", sizeof(hs->hash_desc)); 13410f9be9e9Ssimonb if (!fill) 13420f9be9e9Ssimonb return 0; 13430f9be9e9Ssimonb 13440f9be9e9Ssimonb hs->hash_size = vcache_hashmask + 1; 13450f9be9e9Ssimonb 13460f9be9e9Ssimonb for (size_t i = 0; i < hs->hash_size; i++) { 13470f9be9e9Ssimonb chain = 0; 13480f9be9e9Ssimonb mutex_enter(&vcache_lock); 13490f9be9e9Ssimonb 
SLIST_FOREACH(vip, &vcache_hashtab[i], vi_hash) { 13500f9be9e9Ssimonb chain++; 13510f9be9e9Ssimonb } 13520f9be9e9Ssimonb mutex_exit(&vcache_lock); 13530f9be9e9Ssimonb if (chain > 0) { 13540f9be9e9Ssimonb hs->hash_used++; 13550f9be9e9Ssimonb hs->hash_items += chain; 13560f9be9e9Ssimonb if (chain > hs->hash_maxchain) 13570f9be9e9Ssimonb hs->hash_maxchain = chain; 13580f9be9e9Ssimonb } 13590f9be9e9Ssimonb preempt_point(); 13600f9be9e9Ssimonb } 13610f9be9e9Ssimonb 13620f9be9e9Ssimonb return 0; 13630f9be9e9Ssimonb } 13640f9be9e9Ssimonb 136542c8d67cShannken static void 136642c8d67cShannken vcache_init(void) 136742c8d67cShannken { 136842c8d67cShannken 1369926b25e1Sad vcache_pool = pool_cache_init(sizeof(vnode_impl_t), coherency_unit, 1370926b25e1Sad 0, 0, "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL); 137178a3dd75Shannken KASSERT(vcache_pool != NULL); 137278a3dd75Shannken mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE); 137378a3dd75Shannken cv_init(&vcache_cv, "vcache"); 137478a3dd75Shannken vcache_hashsize = desiredvnodes; 137578a3dd75Shannken vcache_hashtab = hashinit(desiredvnodes, HASH_SLIST, true, 137678a3dd75Shannken &vcache_hashmask); 13770f9be9e9Ssimonb hashstat_register("vcache", vcache_stats); 137842c8d67cShannken } 137942c8d67cShannken 138042c8d67cShannken static void 138142c8d67cShannken vcache_reinit(void) 138242c8d67cShannken { 138342c8d67cShannken int i; 138442c8d67cShannken uint32_t hash; 138542c8d67cShannken u_long oldmask, newmask; 138642c8d67cShannken struct hashhead *oldtab, *newtab; 1387592be9aeShannken vnode_impl_t *vip; 138842c8d67cShannken 138942c8d67cShannken newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); 139078a3dd75Shannken mutex_enter(&vcache_lock); 139178a3dd75Shannken oldtab = vcache_hashtab; 139278a3dd75Shannken oldmask = vcache_hashmask; 139378a3dd75Shannken vcache_hashsize = desiredvnodes; 139478a3dd75Shannken vcache_hashtab = newtab; 139578a3dd75Shannken vcache_hashmask = newmask; 139642c8d67cShannken for (i = 0; i <= oldmask; i++) { 1397592be9aeShannken while ((vip = SLIST_FIRST(&oldtab[i])) != NULL) { 1398592be9aeShannken SLIST_REMOVE(&oldtab[i], vip, vnode_impl, vi_hash); 1399592be9aeShannken hash = vcache_hash(&vip->vi_key); 140078a3dd75Shannken SLIST_INSERT_HEAD(&newtab[hash & vcache_hashmask], 1401592be9aeShannken vip, vi_hash); 140242c8d67cShannken } 140342c8d67cShannken } 140478a3dd75Shannken mutex_exit(&vcache_lock); 140542c8d67cShannken hashdone(oldtab, HASH_SLIST, oldmask); 140642c8d67cShannken } 140742c8d67cShannken 14084f55676aShannken static inline vnode_impl_t * 140942c8d67cShannken vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) 141042c8d67cShannken { 141142c8d67cShannken struct hashhead *hashp; 1412592be9aeShannken vnode_impl_t *vip; 141342c8d67cShannken 141478a3dd75Shannken KASSERT(mutex_owned(&vcache_lock)); 141542c8d67cShannken 141678a3dd75Shannken hashp = &vcache_hashtab[hash & vcache_hashmask]; 1417592be9aeShannken SLIST_FOREACH(vip, hashp, vi_hash) { 1418592be9aeShannken if (key->vk_mount != vip->vi_key.vk_mount) 141942c8d67cShannken continue; 1420592be9aeShannken if (key->vk_key_len != vip->vi_key.vk_key_len) 142142c8d67cShannken continue; 1422592be9aeShannken if (memcmp(key->vk_key, vip->vi_key.vk_key, key->vk_key_len)) 142342c8d67cShannken continue; 1424592be9aeShannken return vip; 142542c8d67cShannken } 142642c8d67cShannken return NULL; 142742c8d67cShannken } 142842c8d67cShannken 142942c8d67cShannken /* 1430c9685569Shannken * Allocate a new, uninitialized vcache node. 
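 *
 * The node comes back in state VS_LOADING with one reference already
 * held and is put on the free LRU list, as the body below shows.
 * A minimal caller sketch, mirroring what vcache_get() does with the
 * result (the key is supplied by the caller):
 *
 *	vnode_impl_t *vip = vcache_alloc();
 *	vip->vi_key = vcache_key;
 *	vp = VIMPL_TO_VNODE(vip);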
1431c9685569Shannken */ 14324f55676aShannken static vnode_impl_t * 1433c9685569Shannken vcache_alloc(void) 1434c9685569Shannken { 1435592be9aeShannken vnode_impl_t *vip; 1436c9685569Shannken vnode_t *vp; 1437c9685569Shannken 1438592be9aeShannken vip = pool_cache_get(vcache_pool, PR_WAITOK); 1439d2a0ebb6Sad vp = VIMPL_TO_VNODE(vip); 1440592be9aeShannken memset(vip, 0, sizeof(*vip)); 1441c9685569Shannken 1442926b25e1Sad rw_init(&vip->vi_lock); 1443d2a0ebb6Sad vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 1444d2a0ebb6Sad 1445d2a0ebb6Sad uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1); 144675d451f3Sthorpej klist_init(&vip->vi_klist.vk_klist); 144775d451f3Sthorpej vp->v_klist = &vip->vi_klist; 1448c9685569Shannken cv_init(&vp->v_cv, "vnode"); 1449420e6df1Sad cache_vnode_init(vp); 1450c9685569Shannken 1451c9685569Shannken vp->v_usecount = 1; 1452c9685569Shannken vp->v_type = VNON; 1453c9685569Shannken vp->v_size = vp->v_writesize = VSIZENOTSET; 1454c9685569Shannken 1455592be9aeShannken vip->vi_state = VS_LOADING; 14561e17b1e3Shannken 1457fb0bbaf1Sad lru_requeue(vp, &lru_list[LRU_FREE]); 145843495351Shannken 1459592be9aeShannken return vip; 1460c9685569Shannken } 1461c9685569Shannken 1462c9685569Shannken /* 14631a31dbf3Shannken * Deallocate a vcache node in state VS_LOADING. 14641a31dbf3Shannken * 14651a31dbf3Shannken * vcache_lock held on entry and released on return. 14661a31dbf3Shannken */ 14671a31dbf3Shannken static void 14681a31dbf3Shannken vcache_dealloc(vnode_impl_t *vip) 14691a31dbf3Shannken { 14701a31dbf3Shannken vnode_t *vp; 14711a31dbf3Shannken 14721a31dbf3Shannken KASSERT(mutex_owned(&vcache_lock)); 14731a31dbf3Shannken 14741a31dbf3Shannken vp = VIMPL_TO_VNODE(vip); 1475a041391eShannken vfs_ref(dead_rootmount); 1476a041391eShannken vfs_insmntque(vp, dead_rootmount); 14771a31dbf3Shannken mutex_enter(vp->v_interlock); 14781a31dbf3Shannken vp->v_op = dead_vnodeop_p; 14791a31dbf3Shannken VSTATE_CHANGE(vp, VS_LOADING, VS_RECLAIMED); 14801a31dbf3Shannken mutex_exit(&vcache_lock); 1481bf6921b5Sad vrelel(vp, 0, LK_NONE); 14821a31dbf3Shannken } 14831a31dbf3Shannken 14841a31dbf3Shannken /* 1485c9685569Shannken * Free an unused, unreferenced vcache node. 1486e0f81f2cShannken * v_interlock locked on entry. 
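 * The reference, hold and write counts must all be zero, as asserted
 * below; the node is taken off its LRU list, detached from its mount,
 * and its per-vnode resources (interlock, vnode lock, klist, cv) are
 * destroyed before the memory is returned to vcache_pool.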
1487c9685569Shannken */
1488c9685569Shannken static void
1489592be9aeShannken vcache_free(vnode_impl_t *vip)
1490c9685569Shannken {
1491c9685569Shannken vnode_t *vp;
1492c9685569Shannken 
1493592be9aeShannken vp = VIMPL_TO_VNODE(vip);
1494e0f81f2cShannken KASSERT(mutex_owned(vp->v_interlock));
1495c9685569Shannken 
149623bf8800Sad KASSERT(vrefcnt(vp) == 0);
1497e0f81f2cShannken KASSERT(vp->v_holdcnt == 0);
1498e0f81f2cShannken KASSERT(vp->v_writecount == 0);
149943495351Shannken lru_requeue(vp, NULL);
1500e0f81f2cShannken mutex_exit(vp->v_interlock);
1501e0f81f2cShannken 
1502e0f81f2cShannken vfs_insmntque(vp, NULL);
1503e0f81f2cShannken if (vp->v_type == VBLK || vp->v_type == VCHR)
1504e0f81f2cShannken spec_node_destroy(vp);
1505e0f81f2cShannken 
1506d2a0ebb6Sad mutex_obj_free(vp->v_interlock);
1507926b25e1Sad rw_destroy(&vip->vi_lock);
1508c9685569Shannken uvm_obj_destroy(&vp->v_uobj, true);
1509b116f7a8Sthorpej KASSERT(vp->v_klist == &vip->vi_klist);
151075d451f3Sthorpej klist_fini(&vip->vi_klist.vk_klist);
1511c9685569Shannken cv_destroy(&vp->v_cv);
1512420e6df1Sad cache_vnode_fini(vp);
1513592be9aeShannken pool_cache_put(vcache_pool, vip);
1514c9685569Shannken }
1515c9685569Shannken 
1516c9685569Shannken /*
1517998709c4Shannken * Try to get an initial reference on this cached vnode.
151851f3958fSad * Returns zero on success or EBUSY if the vnode state is not LOADED.
1519998709c4Shannken *
152051f3958fSad * NB: lockless code sequences may rely on this not blocking.
1521998709c4Shannken */
1522998709c4Shannken int
1523998709c4Shannken vcache_tryvget(vnode_t *vp)
1524998709c4Shannken {
152551f3958fSad u_int use, next;
1526998709c4Shannken 
152751f3958fSad for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) {
152851f3958fSad if (__predict_false((use & VUSECOUNT_GATE) == 0)) {
1529*e94a5d02Sriastradh return SET_ERROR(EBUSY);
153051f3958fSad }
15319c423f23Shannken next = atomic_cas_uint(&vp->v_usecount,
15329c423f23Shannken use, (use + 1) | VUSECOUNT_VGET);
153351f3958fSad if (__predict_true(next == use)) {
1534ef3476fbSriastradh membar_acquire();
153551f3958fSad return 0;
153651f3958fSad }
153751f3958fSad }
1538998709c4Shannken }
1539998709c4Shannken 
1540998709c4Shannken /*
1541998709c4Shannken * Try to get an initial reference on this cached vnode.
1542998709c4Shannken * Returns zero on success or ENOENT if the vnode has been reclaimed.
1543998709c4Shannken * Will wait for the vnode state to be stable.
1544998709c4Shannken *
1545998709c4Shannken * v_interlock locked on entry and unlocked on exit.
1546998709c4Shannken */
1547998709c4Shannken int
1548998709c4Shannken vcache_vget(vnode_t *vp)
1549998709c4Shannken {
15509c423f23Shannken int error;
1551998709c4Shannken 
1552998709c4Shannken KASSERT(mutex_owned(vp->v_interlock));
1553998709c4Shannken 
1554e0f81f2cShannken /* Increment hold count to prevent vnode from disappearing. */
1555e0f81f2cShannken vp->v_holdcnt++;
1556998709c4Shannken VSTATE_WAIT_STABLE(vp);
1557e0f81f2cShannken vp->v_holdcnt--;
1558998709c4Shannken 
1559e0f81f2cShannken /* If this was the last reference to a reclaimed vnode, free it now.
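 * The hold count taken above kept the node from being freed while we
 * slept in VSTATE_WAIT_STABLE(); if the vnode was reclaimed in the
 * meantime and no other reference or hold remains, this thread is the
 * one that must free it.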
*/ 1560e0f81f2cShannken if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED)) { 156123bf8800Sad if (vp->v_holdcnt == 0 && vrefcnt(vp) == 0) 1562e0f81f2cShannken vcache_free(VNODE_TO_VIMPL(vp)); 1563e0f81f2cShannken else 1564e0f81f2cShannken mutex_exit(vp->v_interlock); 1565*e94a5d02Sriastradh return SET_ERROR(ENOENT); 1566e0f81f2cShannken } 15678e1cefd9Shannken VSTATE_ASSERT(vp, VS_LOADED); 15689c423f23Shannken error = vcache_tryvget(vp); 15699c423f23Shannken KASSERT(error == 0); 1570998709c4Shannken mutex_exit(vp->v_interlock); 1571998709c4Shannken 1572998709c4Shannken return 0; 1573998709c4Shannken } 1574998709c4Shannken 1575998709c4Shannken /* 157642c8d67cShannken * Get a vnode / fs node pair by key and return it referenced through vpp. 157742c8d67cShannken */ 157842c8d67cShannken int 157942c8d67cShannken vcache_get(struct mount *mp, const void *key, size_t key_len, 158042c8d67cShannken struct vnode **vpp) 158142c8d67cShannken { 158242c8d67cShannken int error; 158342c8d67cShannken uint32_t hash; 158442c8d67cShannken const void *new_key; 158542c8d67cShannken struct vnode *vp; 158642c8d67cShannken struct vcache_key vcache_key; 1587592be9aeShannken vnode_impl_t *vip, *new_vip; 158842c8d67cShannken 158942c8d67cShannken new_key = NULL; 159042c8d67cShannken *vpp = NULL; 159142c8d67cShannken 159242c8d67cShannken vcache_key.vk_mount = mp; 159342c8d67cShannken vcache_key.vk_key = key; 159442c8d67cShannken vcache_key.vk_key_len = key_len; 159542c8d67cShannken hash = vcache_hash(&vcache_key); 159642c8d67cShannken 159742c8d67cShannken again: 159878a3dd75Shannken mutex_enter(&vcache_lock); 1599592be9aeShannken vip = vcache_hash_lookup(&vcache_key, hash); 160042c8d67cShannken 160142c8d67cShannken /* If found, take a reference or retry. */ 1602592be9aeShannken if (__predict_true(vip != NULL)) { 160340d12c01Shannken /* 160440d12c01Shannken * If the vnode is loading we cannot take the v_interlock 160540d12c01Shannken * here as it might change during load (see uvm_obj_setlock()). 160678a3dd75Shannken * As changing state from VS_LOADING requires both vcache_lock 160778a3dd75Shannken * and v_interlock it is safe to test with vcache_lock held. 160840d12c01Shannken * 16094f55676aShannken * Wait for vnodes changing state from VS_LOADING and retry. 161040d12c01Shannken */ 1611592be9aeShannken if (__predict_false(vip->vi_state == VS_LOADING)) { 161278a3dd75Shannken cv_wait(&vcache_cv, &vcache_lock); 161378a3dd75Shannken mutex_exit(&vcache_lock); 161440d12c01Shannken goto again; 161540d12c01Shannken } 1616592be9aeShannken vp = VIMPL_TO_VNODE(vip); 161742c8d67cShannken mutex_enter(vp->v_interlock); 161878a3dd75Shannken mutex_exit(&vcache_lock); 1619998709c4Shannken error = vcache_vget(vp); 162042c8d67cShannken if (error == ENOENT) 162142c8d67cShannken goto again; 162242c8d67cShannken if (error == 0) 162342c8d67cShannken *vpp = vp; 162442c8d67cShannken KASSERT((error != 0) == (*vpp == NULL)); 162542c8d67cShannken return error; 162642c8d67cShannken } 162778a3dd75Shannken mutex_exit(&vcache_lock); 162842c8d67cShannken 162942c8d67cShannken /* Allocate and initialize a new vcache / vnode pair. 
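 * A file system typically reaches this point keyed by something like
 * its inode number; a hedged sketch (the key choice is illustrative,
 * not mandated by this interface):
 *
 *	ino_t ino = ...;
 *	struct vnode *vp;
 *	error = vcache_get(mp, &ino, sizeof(ino), &vp);
 *
 * On a cache miss the new node is loaded below via VFS_LOADVNODE().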
*/ 163020bb034fShannken error = vfs_busy(mp); 163142c8d67cShannken if (error) 163242c8d67cShannken return error; 1633592be9aeShannken new_vip = vcache_alloc(); 1634592be9aeShannken new_vip->vi_key = vcache_key; 1635592be9aeShannken vp = VIMPL_TO_VNODE(new_vip); 163678a3dd75Shannken mutex_enter(&vcache_lock); 1637592be9aeShannken vip = vcache_hash_lookup(&vcache_key, hash); 1638592be9aeShannken if (vip == NULL) { 163978a3dd75Shannken SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask], 1640592be9aeShannken new_vip, vi_hash); 1641592be9aeShannken vip = new_vip; 164242c8d67cShannken } 164342c8d67cShannken 164442c8d67cShannken /* If another thread beat us inserting this node, retry. */ 1645592be9aeShannken if (vip != new_vip) { 16461a31dbf3Shannken vcache_dealloc(new_vip); 164720bb034fShannken vfs_unbusy(mp); 164842c8d67cShannken goto again; 164942c8d67cShannken } 165078a3dd75Shannken mutex_exit(&vcache_lock); 165142c8d67cShannken 16524f55676aShannken /* Load the fs node. Exclusive as new_node is VS_LOADING. */ 165342c8d67cShannken error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key); 165442c8d67cShannken if (error) { 165578a3dd75Shannken mutex_enter(&vcache_lock); 165678a3dd75Shannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1657592be9aeShannken new_vip, vnode_impl, vi_hash); 16581a31dbf3Shannken vcache_dealloc(new_vip); 165920bb034fShannken vfs_unbusy(mp); 166042c8d67cShannken KASSERT(*vpp == NULL); 166142c8d67cShannken return error; 166242c8d67cShannken } 166342c8d67cShannken KASSERT(new_key != NULL); 166442c8d67cShannken KASSERT(memcmp(key, new_key, key_len) == 0); 166542c8d67cShannken KASSERT(vp->v_op != NULL); 166642c8d67cShannken vfs_insmntque(vp, mp); 166742c8d67cShannken if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 166842c8d67cShannken vp->v_vflag |= VV_MPSAFE; 166920bb034fShannken vfs_ref(mp); 167020bb034fShannken vfs_unbusy(mp); 167142c8d67cShannken 167242c8d67cShannken /* Finished loading, finalize node. */ 167378a3dd75Shannken mutex_enter(&vcache_lock); 1674592be9aeShannken new_vip->vi_key.vk_key = new_key; 1675b09a3a32Shannken mutex_enter(vp->v_interlock); 16768e1cefd9Shannken VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED); 1677b09a3a32Shannken mutex_exit(vp->v_interlock); 167878a3dd75Shannken mutex_exit(&vcache_lock); 167942c8d67cShannken *vpp = vp; 168042c8d67cShannken return 0; 168142c8d67cShannken } 168242c8d67cShannken 168342c8d67cShannken /* 1684b01cc29bShannken * Create a new vnode / fs node pair and return it referenced through vpp. 1685b01cc29bShannken */ 1686b01cc29bShannken int 1687b01cc29bShannken vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap, 1688b689ec0fShannken kauth_cred_t cred, void *extra, struct vnode **vpp) 1689b01cc29bShannken { 1690b01cc29bShannken int error; 1691b01cc29bShannken uint32_t hash; 1692592be9aeShannken struct vnode *vp, *ovp; 1693592be9aeShannken vnode_impl_t *vip, *ovip; 1694b01cc29bShannken 1695b01cc29bShannken *vpp = NULL; 1696b01cc29bShannken 1697b01cc29bShannken /* Allocate and initialize a new vcache / vnode pair. */ 169820bb034fShannken error = vfs_busy(mp); 1699b01cc29bShannken if (error) 1700b01cc29bShannken return error; 1701592be9aeShannken vip = vcache_alloc(); 1702592be9aeShannken vip->vi_key.vk_mount = mp; 1703592be9aeShannken vp = VIMPL_TO_VNODE(vip); 1704b01cc29bShannken 1705b01cc29bShannken /* Create and load the fs node. 
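 * VFS_NEWVNODE() is expected to fill in vi_key.vk_key and
 * vi_key.vk_key_len for the node it creates; per the asserts below,
 * a zero-length key is valid only for nodes on dead_rootmount.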
*/
1706b689ec0fShannken error = VFS_NEWVNODE(mp, dvp, vp, vap, cred, extra,
1707592be9aeShannken &vip->vi_key.vk_key_len, &vip->vi_key.vk_key);
1708b01cc29bShannken if (error) {
170978a3dd75Shannken mutex_enter(&vcache_lock);
17101a31dbf3Shannken vcache_dealloc(vip);
171120bb034fShannken vfs_unbusy(mp);
1712b01cc29bShannken KASSERT(*vpp == NULL);
1713b01cc29bShannken return error;
1714b01cc29bShannken }
1715b01cc29bShannken KASSERT(vp->v_op != NULL);
17167801661cShannken KASSERT((vip->vi_key.vk_key_len == 0) == (mp == dead_rootmount));
17177801661cShannken if (vip->vi_key.vk_key_len > 0) {
17187801661cShannken KASSERT(vip->vi_key.vk_key != NULL);
1719592be9aeShannken hash = vcache_hash(&vip->vi_key);
1720b01cc29bShannken 
17217801661cShannken /*
17227801661cShannken * Wait for previous instance to be reclaimed,
17237801661cShannken * then insert new node.
17247801661cShannken */
172578a3dd75Shannken mutex_enter(&vcache_lock);
1726592be9aeShannken while ((ovip = vcache_hash_lookup(&vip->vi_key, hash))) {
1727592be9aeShannken ovp = VIMPL_TO_VNODE(ovip);
172840d12c01Shannken mutex_enter(ovp->v_interlock);
172978a3dd75Shannken mutex_exit(&vcache_lock);
1730998709c4Shannken error = vcache_vget(ovp);
173140d12c01Shannken KASSERT(error == ENOENT);
173278a3dd75Shannken mutex_enter(&vcache_lock);
1733b01cc29bShannken }
173478a3dd75Shannken SLIST_INSERT_HEAD(&vcache_hashtab[hash & vcache_hashmask],
1735592be9aeShannken vip, vi_hash);
173678a3dd75Shannken mutex_exit(&vcache_lock);
17377801661cShannken }
1738b01cc29bShannken vfs_insmntque(vp, mp);
1739b01cc29bShannken if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
1740b01cc29bShannken vp->v_vflag |= VV_MPSAFE;
174120bb034fShannken vfs_ref(mp);
174220bb034fShannken vfs_unbusy(mp);
1743b01cc29bShannken 
1744b01cc29bShannken /* Finished loading, finalize node. */
174578a3dd75Shannken mutex_enter(&vcache_lock);
1746b01cc29bShannken mutex_enter(vp->v_interlock);
17478e1cefd9Shannken VSTATE_CHANGE(vp, VS_LOADING, VS_LOADED);
174878a3dd75Shannken mutex_exit(&vcache_lock);
1749b01cc29bShannken mutex_exit(vp->v_interlock);
1750b01cc29bShannken *vpp = vp;
1751b01cc29bShannken return 0;
1752b01cc29bShannken }
1753b01cc29bShannken 
1754b01cc29bShannken /*
17553b04d6a0Shannken * Prepare key change: update old cache node's key and lock new cache node.
175651464fb4Shannken * Return an error if the new node already exists.
175751464fb4Shannken */
175851464fb4Shannken int
175951464fb4Shannken vcache_rekey_enter(struct mount *mp, struct vnode *vp,
176051464fb4Shannken const void *old_key, size_t old_key_len,
176151464fb4Shannken const void *new_key, size_t new_key_len)
176251464fb4Shannken {
176351464fb4Shannken uint32_t old_hash, new_hash;
176451464fb4Shannken struct vcache_key old_vcache_key, new_vcache_key;
1765592be9aeShannken vnode_impl_t *vip, *new_vip;
176651464fb4Shannken 
176751464fb4Shannken old_vcache_key.vk_mount = mp;
176851464fb4Shannken old_vcache_key.vk_key = old_key;
176951464fb4Shannken old_vcache_key.vk_key_len = old_key_len;
177051464fb4Shannken old_hash = vcache_hash(&old_vcache_key);
177151464fb4Shannken 
177251464fb4Shannken new_vcache_key.vk_mount = mp;
177351464fb4Shannken new_vcache_key.vk_key = new_key;
177451464fb4Shannken new_vcache_key.vk_key_len = new_key_len;
177551464fb4Shannken new_hash = vcache_hash(&new_vcache_key);
177651464fb4Shannken 
1777592be9aeShannken new_vip = vcache_alloc();
1778592be9aeShannken new_vip->vi_key = new_vcache_key;
1779a68d62d6Shannken 
1780a68d62d6Shannken /* Insert locked new node used as placeholder.
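 * The placeholder is left in state VS_LOADING, so a concurrent
 * vcache_get() on the new key will wait in its VS_LOADING loop until
 * vcache_rekey_exit() removes the placeholder again.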
*/
178178a3dd75Shannken mutex_enter(&vcache_lock);
1782592be9aeShannken vip = vcache_hash_lookup(&new_vcache_key, new_hash);
1783592be9aeShannken if (vip != NULL) {
17841a31dbf3Shannken vcache_dealloc(new_vip);
1785*e94a5d02Sriastradh return SET_ERROR(EEXIST);
178651464fb4Shannken }
178778a3dd75Shannken SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask],
1788592be9aeShannken new_vip, vi_hash);
1789a68d62d6Shannken 
17903b04d6a0Shannken /* Replace old node's key with the temporary copy. */
1791592be9aeShannken vip = vcache_hash_lookup(&old_vcache_key, old_hash);
1792592be9aeShannken KASSERT(vip != NULL);
1793592be9aeShannken KASSERT(VIMPL_TO_VNODE(vip) == vp);
1794592be9aeShannken KASSERT(vip->vi_key.vk_key != old_vcache_key.vk_key);
1795592be9aeShannken vip->vi_key = old_vcache_key;
179678a3dd75Shannken mutex_exit(&vcache_lock);
179751464fb4Shannken return 0;
179851464fb4Shannken }
179951464fb4Shannken 
180051464fb4Shannken /*
18013b04d6a0Shannken * Key change complete: update old node and remove placeholder.
180251464fb4Shannken */
180351464fb4Shannken void
180451464fb4Shannken vcache_rekey_exit(struct mount *mp, struct vnode *vp,
180551464fb4Shannken const void *old_key, size_t old_key_len,
180651464fb4Shannken const void *new_key, size_t new_key_len)
180751464fb4Shannken {
180851464fb4Shannken uint32_t old_hash, new_hash;
180951464fb4Shannken struct vcache_key old_vcache_key, new_vcache_key;
1810592be9aeShannken vnode_impl_t *vip, *new_vip;
1811592be9aeShannken struct vnode *new_vp;
181251464fb4Shannken 
181351464fb4Shannken old_vcache_key.vk_mount = mp;
181451464fb4Shannken old_vcache_key.vk_key = old_key;
181551464fb4Shannken old_vcache_key.vk_key_len = old_key_len;
181651464fb4Shannken old_hash = vcache_hash(&old_vcache_key);
181751464fb4Shannken 
181851464fb4Shannken new_vcache_key.vk_mount = mp;
181951464fb4Shannken new_vcache_key.vk_key = new_key;
182051464fb4Shannken new_vcache_key.vk_key_len = new_key_len;
182151464fb4Shannken new_hash = vcache_hash(&new_vcache_key);
182251464fb4Shannken 
182378a3dd75Shannken mutex_enter(&vcache_lock);
1824a68d62d6Shannken 
1825a68d62d6Shannken /* Look up old and new node. */
1826592be9aeShannken vip = vcache_hash_lookup(&old_vcache_key, old_hash);
1827592be9aeShannken KASSERT(vip != NULL);
1828592be9aeShannken KASSERT(VIMPL_TO_VNODE(vip) == vp);
182940d12c01Shannken 
1830592be9aeShannken new_vip = vcache_hash_lookup(&new_vcache_key, new_hash);
1831592be9aeShannken KASSERT(new_vip != NULL);
1832592be9aeShannken KASSERT(new_vip->vi_key.vk_key_len == new_key_len);
1833592be9aeShannken new_vp = VIMPL_TO_VNODE(new_vip);
1834592be9aeShannken mutex_enter(new_vp->v_interlock);
1835592be9aeShannken VSTATE_ASSERT(VIMPL_TO_VNODE(new_vip), VS_LOADING);
18361a31dbf3Shannken mutex_exit(new_vp->v_interlock);
1837a68d62d6Shannken 
1838a68d62d6Shannken /* Rekey old node and put it onto its new hashlist. */
1839592be9aeShannken vip->vi_key = new_vcache_key;
1840a68d62d6Shannken if (old_hash != new_hash) {
184178a3dd75Shannken SLIST_REMOVE(&vcache_hashtab[old_hash & vcache_hashmask],
1842592be9aeShannken vip, vnode_impl, vi_hash);
184378a3dd75Shannken SLIST_INSERT_HEAD(&vcache_hashtab[new_hash & vcache_hashmask],
1844592be9aeShannken vip, vi_hash);
1845a68d62d6Shannken }
1846a68d62d6Shannken 
1847a68d62d6Shannken /* Remove new node used as placeholder.
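 * For reference, the complete rekey protocol as a file system might
 * drive it (a hedged sketch; oldkey/newkey stand in for fs-specific
 * key storage):
 *
 *	error = vcache_rekey_enter(mp, vp, &oldkey, sizeof(oldkey),
 *	    &newkey, sizeof(newkey));
 *	if (error)
 *		return error;
 *	... make the fs node answer to the new key ...
 *	vcache_rekey_exit(mp, vp, &oldkey, sizeof(oldkey),
 *	    &newkey, sizeof(newkey));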
*/ 184878a3dd75Shannken SLIST_REMOVE(&vcache_hashtab[new_hash & vcache_hashmask], 1849592be9aeShannken new_vip, vnode_impl, vi_hash); 18501a31dbf3Shannken vcache_dealloc(new_vip); 185151464fb4Shannken } 185251464fb4Shannken 185351464fb4Shannken /* 1854113946c5Shannken * Disassociate the underlying file system from a vnode. 1855113946c5Shannken * 1856113946c5Shannken * Must be called with vnode locked and will return unlocked. 1857113946c5Shannken * Must be called with the interlock held, and will return with it held. 1858113946c5Shannken */ 1859113946c5Shannken static void 1860113946c5Shannken vcache_reclaim(vnode_t *vp) 1861113946c5Shannken { 1862113946c5Shannken lwp_t *l = curlwp; 1863592be9aeShannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 18644f18a321Shannken struct mount *mp = vp->v_mount; 18652ec6f651Shannken uint32_t hash; 18662ec6f651Shannken uint8_t temp_buf[64], *temp_key; 18672ec6f651Shannken size_t temp_key_len; 186851a3f758Sriastradh bool recycle; 1869113946c5Shannken int error; 1870113946c5Shannken 18719ea3b23dShannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1872113946c5Shannken KASSERT(mutex_owned(vp->v_interlock)); 187323bf8800Sad KASSERT(vrefcnt(vp) != 0); 1874113946c5Shannken 1875592be9aeShannken temp_key_len = vip->vi_key.vk_key_len; 1876113946c5Shannken /* 1877113946c5Shannken * Prevent the vnode from being recycled or brought into use 1878113946c5Shannken * while we clean it out. 1879113946c5Shannken */ 188051f3958fSad VSTATE_CHANGE(vp, VS_BLOCKED, VS_RECLAIMING); 1881b116f7a8Sthorpej 1882b116f7a8Sthorpej /* 1883b116f7a8Sthorpej * Send NOTE_REVOKE now, before we call VOP_RECLAIM(), 1884b116f7a8Sthorpej * because VOP_RECLAIM() could cause vp->v_klist to 1885b116f7a8Sthorpej * become invalid. Don't check for interest in NOTE_REVOKE 1886b116f7a8Sthorpej * here; it's always posted because it sets EV_EOF. 1887b116f7a8Sthorpej * 1888b116f7a8Sthorpej * Once it's been posted, reset vp->v_klist to point to 1889b116f7a8Sthorpej * our own local storage, in case we were sharing with 1890b116f7a8Sthorpej * someone else. 1891b116f7a8Sthorpej */ 1892b116f7a8Sthorpej KNOTE(&vp->v_klist->vk_klist, NOTE_REVOKE); 1893b116f7a8Sthorpej vp->v_klist = &vip->vi_klist; 1894d2a0ebb6Sad mutex_exit(vp->v_interlock); 1895d2a0ebb6Sad 1896d2a0ebb6Sad rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 1897d2a0ebb6Sad mutex_enter(vp->v_interlock); 1898e32f090cSad if ((vp->v_iflag & VI_EXECMAP) != 0) { 1899a98966d3Sad cpu_count(CPU_COUNT_EXECPAGES, -vp->v_uobj.uo_npages); 1900113946c5Shannken } 1901113946c5Shannken vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1902ae660a12Sad vp->v_iflag |= VI_DEADCHECK; /* for genfs_getpages() */ 1903113946c5Shannken mutex_exit(vp->v_interlock); 1904d2a0ebb6Sad rw_exit(vp->v_uobj.vmobjlock); 1905113946c5Shannken 1906420e6df1Sad /* 1907420e6df1Sad * With vnode state set to reclaiming, purge name cache immediately 1908420e6df1Sad * to prevent new handles on vnode, and wait for existing threads 1909420e6df1Sad * trying to get a handle to notice VS_RECLAIMED status and abort. 1910420e6df1Sad */ 1911420e6df1Sad cache_purge(vp); 1912420e6df1Sad 19132ec6f651Shannken /* Replace the vnode key with a temporary copy. 
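 * The copy keeps hash lookups on vi_key valid while VOP_RECLAIM()
 * below may free the file system storage the key pointed into; short
 * keys use the on-stack buffer, longer ones a kmem allocation.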
*/ 1914592be9aeShannken if (vip->vi_key.vk_key_len > sizeof(temp_buf)) { 19152ec6f651Shannken temp_key = kmem_alloc(temp_key_len, KM_SLEEP); 19162ec6f651Shannken } else { 19172ec6f651Shannken temp_key = temp_buf; 19182ec6f651Shannken } 19197801661cShannken if (vip->vi_key.vk_key_len > 0) { 192078a3dd75Shannken mutex_enter(&vcache_lock); 1921592be9aeShannken memcpy(temp_key, vip->vi_key.vk_key, temp_key_len); 1922592be9aeShannken vip->vi_key.vk_key = temp_key; 192378a3dd75Shannken mutex_exit(&vcache_lock); 19247801661cShannken } 19252ec6f651Shannken 1926287643b0Shannken fstrans_start(mp); 19274f18a321Shannken 1928113946c5Shannken /* 1929113946c5Shannken * Clean out any cached data associated with the vnode. 1930113946c5Shannken */ 1931113946c5Shannken error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1932113946c5Shannken if (error != 0) { 1933113946c5Shannken if (wapbl_vphaswapbl(vp)) 1934113946c5Shannken WAPBL_DISCARD(wapbl_vptomp(vp)); 1935113946c5Shannken error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1936113946c5Shannken } 1937113946c5Shannken KASSERTMSG((error == 0), "vinvalbuf failed: %d", error); 1938113946c5Shannken KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 193951a3f758Sriastradh if (vp->v_type == VBLK || vp->v_type == VCHR) { 1940113946c5Shannken spec_node_revoke(vp); 1941113946c5Shannken } 1942113946c5Shannken 1943f3e32599Shannken /* 1944f3e32599Shannken * Disassociate the underlying file system from the vnode. 194551e152b5Sriastradh * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 194651e152b5Sriastradh * the vnode, and may destroy the vnode so that VOP_UNLOCK 194751e152b5Sriastradh * would no longer function. 1948f3e32599Shannken */ 1949f3e32599Shannken VOP_INACTIVE(vp, &recycle); 19509ea3b23dShannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 1951113946c5Shannken if (VOP_RECLAIM(vp)) { 1952113946c5Shannken vnpanic(vp, "%s: cannot reclaim", __func__); 1953113946c5Shannken } 1954113946c5Shannken 1955113946c5Shannken KASSERT(vp->v_data == NULL); 1956bf797310Sad KASSERT((vp->v_iflag & VI_PAGES) == 0); 1957113946c5Shannken 1958113946c5Shannken if (vp->v_type == VREG && vp->v_ractx != NULL) { 1959113946c5Shannken uvm_ra_freectx(vp->v_ractx); 1960113946c5Shannken vp->v_ractx = NULL; 1961113946c5Shannken } 1962113946c5Shannken 19637801661cShannken if (vip->vi_key.vk_key_len > 0) { 19642ec6f651Shannken /* Remove from vnode cache. */ 1965592be9aeShannken hash = vcache_hash(&vip->vi_key); 196678a3dd75Shannken mutex_enter(&vcache_lock); 1967592be9aeShannken KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 196878a3dd75Shannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 1969592be9aeShannken vip, vnode_impl, vi_hash); 197078a3dd75Shannken mutex_exit(&vcache_lock); 19717801661cShannken } 19722ec6f651Shannken if (temp_key != temp_buf) 19732ec6f651Shannken kmem_free(temp_key, temp_key_len); 19742ec6f651Shannken 1975113946c5Shannken /* Done with purge, notify sleepers of the grim news. */ 1976113946c5Shannken mutex_enter(vp->v_interlock); 1977113946c5Shannken vp->v_op = dead_vnodeop_p; 19784f55676aShannken VSTATE_CHANGE(vp, VS_RECLAIMING, VS_RECLAIMED); 1979113946c5Shannken vp->v_tag = VT_NON; 1980799c5cfeShannken mutex_exit(vp->v_interlock); 1981113946c5Shannken 1982799c5cfeShannken /* 1983799c5cfeShannken * Move to dead mount. Must be after changing the operations 1984799c5cfeShannken * vector as vnode operations enter the mount before using the 1985799c5cfeShannken * operations vector. See sys/kern/vnode_if.c. 
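 * At this point the node is VS_RECLAIMED and v_op already points at
 * the dead vnode operations, so any straggler entering through
 * vnode_if.c will be routed to deadfs.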
1986799c5cfeShannken */ 1987799c5cfeShannken vp->v_vflag &= ~VV_ROOT; 1988ebb8f73bShannken vfs_ref(dead_rootmount); 1989799c5cfeShannken vfs_insmntque(vp, dead_rootmount); 1990799c5cfeShannken 199129bbec19Sad #ifdef PAX_SEGVGUARD 199229bbec19Sad pax_segvguard_cleanup(vp); 199329bbec19Sad #endif /* PAX_SEGVGUARD */ 199471e5c80cSad 199571e5c80cSad mutex_enter(vp->v_interlock); 199671e5c80cSad fstrans_done(mp); 199771e5c80cSad KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1998113946c5Shannken } 1999113946c5Shannken 2000113946c5Shannken /* 200128650af9Shannken * Disassociate the underlying file system from an open device vnode 200228650af9Shannken * and make it anonymous. 200328650af9Shannken * 200428650af9Shannken * Vnode unlocked on entry, drops a reference to the vnode. 200528650af9Shannken */ 200628650af9Shannken void 200728650af9Shannken vcache_make_anon(vnode_t *vp) 200828650af9Shannken { 200928650af9Shannken vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 201028650af9Shannken uint32_t hash; 201128650af9Shannken bool recycle; 201228650af9Shannken 201328650af9Shannken KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); 20141cf06cb4Sriastradh KASSERT(vp->v_mount == dead_rootmount || 20151cf06cb4Sriastradh fstrans_is_owner(vp->v_mount)); 201628650af9Shannken VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE); 201728650af9Shannken 201828650af9Shannken /* Remove from vnode cache. */ 201928650af9Shannken hash = vcache_hash(&vip->vi_key); 202028650af9Shannken mutex_enter(&vcache_lock); 202128650af9Shannken KASSERT(vip == vcache_hash_lookup(&vip->vi_key, hash)); 202228650af9Shannken SLIST_REMOVE(&vcache_hashtab[hash & vcache_hashmask], 202328650af9Shannken vip, vnode_impl, vi_hash); 202428650af9Shannken vip->vi_key.vk_mount = dead_rootmount; 202528650af9Shannken vip->vi_key.vk_key_len = 0; 202628650af9Shannken vip->vi_key.vk_key = NULL; 202728650af9Shannken mutex_exit(&vcache_lock); 202828650af9Shannken 202928650af9Shannken /* 203028650af9Shannken * Disassociate the underlying file system from the vnode. 203128650af9Shannken * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks 203228650af9Shannken * the vnode, and may destroy the vnode so that VOP_UNLOCK 203328650af9Shannken * would no longer function. 203428650af9Shannken */ 203528650af9Shannken if (vn_lock(vp, LK_EXCLUSIVE)) { 203628650af9Shannken vnpanic(vp, "%s: cannot lock", __func__); 203728650af9Shannken } 203828650af9Shannken VOP_INACTIVE(vp, &recycle); 20399ea3b23dShannken KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 204028650af9Shannken if (VOP_RECLAIM(vp)) { 204128650af9Shannken vnpanic(vp, "%s: cannot reclaim", __func__); 204228650af9Shannken } 204328650af9Shannken 204428650af9Shannken /* Purge name cache. */ 204528650af9Shannken cache_purge(vp); 204628650af9Shannken 204728650af9Shannken /* Done with purge, change operations vector. */ 204828650af9Shannken mutex_enter(vp->v_interlock); 204928650af9Shannken vp->v_op = spec_vnodeop_p; 20509ea3b23dShannken vp->v_vflag |= VV_MPSAFE; 205128650af9Shannken mutex_exit(vp->v_interlock); 205228650af9Shannken 205328650af9Shannken /* 205428650af9Shannken * Move to dead mount. Must be after changing the operations 205528650af9Shannken * vector as vnode operations enter the mount before using the 205628650af9Shannken * operations vector. See sys/kern/vnode_if.c. 
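 * Unlike vcache_reclaim() above, v_op was switched to spec_vnodeop_p,
 * so the open device keeps working, now as an anonymous vnode on the
 * dead mount.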
205728650af9Shannken */ 205828650af9Shannken vfs_ref(dead_rootmount); 205928650af9Shannken vfs_insmntque(vp, dead_rootmount); 206028650af9Shannken 206128650af9Shannken vrele(vp); 206228650af9Shannken } 206328650af9Shannken 206428650af9Shannken /* 2065fbc8beaeSrmind * Update outstanding I/O count and do wakeup if requested. 2066fbc8beaeSrmind */ 2067fbc8beaeSrmind void 2068fbc8beaeSrmind vwakeup(struct buf *bp) 2069fbc8beaeSrmind { 2070fbc8beaeSrmind vnode_t *vp; 2071fbc8beaeSrmind 2072fbc8beaeSrmind if ((vp = bp->b_vp) == NULL) 2073fbc8beaeSrmind return; 2074fbc8beaeSrmind 2075e225b7bdSrmind KASSERT(bp->b_objlock == vp->v_interlock); 2076fbc8beaeSrmind KASSERT(mutex_owned(bp->b_objlock)); 2077fbc8beaeSrmind 2078fbc8beaeSrmind if (--vp->v_numoutput < 0) 20793fca8694Schristos vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp); 2080fbc8beaeSrmind if (vp->v_numoutput == 0) 2081fbc8beaeSrmind cv_broadcast(&vp->v_cv); 2082fbc8beaeSrmind } 2083fbc8beaeSrmind 2084fbc8beaeSrmind /* 2085f3cf4816Shannken * Test a vnode for being or becoming dead. Returns one of: 2086f3cf4816Shannken * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. 2087f3cf4816Shannken * ENOENT: vnode is dead. 2088f3cf4816Shannken * 0: otherwise. 2089f3cf4816Shannken * 2090f3cf4816Shannken * Whenever this function returns a non-zero value all future 2091f3cf4816Shannken * calls will also return a non-zero value. 2092f3cf4816Shannken */ 2093f3cf4816Shannken int 2094f3cf4816Shannken vdead_check(struct vnode *vp, int flags) 2095f3cf4816Shannken { 2096f3cf4816Shannken 2097f3cf4816Shannken KASSERT(mutex_owned(vp->v_interlock)); 209840d12c01Shannken 209940d12c01Shannken if (! ISSET(flags, VDEAD_NOWAIT)) 210040d12c01Shannken VSTATE_WAIT_STABLE(vp); 210140d12c01Shannken 21024f55676aShannken if (VSTATE_GET(vp) == VS_RECLAIMING) { 210340d12c01Shannken KASSERT(ISSET(flags, VDEAD_NOWAIT)); 2104*e94a5d02Sriastradh return SET_ERROR(EBUSY); 21054f55676aShannken } else if (VSTATE_GET(vp) == VS_RECLAIMED) { 2106*e94a5d02Sriastradh return SET_ERROR(ENOENT); 2107f3cf4816Shannken } 2108f3cf4816Shannken 210940d12c01Shannken return 0; 2110fbc8beaeSrmind } 2111fbc8beaeSrmind 2112fbc8beaeSrmind int 211313fa9caeShannken vfs_drainvnodes(void) 2114fbc8beaeSrmind { 211513fa9caeShannken 211643495351Shannken mutex_enter(&vdrain_lock); 2117cafde4ebShannken 21186f60ad1bShannken if (!vdrain_one(desiredvnodes)) { 21196f60ad1bShannken mutex_exit(&vdrain_lock); 2120*e94a5d02Sriastradh return SET_ERROR(EBUSY); 21216f60ad1bShannken } 21226f60ad1bShannken 21236f60ad1bShannken mutex_exit(&vdrain_lock); 2124cafde4ebShannken 212578a3dd75Shannken if (vcache_hashsize != desiredvnodes) 212642c8d67cShannken vcache_reinit(); 212742c8d67cShannken 2128fbc8beaeSrmind return 0; 2129fbc8beaeSrmind } 2130fbc8beaeSrmind 2131fbc8beaeSrmind void 21323fca8694Schristos vnpanic(vnode_t *vp, const char *fmt, ...) 
2133fbc8beaeSrmind { 21343fca8694Schristos va_list ap; 2135fbc8beaeSrmind 21363fca8694Schristos #ifdef DIAGNOSTIC 2137fbc8beaeSrmind vprint(NULL, vp); 2138fbc8beaeSrmind #endif 21393fca8694Schristos va_start(ap, fmt); 21403fca8694Schristos vpanic(fmt, ap); 21413fca8694Schristos va_end(ap); 2142fbc8beaeSrmind } 2143d2a0ebb6Sad 2144d2a0ebb6Sad void 2145d2a0ebb6Sad vshareilock(vnode_t *tvp, vnode_t *fvp) 2146d2a0ebb6Sad { 2147d2a0ebb6Sad kmutex_t *oldlock; 2148d2a0ebb6Sad 2149d2a0ebb6Sad oldlock = tvp->v_interlock; 2150d2a0ebb6Sad mutex_obj_hold(fvp->v_interlock); 2151d2a0ebb6Sad tvp->v_interlock = fvp->v_interlock; 2152d2a0ebb6Sad mutex_obj_free(oldlock); 2153d2a0ebb6Sad } 215475d451f3Sthorpej 215575d451f3Sthorpej void 215675d451f3Sthorpej vshareklist(vnode_t *tvp, vnode_t *fvp) 215775d451f3Sthorpej { 215875d451f3Sthorpej /* 215975d451f3Sthorpej * If two vnodes share klist state, they must also share 216075d451f3Sthorpej * an interlock. 216175d451f3Sthorpej */ 216275d451f3Sthorpej KASSERT(tvp->v_interlock == fvp->v_interlock); 216375d451f3Sthorpej 216475d451f3Sthorpej /* 216575d451f3Sthorpej * We make the following assumptions: 216675d451f3Sthorpej * 216775d451f3Sthorpej * ==> Some other synchronization is happening outside of 216875d451f3Sthorpej * our view to make this safe. 216975d451f3Sthorpej * 217075d451f3Sthorpej * ==> That the "to" vnode will have the necessary references 217175d451f3Sthorpej * on the "from" vnode so that the storage for the klist 217275d451f3Sthorpej * won't be yanked out from beneath us (the vnode_impl). 217375d451f3Sthorpej * 217475d451f3Sthorpej * ==> If "from" is also sharing, we then assume that "from" 217575d451f3Sthorpej * has the necessary references, and so on. 217675d451f3Sthorpej */ 217775d451f3Sthorpej tvp->v_klist = fvp->v_klist; 217875d451f3Sthorpej } 2179
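
/*
 * Example: per the assumptions documented in vshareklist() above, a
 * caller that wants two vnodes to share knote state must share the
 * interlock first (a hedged sketch; tvp/fvp as in the functions
 * above):
 *
 *	vshareilock(tvp, fvp);
 *	vshareklist(tvp, fvp);
 */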