/*	$NetBSD: vfs_vnode.c,v 1.46 2015/11/12 11:35:42 hannken Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via vcache_get(9) or vcache_new(9).
 *	- Reclamation of an inactive vnode, via vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In such a case, VOP_INACTIVE(9) is called to
 *	inform the file system that the vnode is inactive.  Via this call,
 *	the file system indicates whether the vnode can be recycled
 *	(usually, it checks its own references, e.g. the link count or
 *	whether the file was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 *	disassociate the underlying file system from the vnode, and finally
 *	destroyed.
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained using
 *	the vref(9) and vrele(9) routines, as well as vput(9).  Common
 *	holders of references are e.g. open files, the current working
 *	directory, mount points, etc.
 *
 * Note on v_usecount and its locking
 *
 *	At nearly all points it is known that if v_usecount could be zero,
 *	the vnode_t::v_interlock will be held.  To change v_usecount away
 *	from zero, the interlock must be held.  To change from a non-zero
 *	value to zero, again the interlock must be held.
 *
 *	Changing the usecount from a non-zero value to a non-zero value can
 *	safely be done using atomic operations, without the interlock held.
 *
 *	Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
 *	mntvnode_lock is still held.
 *
 *	See PR 41374.
 */
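/*
 * An illustrative sketch of the reference idioms described above; the
 * helper below is hypothetical and assumes the caller already holds one
 * reference on the vnode.
 */
#if 0
static void
example_reference_idioms(vnode_t *vp)
{

	/* Take an additional reference while one is already held. */
	vref(vp);

	/* Drop it again; dropping the last reference ends up in vrelel(). */
	vrele(vp);

	/* With the vnode lock held, vput() unlocks and then releases. */
	vref(vp);
	if (vn_lock(vp, LK_EXCLUSIVE) == 0)
		vput(vp);
	else
		vrele(vp);
}
#endif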
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.46 2015/11/12 11:35:42 hannken Exp $");

#define _VFS_VNODE_PRIVATE

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

struct vcache_key {
	struct mount *vk_mount;
	const void *vk_key;
	size_t vk_key_len;
};
struct vcache_node {
	SLIST_ENTRY(vcache_node) vn_hash;
	struct vnode *vn_vnode;
	struct vcache_key vn_key;
};

u_int			numvnodes		__cacheline_aligned;

static pool_cache_t	vnode_cache		__read_mostly;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * The vnode recycling mechanism first attempts to look into the former list.
 */
static kmutex_t		vnode_free_list_lock	__cacheline_aligned;
static vnodelst_t	vnode_free_list		__cacheline_aligned;
static vnodelst_t	vnode_hold_list		__cacheline_aligned;
static kcondvar_t	vdrain_cv		__cacheline_aligned;

static vnodelst_t	vrele_list		__cacheline_aligned;
static kmutex_t		vrele_lock		__cacheline_aligned;
static kcondvar_t	vrele_cv		__cacheline_aligned;
static lwp_t *		vrele_lwp		__cacheline_aligned;
static int		vrele_pending		__cacheline_aligned;
static int		vrele_gen		__cacheline_aligned;

SLIST_HEAD(hashhead, vcache_node);
static struct {
	kmutex_t	lock;
	u_long		hashmask;
	struct hashhead	*hashtab;
	pool_cache_t	pool;
}			vcache			__cacheline_aligned;

static int		cleanvnode(void);
static void		vcache_init(void);
static void		vcache_reinit(void);
static void		vclean(vnode_t *);
static void		vrelel(vnode_t *, int);
static void		vdrain_thread(void *);
static void		vrele_thread(void *);
static void		vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void		vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern struct mount	*dead_rootmount;
extern int		(**dead_vnodeop_p)(void *);
extern struct vfsops	dead_vfsops;

void
vfs_vnode_sysinit(void)
{
	int error __diagused;

	vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_rootmount != NULL);
	dead_rootmount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERT(error == 0);
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERT(error == 0);
}

/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 */
vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	memset(vp, 0, sizeof(*vp));
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * Done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
		return vp;
	}

	mutex_enter(&vnode_free_list_lock);
	numvnodes++;
	if (numvnodes > desiredvnodes + desiredvnodes / 10)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);

	rw_init(&vp->v_lock);
	vp->v_usecount = 1;
	vp->v_type = VNON;
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	return vp;
}

/*
 * Free an unused, unreferenced vnode.
 */
void
vnfree(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 0);

	if ((vp->v_iflag & VI_MARKER) == 0) {
		rw_destroy(&vp->v_lock);
		mutex_enter(&vnode_free_list_lock);
		numvnodes--;
		mutex_exit(&vnode_free_list_lock);
	}

	uvm_obj_destroy(&vp->v_uobj, true);
	cv_destroy(&vp->v_cv);
	pool_cache_put(vnode_cache, vp);
}

/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
			continue;
		if (!mutex_tryenter(vp->v_interlock)) {
			VOP_UNLOCK(vp);
			continue;
		}
		KASSERT((vp->v_iflag & VI_XLOCK) == 0);
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			VOP_UNLOCK(vp);
			continue;
		}
		break;
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	fstrans_done(mp);

	return 0;
}

/*
 * Helper thread to keep the number of vnodes below desiredvnodes.
 */
static void
vdrain_thread(void *cookie)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	for (;;) {
		cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
		while (numvnodes > desiredvnodes) {
			error = cleanvnode();
			if (error)
				kpause("vndsbusy", false, hz, NULL);
			mutex_enter(&vnode_free_list_lock);
			if (error)
				break;
		}
	}
}

/*
 * Remove a vnode from its freelist.
 */
void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vget: get a particular vnode from the free list, increment its reference
 * count and lock it.
 *
 * => Should be called with v_interlock held.
 *
 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
 * In that case, we cannot grab the vnode, so the process is awakened when
 * the transition is completed, and an error returned to indicate that the
 * vnode is no longer usable.
 */
int
vget(vnode_t *vp, int flags, bool waitok)
{
	int error = 0;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((flags & ~LK_NOWAIT) == 0);
	KASSERT(waitok == ((flags & LK_NOWAIT) == 0));

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of changing state we wait
	 * for the change to complete and take care not to return
	 * a clean vnode.
	 */
	if ((vp->v_iflag & VI_CHANGING) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_CHANGING);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}

	/*
	 * Ok, we got it in good shape.
	 */
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	mutex_exit(vp->v_interlock);
	return error;
}

/*
 * vput: unlock and release the reference.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Try to drop reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 */
static inline bool
vtryrele(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 1) {
			return false;
		}
		KASSERT(use > 1);
		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}

/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 */
static void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vnpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		if ((flags & VRELEL_CHANGING_SET) != 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) != 0);
			vp->v_iflag &= ~VI_CHANGING;
			cv_broadcast(&vp->v_cv);
		}
		mutex_exit(vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vnpanic(vp, "%s: bad ref count", __func__);
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Defer vnode release to vrele_thread if caller
		 * requests it explicitly or is the pagedaemon.
		 */
		if ((curlwp == uvm.pagedaemon_lwp) ||
		    (flags & VRELEL_ASYNC_RELE) != 0) {
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/*
			 * We have to try harder.
			 */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			KASSERT(error == 0);
			mutex_enter(vp->v_interlock);
			defer = false;
		} else {
			/* If we can't acquire the lock, then defer. */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp,
			    LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
			defer = (error != 0);
			mutex_enter(vp->v_interlock);
		}

		KASSERT(mutex_owned(vp->v_interlock));
		KASSERT(! (curlwp == vrele_lwp && defer));

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(vp->v_interlock);
			return;
		}

		/*
		 * If the node got another reference while we
		 * released the interlock, don't try to inactivate it yet.
		 */
		if (__predict_false(vtryrele(vp))) {
			VOP_UNLOCK(vp);
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_exit(vp->v_interlock);
			return;
		}

		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
		mutex_exit(vp->v_interlock);

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		if (recycle) {
			/* vclean() below will drop the lock. */
			if (vn_lock(vp, LK_EXCLUSIVE) != 0)
				recycle = false;
		}
		mutex_enter(vp->v_interlock);
		if (!recycle) {
			if (vtryrele(vp)) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
				mutex_exit(vp->v_interlock);
				return;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp);
		}
		KASSERT(vp->v_usecount > 0);
	} else { /* vnode was already clean */
		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(vp->v_interlock);
		vfs_insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
	}
}

void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, 0);
}

/*
 * Asynchronous vnode release, vnode is released in different context.
 */
void
vrele_async(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, VRELEL_ASYNC_RELE);
}

static void
vrele_thread(void *cookie)
{
	vnodelst_t skip_list;
	vnode_t *vp;
	struct mount *mp;

	TAILQ_INIT(&skip_list);

	mutex_enter(&vrele_lock);
	for (;;) {
		while (TAILQ_EMPTY(&vrele_list)) {
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
			TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
		}
		vp = TAILQ_FIRST(&vrele_list);
		mp = vp->v_mount;
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
			TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
			continue;
		}
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(vp->v_interlock);
		vrelel(vp, 0);
		fstrans_done(mp);
		mutex_enter(&vrele_lock);
	}
}

void
vrele_flush(void)
{
	int gen;

	mutex_enter(&vrele_lock);
	gen = vrele_gen;
	while (vrele_pending && gen == vrele_gen) {
		cv_broadcast(&vrele_cv);
		cv_wait(&vrele_cv, &vrele_lock);
	}
	mutex_exit(&vrele_lock);
}

/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}

/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with vnode locked and will return unlocked.
 * Must be called with the interlock held, and will return with it held.
 */
static void
vclean(vnode_t *vp)
{
	lwp_t *l = curlwp;
	bool recycle, active;
	int error;

	KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
	    VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
	KASSERT(vp->v_usecount != 0);

	active = (vp->v_usecount > 1);
	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	vp->v_iflag |= VI_XLOCK;
	if (vp->v_iflag & VI_EXECMAP) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	mutex_exit(vp->v_interlock);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
	if (error != 0) {
		if (wapbl_vphaswapbl(vp))
			WAPBL_DISCARD(wapbl_vptomp(vp));
		error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
	}
	KASSERT(error == 0);
	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
	if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
		spec_node_revoke(vp);
	}
	if (active) {
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VI_XLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vnpanic(vp, "%s: cannot reclaim", __func__);
	}

	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}

	/* Purge name cache. */
	cache_purge(vp);

	/* Move to dead mount. */
	vp->v_vflag &= ~VV_ROOT;
	atomic_inc_uint(&dead_rootmount->mnt_refcnt);
	vfs_insmntque(vp, dead_rootmount);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(vp->v_interlock);
	vp->v_op = dead_vnodeop_p;
	vp->v_vflag |= VV_LOCKSWORK;
	vp->v_iflag |= VI_CLEAN;
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	vp->v_iflag &= ~VI_XLOCK;
	cv_broadcast(&vp->v_cv);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}

/*
 * Recycle an unused vnode if caller holds the last reference.
 */
bool
vrecycle(vnode_t *vp)
{

	if (vn_lock(vp, LK_EXCLUSIVE) != 0)
		return false;

	mutex_enter(vp->v_interlock);

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return false;
	}
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	return true;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq;
	enum vtype type;
	dev_t dev;

	KASSERT(vp->v_usecount > 0);

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		atomic_inc_uint(&vp->v_usecount);
		mutex_exit(vp->v_interlock);
		vgone(vp);
		return;
	} else {
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(vp->v_interlock);
	}

	while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
		vgone(vq);
	}
}

/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

	if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
		/*
		 * The vnode is already clean; drop our reference and bail
		 * out, since vclean() below must not see a clean vnode.
		 */
		KASSERT((vp->v_iflag & VI_CLEAN) != 0);
		vrele(vp);
		return;
	}

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
}

static inline uint32_t
vcache_hash(const struct vcache_key *key)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
	hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
	return hash;
}

static void
vcache_init(void)
{

	vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
	    "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vcache.pool != NULL);
	mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
	vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
	    &vcache.hashmask);
}

static void
vcache_reinit(void)
{
	int i;
	uint32_t hash;
	u_long oldmask, newmask;
	struct hashhead *oldtab, *newtab;
	struct vcache_node *node;

	newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
	mutex_enter(&vcache.lock);
	oldtab = vcache.hashtab;
	oldmask = vcache.hashmask;
	vcache.hashtab = newtab;
	vcache.hashmask = newmask;
	for (i = 0; i <= oldmask; i++) {
		while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
			SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
			hash = vcache_hash(&node->vn_key);
			SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
			    node, vn_hash);
		}
	}
	mutex_exit(&vcache.lock);
	hashdone(oldtab, HASH_SLIST, oldmask);
}

static inline struct vcache_node *
vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
{
	struct hashhead *hashp;
	struct vcache_node *node;

	KASSERT(mutex_owned(&vcache.lock));

	hashp = &vcache.hashtab[hash & vcache.hashmask];
	SLIST_FOREACH(node, hashp, vn_hash) {
		if (key->vk_mount != node->vn_key.vk_mount)
			continue;
		if (key->vk_key_len != node->vn_key.vk_key_len)
			continue;
		if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
			continue;
		return node;
	}
	return NULL;
}

/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	struct vcache_node *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
		vp = node->vn_vnode;
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0, true /* wait */);
		if (error == ENOENT)
			goto again;
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}

	/* If another thread loads this node, wait and retry. */
	if (node != NULL) {
		KASSERT(node->vn_vnode == NULL);
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = vcache_key;
	vp = vnalloc(NULL);
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vn_hash);
		node = new_node;
	}
	mutex_exit(&vcache.lock);

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}

	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vcache_node, vn_hash);
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_key.vk_key = new_key;
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
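/*
 * An illustrative sketch of how a file system's lookup path might use
 * vcache_get().  The example function and the inode-number key are
 * hypothetical; real callers choose their own key layout and locking.
 */
#if 0
static int
example_vget_by_ino(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	int error;

	/* Returns a referenced, unlocked vnode on success. */
	error = vcache_get(mp, &ino, sizeof(ino), vpp);
	if (error != 0)
		return error;

	/* Lock for use; vput() would later unlock and drop the reference. */
	error = vn_lock(*vpp, LK_EXCLUSIVE);
	if (error != 0) {
		vrele(*vpp);
		*vpp = NULL;
	}
	return error;
}
#endif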
/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 */
int
vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
    kauth_cred_t cred, struct vnode **vpp)
{
	int error;
	uint32_t hash;
	struct vnode *vp;
	struct vcache_node *new_node;
	struct vcache_node *old_node __diagused;

	*vpp = NULL;

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_key.vk_mount = mp;
	new_node->vn_vnode = NULL;
	vp = vnalloc(NULL);

	/* Create and load the fs node. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
	    &new_node->vn_key.vk_key_len, &new_node->vn_key.vk_key);
	if (error) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_node->vn_key.vk_key != NULL);
	KASSERT(vp->v_op != NULL);
	hash = vcache_hash(&new_node->vn_key);

	/* Wait for previous instance to be reclaimed, then insert new node. */
	mutex_enter(&vcache.lock);
	while ((old_node = vcache_hash_lookup(&new_node->vn_key, hash))) {
#ifdef DIAGNOSTIC
		if (old_node->vn_vnode != NULL)
			mutex_enter(old_node->vn_vnode->v_interlock);
		KASSERT(old_node->vn_vnode == NULL ||
		    (old_node->vn_vnode->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0);
		if (old_node->vn_vnode != NULL)
			mutex_exit(old_node->vn_vnode->v_interlock);
#endif
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		mutex_enter(&vcache.lock);
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
	    new_node, vn_hash);
	mutex_exit(&vcache.lock);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}

/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = new_vcache_key;

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vn_hash);
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == vp);
	node->vn_vnode = NULL;
	node->vn_key = old_vcache_key;
	mutex_exit(&vcache.lock);
	return 0;
}

/*
 * Key change complete: remove old node and unlock new node.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(node != NULL && node->vn_vnode == NULL);
	KASSERT(node->vn_key.vk_key_len == new_key_len);
	node->vn_vnode = vp;
	node->vn_key = new_vcache_key;
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == NULL);
	SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Remove a vnode / fs node pair from the cache.
 */
void
vcache_remove(struct mount *mp, const void *key, size_t key_len)
{
	uint32_t hash;
	struct vcache_key vcache_key;
	struct vcache_node *node;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	KASSERT(node != NULL);
	SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	vnode_t *vp;

	if ((vp = bp->b_vp) == NULL)
		return;

	KASSERT(bp->b_objlock == vp->v_interlock);
	KASSERT(mutex_owned(bp->b_objlock));

	if (--vp->v_numoutput < 0)
		vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
	if (vp->v_numoutput == 0)
		cv_broadcast(&vp->v_cv);
}

/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 */
int
vdead_check(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	if (ISSET(vp->v_iflag, VI_XLOCK)) {
		if (ISSET(flags, VDEAD_NOWAIT))
			return EBUSY;
		vwait(vp, VI_XLOCK);
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
	}
	if (ISSET(vp->v_iflag, VI_CLEAN))
		return ENOENT;
	return 0;
}
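/*
 * An illustrative sketch of how a caller might use vdead_check() to back
 * out of an operation on a dying vnode.  The wrapper below is hypothetical.
 */
#if 0
static int
example_check_alive(struct vnode *vp)
{
	int error;

	mutex_enter(vp->v_interlock);
	/* Returns 0, or EBUSY/ENOENT once the vnode starts dying. */
	error = vdead_check(vp, VDEAD_NOWAIT);
	mutex_exit(vp->v_interlock);

	return error;
}
#endif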
/*
 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
 * recycled.
 */
static void
vwait(vnode_t *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

	while ((vp->v_iflag & flags) != 0)
		cv_wait(&vp->v_cv, vp->v_interlock);
}

int
vfs_drainvnodes(long target)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	while (numvnodes > target) {
		error = cleanvnode();
		if (error != 0)
			return error;
		mutex_enter(&vnode_free_list_lock);
	}

	mutex_exit(&vnode_free_list_lock);

	vcache_reinit();

	return 0;
}

void
vnpanic(vnode_t *vp, const char *fmt, ...)
{
	va_list ap;

#ifdef DIAGNOSTIC
	vprint(NULL, vp);
#endif
	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}